/*
 * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/bitmap.h>
#include <linux/filter.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
#include <net/gro.h>
#include <net/udp.h>
#include <net/tcp.h>
#include <net/xdp_sock_drv.h>
#include "en.h"
#include "en/txrx.h"
#include "en_tc.h"
#include "eswitch.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "ipoib/ipoib.h"
#include "en_accel/ipsec.h"
#include "en_accel/macsec.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/ktls_txrx.h"
#include "en/xdp.h"
#include "en/xsk/rx.h"
#include "en/health.h"
#include "en/params.h"
#include "devlink.h"
#include "en/devlink.h"

static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
				u16 cqe_bcnt, u32 head_offset, u32 page_idx);
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
				   u16 cqe_bcnt, u32 head_offset, u32 page_idx);
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);

const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
	.handle_rx_cqe       = mlx5e_handle_rx_cqe,
	.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
	.handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
};

static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
	return config->rx_filter == HWTSTAMP_FILTER_ALL;
}

static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
				       u32 cqcc, void *data)
{
	u32 ci = mlx5_cqwq_ctr2ix(wq, cqcc);

	memcpy(data, mlx5_cqwq_get_wqe(wq, ci), sizeof(struct mlx5_cqe64));
}

static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
					 struct mlx5_cqwq *wq,
					 u32 cqcc)
{
	struct mlx5e_cq_decomp *cqd = &rq->cqd;
	struct mlx5_cqe64 *title = &cqd->title;

	mlx5e_read_cqe_slot(wq, cqcc, title);
	cqd->left        = be32_to_cpu(title->byte_cnt);
	cqd->wqe_counter = be16_to_cpu(title->wqe_counter);
	rq->stats->cqe_compress_blks++;
}

static inline void mlx5e_read_mini_arr_slot(struct mlx5_cqwq *wq,
					    struct mlx5e_cq_decomp *cqd,
					    u32 cqcc)
{
	mlx5e_read_cqe_slot(wq, cqcc, cqd->mini_arr);
	cqd->mini_arr_idx = 0;
}

static inline void mlx5e_cqes_update_owner(struct mlx5_cqwq *wq, int n)
{
	u32 cqcc   = wq->cc;
	u8  op_own = mlx5_cqwq_get_ctr_wrap_cnt(wq, cqcc) & 1;
	u32 ci     = mlx5_cqwq_ctr2ix(wq, cqcc);
	u32 wq_sz  = mlx5_cqwq_get_size(wq);
	u32 ci_top = min_t(u32, wq_sz, ci + n);

	for (; ci < ci_top; ci++, n--) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);

		cqe->op_own = op_own;
	}

	if (unlikely(ci == wq_sz)) {
		op_own = !op_own;
		for (ci = 0; ci < n; ci++) {
			struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci);

			cqe->op_own = op_own;
		}
	}
}

static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
					struct mlx5_cqwq *wq,
					u32 cqcc)
{
	struct mlx5e_cq_decomp *cqd = &rq->cqd;
	struct mlx5_mini_cqe8 *mini_cqe = &cqd->mini_arr[cqd->mini_arr_idx];
	struct mlx5_cqe64 *title = &cqd->title;

	title->byte_cnt = mini_cqe->byte_cnt;
	title->check_sum = mini_cqe->checksum;
	title->op_own &= 0xf0;
	title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz);

	/* state bit set implies linked-list striding RQ wq type and
	 * HW stride index capability supported
	 */
	if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) {
		title->wqe_counter = mini_cqe->stridx;
		return;
	}

	/* HW stride index capability not supported */
	title->wqe_counter = cpu_to_be16(cqd->wqe_counter);
	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
		cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title);
	else
		cqd->wqe_counter =
			mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, cqd->wqe_counter + 1);
}

static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
						struct mlx5_cqwq *wq,
						u32 cqcc)
{
	struct mlx5e_cq_decomp *cqd = &rq->cqd;

	mlx5e_decompress_cqe(rq, wq, cqcc);
	cqd->title.rss_hash_type   = 0;
	cqd->title.rss_hash_result = 0;
}

static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
					     struct mlx5_cqwq *wq,
					     int update_owner_only,
					     int budget_rem)
{
	struct mlx5e_cq_decomp *cqd = &rq->cqd;
	u32 cqcc = wq->cc + update_owner_only;
	u32 cqe_count;
	u32 i;

	cqe_count = min_t(u32, cqd->left, budget_rem);

	for (i = update_owner_only; i < cqe_count;
	     i++, cqd->mini_arr_idx++, cqcc++) {
		if (cqd->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
			mlx5e_read_mini_arr_slot(wq, cqd, cqcc);

		mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
				mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
				rq, &cqd->title);
	}
	mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
	wq->cc = cqcc;
	cqd->left -= cqe_count;
	rq->stats->cqe_compress_pkts += cqe_count;

	return cqe_count;
}
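
/* CQE compression overview: the title CQE carries the fields shared by a
 * whole compressed session, while each mlx5_mini_cqe8 contributes only the
 * per-packet byte count, checksum and stride index. mlx5e_decompress_cqe()
 * rebuilds a full CQE in cqd->title from the current mini CQE, and each
 * decompressed CQE is replayed through the regular handler via
 * INDIRECT_CALL_3. mlx5e_cqes_update_owner() then rewrites the owner bit of
 * every consumed slot (handling the wrap at wq_sz) so that CQ ownership
 * tracking stays consistent after the session is expanded in place.
 */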

static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
					      struct mlx5_cqwq *wq,
					      int budget_rem)
{
	struct mlx5e_cq_decomp *cqd = &rq->cqd;
	u32 cc = wq->cc;

	mlx5e_read_title_slot(rq, wq, cc);
	mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
	mlx5e_decompress_cqe(rq, wq, cc);
	INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
			mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
			rq, &cqd->title);
	cqd->mini_arr_idx++;

	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
}

static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, struct page *page)
{
	struct mlx5e_page_cache *cache = &rq->page_cache;
	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
	struct mlx5e_rq_stats *stats = rq->stats;

	if (tail_next == cache->head) {
		stats->cache_full++;
		return false;
	}

	if (!dev_page_is_reusable(page)) {
		stats->cache_waive++;
		return false;
	}

	cache->page_cache[cache->tail] = page;
	cache->tail = tail_next;
	return true;
}

static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
	struct mlx5e_page_cache *cache = &rq->page_cache;
	struct mlx5e_rq_stats *stats = rq->stats;
	dma_addr_t addr;

	if (unlikely(cache->head == cache->tail)) {
		stats->cache_empty++;
		return false;
	}

	if (page_ref_count(cache->page_cache[cache->head]) != 1) {
		stats->cache_busy++;
		return false;
	}

	au->page = cache->page_cache[cache->head];
	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
	stats->cache_reuse++;

	addr = page_pool_get_dma_addr(au->page);
	/* Non-XSK always uses PAGE_SIZE. */
	dma_sync_single_for_device(rq->pdev, addr, PAGE_SIZE, rq->buff.map_dir);
	return true;
}

static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, union mlx5e_alloc_unit *au)
{
	dma_addr_t addr;

	if (mlx5e_rx_cache_get(rq, au))
		return 0;

	au->page = page_pool_dev_alloc_pages(rq->page_pool);
	if (unlikely(!au->page))
		return -ENOMEM;

	/* Non-XSK always uses PAGE_SIZE. */
	addr = dma_map_page(rq->pdev, au->page, 0, PAGE_SIZE, rq->buff.map_dir);
	if (unlikely(dma_mapping_error(rq->pdev, addr))) {
		page_pool_recycle_direct(rq->page_pool, au->page);
		au->page = NULL;
		return -ENOMEM;
	}
	page_pool_set_dma_addr(au->page, addr);

	return 0;
}

void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct page *page)
{
	dma_addr_t dma_addr = page_pool_get_dma_addr(page);

	dma_unmap_page_attrs(rq->pdev, dma_addr, PAGE_SIZE, rq->buff.map_dir,
			     DMA_ATTR_SKIP_CPU_SYNC);
	page_pool_set_dma_addr(page, 0);
}

void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, struct page *page, bool recycle)
{
	if (likely(recycle)) {
		if (mlx5e_rx_cache_put(rq, page))
			return;

		mlx5e_page_dma_unmap(rq, page);
		page_pool_recycle_direct(rq->page_pool, page);
	} else {
		mlx5e_page_dma_unmap(rq, page);
		page_pool_release_page(rq->page_pool, page);
		put_page(page);
	}
}
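
/* Page life cycle on this RQ: allocation first tries the small per-RQ ring
 * cache (mlx5e_rx_cache_get), falling back to the page_pool. On release, a
 * recyclable page goes back into the ring cache when there is room and the
 * page is still reusable (exclusive ownership is verified on the get side
 * via the refcount == 1 check); otherwise it is unmapped and handed back to
 * the page_pool. The non-recycle path, used on rq/netdev teardown, releases
 * the page entirely.
 */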

static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
				    struct mlx5e_wqe_frag_info *frag)
{
	int err = 0;

	if (!frag->offset)
		/* On first frag (offset == 0), replenish page (alloc_unit actually).
		 * Other frags that point to the same alloc_unit (with a different
		 * offset) should just use the new one without replenishing again
		 * by themselves.
		 */
		err = mlx5e_page_alloc_pool(rq, frag->au);

	return err;
}

static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
				     struct mlx5e_wqe_frag_info *frag,
				     bool recycle)
{
	if (frag->last_in_page)
		mlx5e_page_release_dynamic(rq, frag->au->page, recycle);
}

static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
{
	return &rq->wqe.frags[ix << rq->wqe.info.log_num_frags];
}

static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
			      u16 ix)
{
	struct mlx5e_wqe_frag_info *frag = get_frag(rq, ix);
	int err;
	int i;

	for (i = 0; i < rq->wqe.info.num_frags; i++, frag++) {
		dma_addr_t addr;
		u16 headroom;

		err = mlx5e_get_rx_frag(rq, frag);
		if (unlikely(err))
			goto free_frags;

		headroom = i == 0 ? rq->buff.headroom : 0;
		addr = page_pool_get_dma_addr(frag->au->page);
		wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom);
	}

	return 0;

free_frags:
	while (--i >= 0)
		mlx5e_put_rx_frag(rq, --frag, true);

	return err;
}

static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
				     struct mlx5e_wqe_frag_info *wi,
				     bool recycle)
{
	int i;

	if (rq->xsk_pool) {
		/* The `recycle` parameter is ignored, and the page is always
		 * put into the Reuse Ring, because there is no way to return
		 * the page to the userspace when the interface goes down.
		 */
		xsk_buff_free(wi->au->xsk);
		return;
	}

	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
		mlx5e_put_rx_frag(rq, wi, recycle);
}

static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);

	mlx5e_free_rx_wqe(rq, wi, false);
}

static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_rx_wqe_cyc *wqe;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);

		if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, j)))
			break;
	}

	return i;
}

static inline void
mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
		   union mlx5e_alloc_unit *au, u32 frag_offset, u32 len,
		   unsigned int truesize)
{
	dma_addr_t addr = page_pool_get_dma_addr(au->page);

	dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len,
				rq->buff.map_dir);
	page_ref_inc(au->page);
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
			au->page, frag_offset, len, truesize);
}

static inline void
mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
		      struct page *page, dma_addr_t addr,
		      int offset_from, int dma_offset, u32 headlen)
{
	const void *from = page_address(page) + offset_from;
	/* Aligning len to sizeof(long) optimizes memcpy performance */
	unsigned int len = ALIGN(headlen, sizeof(long));

	dma_sync_single_for_cpu(rq->pdev, addr + dma_offset, len,
				rq->buff.map_dir);
	skb_copy_to_linear_data(skb, from, len);
}

static void
mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
{
	union mlx5e_alloc_unit *alloc_units = wi->alloc_units;
	bool no_xdp_xmit;
	int i;

	/* A common case for AF_XDP. */
	if (bitmap_full(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe))
		return;

	no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);

	if (rq->xsk_pool) {
		/* The `recycle` parameter is ignored, and the page is always
		 * put into the Reuse Ring, because there is no way to return
		 * the page to the userspace when the interface goes down.
		 */
		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
			if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
				xsk_buff_free(alloc_units[i].xsk);
	} else {
		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
			if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap))
				mlx5e_page_release_dynamic(rq, alloc_units[i].page, recycle);
	}
}

static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
{
	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;

	do {
		u16 next_wqe_index = mlx5_wq_ll_get_wqe_next_ix(wq, wq->head);

		mlx5_wq_ll_push(wq, next_wqe_index);
	} while (--n);

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_ll_update_db_record(wq);
}

/* Return the size of the contiguous run of free (clear) bits in the bitmap,
 * starting at @first and capped at @len. The run may wrap around the end of
 * the bitmap.
 */
static int bitmap_find_window(unsigned long *bitmap, int len,
			      int bitmap_size, int first)
{
	int next_one, count;

	next_one = find_next_bit(bitmap, bitmap_size, first);
	if (next_one == bitmap_size) {
		if (bitmap_size - first >= len)
			return len;
		next_one = find_next_bit(bitmap, bitmap_size, 0);
		count = next_one + bitmap_size - first;
	} else {
		count = next_one - first;
	}

	return min(len, count);
}

static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
			  __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
{
	memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
			    MLX5_OPCODE_UMR);
	umr_wqe->ctrl.umr_mkey = key;
	umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
					   | MLX5E_KLM_UMR_DS_CNT(klm_len));
	umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
	umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
	umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
}
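
/* Worked example for bitmap_find_window() above (hypothetical values): with
 * bitmap_size = 16, first = 12, len = 8 and only bit 2 set, no bit is set at
 * or after index 12, and 16 - 12 = 4 < len, so the search wraps: the next
 * set bit from 0 is 2, giving count = 2 + 16 - 12 = 6 free entries across
 * the wrap, and min(len, count) = 6 is returned. The SHAMPO code below uses
 * this to size the next header-buffer UMR without crossing an
 * already-occupied region of the header bitmap.
 */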

static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
				     struct mlx5e_icosq *sq,
				     u16 klm_entries, u16 index)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
	u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey;
	struct page *page = shampo->last_page;
	u64 addr = shampo->last_addr;
	struct mlx5e_dma_info *dma_info;
	struct mlx5e_umr_wqe *umr_wqe;
	int headroom, i;

	headroom = rq->buff.headroom;
	new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
	entries = ALIGN(klm_entries, MLX5_UMR_KLM_ALIGNMENT);
	wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
	pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
	build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);

	for (i = 0; i < entries; i++, index++) {
		dma_info = &shampo->info[index];
		if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
					 MLX5_UMR_KLM_ALIGNMENT))
			goto update_klm;
		header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
			MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
		if (!(header_offset & (PAGE_SIZE - 1))) {
			union mlx5e_alloc_unit au;

			err = mlx5e_page_alloc_pool(rq, &au);
			if (unlikely(err))
				goto err_unmap;
			page = dma_info->page = au.page;
			addr = dma_info->addr = page_pool_get_dma_addr(au.page);
		} else {
			dma_info->addr = addr + header_offset;
			dma_info->page = page;
		}

update_klm:
		umr_wqe->inline_klms[i].bcount =
			cpu_to_be32(MLX5E_RX_MAX_HEAD);
		umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
		umr_wqe->inline_klms[i].va =
			cpu_to_be64(dma_info->addr + headroom);
	}

	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type	= MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR,
		.num_wqebbs	= wqe_bbs,
		.shampo.len	= new_entries,
	};

	shampo->pi = (shampo->pi + new_entries) & (shampo->hd_per_wq - 1);
	shampo->last_page = page;
	shampo->last_addr = addr;
	sq->pc += wqe_bbs;
	sq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_unmap:
	while (--i >= 0) {
		dma_info = &shampo->info[--index];
		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
			mlx5e_page_release_dynamic(rq, dma_info->page, true);
		}
	}
	rq->stats->buff_alloc_err++;
	return err;
}

static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	u16 klm_entries, num_wqe, index, entries_before;
	struct mlx5e_icosq *sq = rq->icosq;
	int i, err, max_klm_entries, len;

	max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
	klm_entries = bitmap_find_window(shampo->bitmap,
					 shampo->hd_per_wqe,
					 shampo->hd_per_wq, shampo->pi);
	if (!klm_entries)
		return 0;

	klm_entries += (shampo->pi & (MLX5_UMR_KLM_ALIGNMENT - 1));
	index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_ALIGNMENT);
	entries_before = shampo->hd_per_wq - index;

	if (unlikely(entries_before < klm_entries))
		num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
			  DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
	else
		num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);

	for (i = 0; i < num_wqe; i++) {
		len = (klm_entries > max_klm_entries) ? max_klm_entries :
							klm_entries;
		if (unlikely(index + len > shampo->hd_per_wq))
			len = shampo->hd_per_wq - index;
		err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
		if (unlikely(err))
			return err;
		index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
		klm_entries -= len;
	}

	return 0;
}
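
/* SHAMPO header buffers are replenished through KLM UMR WQEs on the ICOSQ:
 * mlx5e_alloc_rx_hd_mpwqe() above measures the free window in the header
 * bitmap, rounds the producer index down to MLX5_UMR_KLM_ALIGNMENT, and
 * splits the work into as many UMRs as the per-WQE KLM limit and the ring
 * wrap require. Each mlx5e_build_shampo_hd_umr() call maps
 * MLX5E_RX_MAX_HEAD-sized header slots, allocating a fresh page whenever
 * the slot offset crosses a page boundary and reusing last_page/last_addr
 * otherwise.
 */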

static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	union mlx5e_alloc_unit *au = &wi->alloc_units[0];
	struct mlx5e_icosq *sq = rq->icosq;
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;
	int err;
	int i;

	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
		err = mlx5e_alloc_rx_hd_mpwqe(rq);
		if (unlikely(err))
			goto err;
	}

	pi = mlx5e_icosq_get_next_pi(sq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

	for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, au++) {
		dma_addr_t addr;

		err = mlx5e_page_alloc_pool(rq, au);
		if (unlikely(err))
			goto err_unmap;
		addr = page_pool_get_dma_addr(au->page);
		umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
			.ptag = cpu_to_be64(addr | MLX5_EN_WR),
		};
	}

	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
			    MLX5_OPCODE_UMR);

	offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type   = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq     = rq,
	};

	sq->pc += rq->mpwqe.umr_wqebbs;

	sq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_unmap:
	while (--i >= 0) {
		au--;
		mlx5e_page_release_dynamic(rq, au->page, true);
	}

err:
	rq->stats->buff_alloc_err++;

	return err;
}

/* Deallocate SHAMPO header buffer entries.
 * close == true means the RQ is being torn down: the whole range is scanned
 * and only entries still marked in use in the bitmap are released.
 * Otherwise, the given range of entries is known to be no longer in use and
 * is released unconditionally.
 */
void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	int hd_per_wq = shampo->hd_per_wq;
	struct page *deleted_page = NULL;
	struct mlx5e_dma_info *hd_info;
	int i, index = start;

	for (i = 0; i < len; i++, index++) {
		if (index == hd_per_wq)
			index = 0;

		if (close && !test_bit(index, shampo->bitmap))
			continue;

		hd_info = &shampo->info[index];
		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
		if (hd_info->page != deleted_page) {
			deleted_page = hd_info->page;
			mlx5e_page_release_dynamic(rq, hd_info->page, false);
		}
	}

	if (start + len > hd_per_wq) {
		len -= hd_per_wq - start;
		bitmap_clear(shampo->bitmap, start, hd_per_wq - start);
		start = 0;
	}

	bitmap_clear(shampo->bitmap, start, len);
}

static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	/* Don't recycle, this function is called on rq/netdev close */
	mlx5e_free_rx_mpwqe(rq, wi, false);
}
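
/* The bulk trimming in mlx5e_post_rx_wqes() below keeps newly allocated
 * WQEs from sharing a page with WQEs that are still outstanding. As a
 * hypothetical example with wqe_index_mask = 3 (four WQEs per page): for
 * head = 6 and a missing count of 7, (6 + 7) & 3 = 1, so the bulk is
 * trimmed to 6 and the refill stops exactly at a page boundary; the
 * remaining WQE is posted on a later pass, once the page ahead has fully
 * completed.
 */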

INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int wqe_bulk, count;
	bool busy = false;
	u16 head;

	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return false;

	if (mlx5_wq_cyc_missing(wq) < rq->wqe.info.wqe_bulk)
		return false;

	if (rq->page_pool)
		page_pool_nid_changed(rq->page_pool, numa_mem_id());

	wqe_bulk = mlx5_wq_cyc_missing(wq);
	head = mlx5_wq_cyc_get_head(wq);

	/* Don't allow any newly allocated WQEs to share the same page with old
	 * WQEs that aren't completed yet. Stop earlier.
	 */
	wqe_bulk -= (head + wqe_bulk) & rq->wqe.info.wqe_index_mask;

	if (!rq->xsk_pool)
		count = mlx5e_alloc_rx_wqes(rq, head, wqe_bulk);
	else if (likely(!rq->xsk_pool->dma_need_sync))
		count = mlx5e_xsk_alloc_rx_wqes_batched(rq, head, wqe_bulk);
	else
		/* If dma_need_sync is true, it's more efficient to call
		 * xsk_buff_alloc in a loop, rather than xsk_buff_alloc_batch,
		 * because the latter does the same check and returns only one
		 * frame.
		 */
		count = mlx5e_xsk_alloc_rx_wqes(rq, head, wqe_bulk);

	mlx5_wq_cyc_push_n(wq, count);
	if (unlikely(count != wqe_bulk)) {
		rq->stats->buff_alloc_err++;
		busy = true;
	}

	/* ensure wqes are visible to device before updating doorbell record */
	dma_wmb();

	mlx5_wq_cyc_update_db_record(wq);

	return busy;
}

void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq)
{
	u16 sqcc;

	sqcc = sq->cc;

	while (sqcc != sq->pc) {
		struct mlx5e_icosq_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];
		sqcc += wi->num_wqebbs;
#ifdef CONFIG_MLX5_EN_TLS
		switch (wi->wqe_type) {
		case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
			mlx5e_ktls_handle_ctx_completion(wi);
			break;
		case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
			mlx5e_ktls_handle_get_psv_completion(wi, sq);
			break;
		}
#endif
	}
	sq->cc = sqcc;
}

static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr,
				       struct mlx5e_icosq *sq)
{
	struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq);
	struct mlx5e_shampo_hd *shampo;
	/* assume 1:1 relationship between RQ and icosq */
	struct mlx5e_rq *rq = &c->rq;
	int end, from, len = umr.len;

	shampo = rq->mpwqe.shampo;
	end = shampo->hd_per_wq;
	from = shampo->ci;
	if (from + len > shampo->hd_per_wq) {
		len -= end - from;
		bitmap_set(shampo->bitmap, from, end - from);
		from = 0;
	}

	bitmap_set(shampo->bitmap, from, len);
	shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1);
}

int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq);
	struct mlx5_cqe64 *cqe;
	u16 sqcc;
	int i;

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return 0;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (likely(!cqe))
		return 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		u16 wqe_counter;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			struct mlx5e_icosq_wqe_info *wi;
			u16 ci;

			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];
			sqcc += wi->num_wqebbs;

			if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
				netdev_WARN_ONCE(cq->netdev,
						 "Bad OP in ICOSQ CQE: 0x%x\n",
						 get_cqe_opcode(cqe));
				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
						     (struct mlx5_err_cqe *)cqe);
				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
					queue_work(cq->priv->wq, &sq->recover_work);
				break;
			}

			switch (wi->wqe_type) {
			case MLX5E_ICOSQ_WQE_UMR_RX:
				wi->umr.rq->mpwqe.umr_completed++;
				break;
			case MLX5E_ICOSQ_WQE_NOP:
				break;
			case MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR:
				mlx5e_handle_shampo_hd_umr(wi->shampo, sq);
				break;
#ifdef CONFIG_MLX5_EN_TLS
			case MLX5E_ICOSQ_WQE_UMR_TLS:
				break;
			case MLX5E_ICOSQ_WQE_SET_PSV_TLS:
				mlx5e_ktls_handle_ctx_completion(wi);
				break;
			case MLX5E_ICOSQ_WQE_GET_PSV_TLS:
				mlx5e_ktls_handle_get_psv_completion(wi, sq);
				break;
#endif
			default:
				netdev_WARN_ONCE(cq->netdev,
						 "Bad WQE type in ICOSQ WQE info: 0x%x\n",
						 wi->wqe_type);
			}
		} while (!last_wqe);
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	sq->cc = sqcc;

	mlx5_cqwq_update_db_record(&cq->wq);

	return i;
}
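
/* ICOSQ completions drive the striding-RQ refill: each completed
 * MLX5E_ICOSQ_WQE_UMR_RX bumps umr_completed on the owning RQ, which
 * mlx5e_post_rx_mpwqes() below converts into linked-list WQE pushes, while
 * MLX5E_ICOSQ_WQE_SHAMPO_HD_UMR completions mark the freshly mapped header
 * entries in the SHAMPO bitmap via mlx5e_handle_shampo_hd_umr().
 */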

INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
{
	struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
	u8  umr_completed = rq->mpwqe.umr_completed;
	struct mlx5e_icosq *sq = rq->icosq;
	int alloc_err = 0;
	u8 missing, i;
	u16 head;

	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return false;

	if (umr_completed) {
		mlx5e_post_rx_mpwqe(rq, umr_completed);
		rq->mpwqe.umr_in_progress -= umr_completed;
		rq->mpwqe.umr_completed = 0;
	}

	missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress;

	if (unlikely(rq->mpwqe.umr_in_progress > rq->mpwqe.umr_last_bulk))
		rq->stats->congst_umr++;

	if (likely(missing < rq->mpwqe.min_wqe_bulk))
		return false;

	if (rq->page_pool)
		page_pool_nid_changed(rq->page_pool, numa_mem_id());

	head = rq->mpwqe.actual_wq_head;
	i = missing;
	do {
		alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
					   mlx5e_alloc_rx_mpwqe(rq, head);

		if (unlikely(alloc_err))
			break;
		head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
	} while (--i);

	rq->mpwqe.umr_last_bulk = missing - i;
	if (sq->doorbell_cseg) {
		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, sq->doorbell_cseg);
		sq->doorbell_cseg = NULL;
	}

	rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk;
	rq->mpwqe.actual_wq_head = head;

	/* If XSK Fill Ring doesn't have enough frames, report the error, so
	 * that one of the actions can be performed:
	 * 1. If need_wakeup is used, signal that the application has to kick
	 * the driver when it refills the Fill Ring.
	 * 2. Otherwise, busy poll by rescheduling the NAPI poll.
	 */
	if (unlikely(alloc_err == -ENOMEM && rq->xsk_pool))
		return true;

	return false;
}

static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
{
	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
	u8 tcp_ack     = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
			 (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);

	tcp->check = 0;
	tcp->psh = get_cqe_lro_tcppsh(cqe);

	if (tcp_ack) {
		tcp->ack     = 1;
		tcp->ack_seq = cqe->lro.ack_seq_num;
		tcp->window  = cqe->lro.tcp_win;
	}
}

static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
				 u32 cqe_bcnt)
{
	struct ethhdr *eth = (struct ethhdr *)(skb->data);
	struct tcphdr *tcp;
	int network_depth = 0;
	__wsum check;
	__be16 proto;
	u16 tot_len;
	void *ip_p;

	proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);

	tot_len = cqe_bcnt - network_depth;
	ip_p = skb->data + network_depth;

	if (proto == htons(ETH_P_IP)) {
		struct iphdr *ipv4 = ip_p;

		tcp = ip_p + sizeof(struct iphdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

		ipv4->ttl     = cqe->lro.min_ttl;
		ipv4->tot_len = cpu_to_be16(tot_len);
		ipv4->check   = 0;
		ipv4->check   = ip_fast_csum((unsigned char *)ipv4,
					     ipv4->ihl);

		mlx5e_lro_update_tcp_hdr(cqe, tcp);
		check = csum_partial(tcp, tcp->doff * 4,
				     csum_unfold((__force __sum16)cqe->check_sum));
		/* Almost done, don't forget the pseudo header */
		tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
					       tot_len - sizeof(struct iphdr),
					       IPPROTO_TCP, check);
	} else {
		u16 payload_len = tot_len - sizeof(struct ipv6hdr);
		struct ipv6hdr *ipv6 = ip_p;

		tcp = ip_p + sizeof(struct ipv6hdr);
		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

		ipv6->hop_limit   = cqe->lro.min_ttl;
		ipv6->payload_len = cpu_to_be16(payload_len);

		mlx5e_lro_update_tcp_hdr(cqe, tcp);
		check = csum_partial(tcp, tcp->doff * 4,
				     csum_unfold((__force __sum16)cqe->check_sum));
		/* Almost done, don't forget the pseudo header */
		tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
					     IPPROTO_TCP, check);
	}
}

static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index)
{
	struct mlx5e_dma_info *last_head = &rq->mpwqe.shampo->info[header_index];
	u16 head_offset = (last_head->addr & (PAGE_SIZE - 1)) + rq->buff.headroom;

	return page_address(last_head->page) + head_offset;
}

static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4)
{
	int udp_off = rq->hw_gro_data->fk.control.thoff;
	struct sk_buff *skb = rq->hw_gro_data->skb;
	struct udphdr *uh;

	uh = (struct udphdr *)(skb->data + udp_off);
	uh->len = htons(skb->len - udp_off);

	if (uh->check)
		uh->check = ~udp_v4_check(skb->len - udp_off, ipv4->saddr,
					  ipv4->daddr, 0);

	skb->csum_start = (unsigned char *)uh - skb->head;
	skb->csum_offset = offsetof(struct udphdr, check);

	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
}

static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6)
{
	int udp_off = rq->hw_gro_data->fk.control.thoff;
	struct sk_buff *skb = rq->hw_gro_data->skb;
	struct udphdr *uh;

	uh = (struct udphdr *)(skb->data + udp_off);
	uh->len = htons(skb->len - udp_off);

	if (uh->check)
		uh->check = ~udp_v6_check(skb->len - udp_off, &ipv6->saddr,
					  &ipv6->daddr, 0);

	skb->csum_start = (unsigned char *)uh - skb->head;
	skb->csum_offset = offsetof(struct udphdr, check);

	skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
}

static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
					      struct tcphdr *skb_tcp_hd)
{
	u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
	struct tcphdr *last_tcp_hd;
	void *last_hd_addr;

	last_hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
	last_tcp_hd = last_hd_addr + ETH_HLEN + rq->hw_gro_data->fk.control.thoff;
	tcp_flag_word(skb_tcp_hd) |= tcp_flag_word(last_tcp_hd) & (TCP_FLAG_FIN | TCP_FLAG_PSH);
}

static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4,
					     struct mlx5_cqe64 *cqe, bool match)
{
	int tcp_off = rq->hw_gro_data->fk.control.thoff;
	struct sk_buff *skb = rq->hw_gro_data->skb;
	struct tcphdr *tcp;

	tcp = (struct tcphdr *)(skb->data + tcp_off);
	if (match)
		mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);

	tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr,
				   ipv4->daddr, 0);
	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
	if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;

	skb->csum_start = (unsigned char *)tcp - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);

	if (tcp->cwr)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}

static void mlx5e_shampo_update_ipv6_tcp_hdr(struct mlx5e_rq *rq, struct ipv6hdr *ipv6,
					     struct mlx5_cqe64 *cqe, bool match)
{
	int tcp_off = rq->hw_gro_data->fk.control.thoff;
	struct sk_buff *skb = rq->hw_gro_data->skb;
	struct tcphdr *tcp;

	tcp = (struct tcphdr *)(skb->data + tcp_off);
	if (match)
		mlx5e_shampo_update_fin_psh_flags(rq, cqe, tcp);

	tcp->check = ~tcp_v6_check(skb->len - tcp_off, &ipv6->saddr,
				   &ipv6->daddr, 0);
	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
	skb->csum_start = (unsigned char *)tcp - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);

	if (tcp->cwr)
		skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
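
/* The helpers above rewrite the headers of a HW-GRO merged super-packet:
 * the L4 length and pseudo-header checksum are recomputed over the merged
 * payload, csum_start/csum_offset are set so the stack treats the skb as
 * CHECKSUM_PARTIAL, and the matching GSO type flags are raised so that a
 * later resegmentation reproduces the original on-wire stream.
 */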

static void mlx5e_shampo_update_hdr(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
{
	bool is_ipv4 = (rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP));
	struct sk_buff *skb = rq->hw_gro_data->skb;

	skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
	skb->ip_summed = CHECKSUM_PARTIAL;

	if (is_ipv4) {
		int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct iphdr);
		struct iphdr *ipv4 = (struct iphdr *)(skb->data + nhoff);
		__be16 newlen = htons(skb->len - nhoff);

		csum_replace2(&ipv4->check, ipv4->tot_len, newlen);
		ipv4->tot_len = newlen;

		if (ipv4->protocol == IPPROTO_TCP)
			mlx5e_shampo_update_ipv4_tcp_hdr(rq, ipv4, cqe, match);
		else
			mlx5e_shampo_update_ipv4_udp_hdr(rq, ipv4);
	} else {
		int nhoff = rq->hw_gro_data->fk.control.thoff - sizeof(struct ipv6hdr);
		struct ipv6hdr *ipv6 = (struct ipv6hdr *)(skb->data + nhoff);

		ipv6->payload_len = htons(skb->len - nhoff - sizeof(*ipv6));

		if (ipv6->nexthdr == IPPROTO_TCP)
			mlx5e_shampo_update_ipv6_tcp_hdr(rq, ipv6, cqe, match);
		else
			mlx5e_shampo_update_ipv6_udp_hdr(rq, ipv6);
	}
}

static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
				      struct sk_buff *skb)
{
	u8 cht = cqe->rss_hash_type;
	int ht = (cht & CQE_RSS_HTYPE_L4) ? PKT_HASH_TYPE_L4 :
		 (cht & CQE_RSS_HTYPE_IP) ? PKT_HASH_TYPE_L3 :
					    PKT_HASH_TYPE_NONE;
	skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}

static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
					__be16 *proto)
{
	*proto = ((struct ethhdr *)skb->data)->h_proto;
	*proto = __vlan_get_protocol(skb, *proto, network_depth);

	if (*proto == htons(ETH_P_IP))
		return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));

	if (*proto == htons(ETH_P_IPV6))
		return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));

	return false;
}

static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
{
	int network_depth = 0;
	__be16 proto;
	void *ip;
	int rc;

	if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
		return;

	ip = skb->data + network_depth;
	rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
					   IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));

	rq->stats->ecn_mark += !!rc;
}

static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
{
	void *ip_p = skb->data + network_depth;

	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
					    ((struct ipv6hdr *)ip_p)->nexthdr;
}

#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)

#define MAX_PADDING 8

static void
tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
		       struct mlx5e_rq_stats *stats)
{
	stats->csum_complete_tail_slow++;
	skb->csum = csum_block_add(skb->csum,
				   skb_checksum(skb, offset, len, 0),
				   offset);
}

static void
tail_padding_csum(struct sk_buff *skb, int offset,
		  struct mlx5e_rq_stats *stats)
{
	u8 tail_padding[MAX_PADDING];
	int len = skb->len - offset;
	void *tail;

	if (unlikely(len > MAX_PADDING)) {
		tail_padding_csum_slow(skb, offset, len, stats);
		return;
	}

	tail = skb_header_pointer(skb, offset, len, tail_padding);
	if (unlikely(!tail)) {
		tail_padding_csum_slow(skb, offset, len, stats);
		return;
	}

	stats->csum_complete_tail++;
	skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
}

static void
mlx5e_skb_csum_fixup(struct sk_buff *skb, int network_depth, __be16 proto,
		     struct mlx5e_rq_stats *stats)
{
	struct ipv6hdr *ip6;
	struct iphdr   *ip4;
	int pkt_len;

	/* Fixup vlan headers, if any */
	if (network_depth > ETH_HLEN)
		/* CQE csum is calculated from the IP header and does
		 * not cover VLAN headers (if present). This will add
		 * the checksum manually.
		 */
		skb->csum = csum_partial(skb->data + ETH_HLEN,
					 network_depth - ETH_HLEN,
					 skb->csum);

	/* Fixup tail padding, if any */
	switch (proto) {
	case htons(ETH_P_IP):
		ip4 = (struct iphdr *)(skb->data + network_depth);
		pkt_len = network_depth + ntohs(ip4->tot_len);
		break;
	case htons(ETH_P_IPV6):
		ip6 = (struct ipv6hdr *)(skb->data + network_depth);
		pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
		break;
	default:
		return;
	}

	if (likely(pkt_len >= skb->len))
		return;

	tail_padding_csum(skb, pkt_len, stats);
}

static inline void mlx5e_handle_csum(struct net_device *netdev,
				     struct mlx5_cqe64 *cqe,
				     struct mlx5e_rq *rq,
				     struct sk_buff *skb,
				     bool lro)
{
	struct mlx5e_rq_stats *stats = rq->stats;
	int network_depth = 0;
	__be16 proto;

	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
		goto csum_none;

	if (lro) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		stats->csum_unnecessary++;
		return;
	}

	/* True when explicitly set via priv flag, or XDP prog is loaded */
	if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) ||
	    get_cqe_tls_offload(cqe))
		goto csum_unnecessary;

	/* CQE csum doesn't cover padding octets in short ethernet
	 * frames. And the pad field is appended prior to calculating
	 * and appending the FCS field.
	 *
	 * Detecting these padded frames requires to verify and parse
	 * IP headers, so we simply force all those small frames to be
	 * CHECKSUM_UNNECESSARY even if they are not padded.
	 */
	if (short_frame(skb->len))
		goto csum_unnecessary;

	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
			goto csum_unnecessary;

		stats->csum_complete++;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);

		if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state))
			return; /* CQE csum covers all received bytes */

		/* csum might need some fixups ...*/
		mlx5e_skb_csum_fixup(skb, network_depth, proto, stats);
		return;
	}

csum_unnecessary:
	if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
		   (cqe->hds_ip_ext & CQE_L4_OK))) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (cqe_is_tunneled(cqe)) {
			skb->csum_level = 1;
			skb->encapsulation = 1;
			stats->csum_unnecessary_inner++;
			return;
		}
		stats->csum_unnecessary++;
		return;
	}
csum_none:
	skb->ip_summed = CHECKSUM_NONE;
	stats->csum_none++;
}

#define MLX5E_CE_BIT_MASK 0x80
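
/* Checksum decision ladder, as implemented above: LRO'ed skbs are marked
 * CHECKSUM_UNNECESSARY outright; CHECKSUM_COMPLETE is preferred for plain
 * IP packets, seeding skb->csum from the CQE checksum and fixing up VLAN
 * headers and tail padding that the CQE checksum does not cover; and
 * everything that cannot use it (SCTP, short frames, TLS offload, the
 * no-csum-complete private flag) falls back to CHECKSUM_UNNECESSARY when
 * the CQE reports valid L3/L4, or CHECKSUM_NONE otherwise.
 */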

static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
				      u32 cqe_bcnt,
				      struct mlx5e_rq *rq,
				      struct sk_buff *skb)
{
	u8 lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
	struct mlx5e_rq_stats *stats = rq->stats;
	struct net_device *netdev = rq->netdev;

	skb->mac_len = ETH_HLEN;

	if (unlikely(get_cqe_tls_offload(cqe)))
		mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt);

	if (unlikely(mlx5_ipsec_is_rx_flow(cqe)))
		mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, cqe);

	if (unlikely(mlx5e_macsec_is_rx_flow(cqe)))
		mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe);

	if (lro_num_seg > 1) {
		mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt);
		skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg);
		/* Subtract one since we already counted this as one
		 * "regular" packet in mlx5e_complete_rx_cqe()
		 */
		stats->packets += lro_num_seg - 1;
		stats->lro_packets++;
		stats->lro_bytes += cqe_bcnt;
	}

	if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
		skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
								  rq->clock, get_cqe_ts(cqe));
	skb_record_rx_queue(skb, rq->ix);

	if (likely(netdev->features & NETIF_F_RXHASH))
		mlx5e_skb_set_hash(cqe, skb);

	if (cqe_has_vlan(cqe)) {
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       be16_to_cpu(cqe->vlan_info));
		stats->removed_vlan_packets++;
	}

	skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;

	mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
	/* checking CE bit in cqe - MSB in ml_path field */
	if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
		mlx5e_enable_ecn(rq, skb);

	skb->protocol = eth_type_trans(skb, netdev);

	if (unlikely(mlx5e_skb_is_multicast(skb)))
		stats->mcast_packets++;
}

static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqe64 *cqe,
					 u32 cqe_bcnt,
					 struct sk_buff *skb)
{
	struct mlx5e_rq_stats *stats = rq->stats;

	stats->packets++;
	stats->gro_packets++;
	stats->bytes += cqe_bcnt;
	stats->gro_bytes += cqe_bcnt;
	if (NAPI_GRO_CB(skb)->count != 1)
		return;
	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
	skb_reset_network_header(skb);
	if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) {
		napi_gro_receive(rq->cq.napi, skb);
		rq->hw_gro_data->skb = NULL;
	}
}

static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqe64 *cqe,
					 u32 cqe_bcnt,
					 struct sk_buff *skb)
{
	struct mlx5e_rq_stats *stats = rq->stats;

	stats->packets++;
	stats->bytes += cqe_bcnt;
	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
}

static inline
struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
				       u32 frag_size, u16 headroom,
				       u32 cqe_bcnt, u32 metasize)
{
	struct sk_buff *skb = build_skb(va, frag_size);

	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_reserve(skb, headroom);
	skb_put(skb, cqe_bcnt);

	if (metasize)
		skb_metadata_set(skb, metasize);

	return skb;
}

static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
				u32 len, struct xdp_buff *xdp)
{
	xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
	xdp_prepare_buff(xdp, va, headroom, len, true);
}

static struct sk_buff *
mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
			  u32 cqe_bcnt)
{
	union mlx5e_alloc_unit *au = wi->au;
	u16 rx_headroom = rq->buff.headroom;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 metasize = 0;
	void *va, *data;
	dma_addr_t addr;
	u32 frag_size;

	va        = page_address(au->page) + wi->offset;
	data      = va + rx_headroom;
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);

	addr = page_pool_get_dma_addr(au->page);
	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
				      frag_size, rq->buff.map_dir);
	net_prefetch(data);

	prog = rcu_dereference(rq->xdp_prog);
	if (prog) {
		struct xdp_buff xdp;

		net_prefetchw(va); /* xdp_frame data area */
		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
		if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
			return NULL; /* page/packet was consumed by XDP */

		rx_headroom = xdp.data - xdp.data_hard_start;
		metasize = xdp.data - xdp.data_meta;
		cqe_bcnt = xdp.data_end - xdp.data;
	}
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
	if (unlikely(!skb))
		return NULL;

	/* queue up for recycling/reuse */
	page_ref_inc(au->page);

	return skb;
}
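
/* After an XDP program runs on the linear buffer above, the headroom, the
 * metadata size and the byte count are re-derived from the xdp_buff, since
 * the program may have moved xdp.data (bpf_xdp_adjust_head) or grown the
 * metadata area (bpf_xdp_adjust_meta); the skb is then built around the
 * adjusted layout.
 */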

static struct sk_buff *
mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
			     u32 cqe_bcnt)
{
	struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
	struct mlx5e_wqe_frag_info *head_wi = wi;
	union mlx5e_alloc_unit *au = wi->au;
	u16 rx_headroom = rq->buff.headroom;
	struct skb_shared_info *sinfo;
	u32 frag_consumed_bytes;
	struct bpf_prog *prog;
	struct xdp_buff xdp;
	struct sk_buff *skb;
	dma_addr_t addr;
	u32 truesize;
	void *va;

	va = page_address(au->page) + wi->offset;
	frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);

	addr = page_pool_get_dma_addr(au->page);
	dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset,
				      rq->buff.frame0_sz, rq->buff.map_dir);
	net_prefetchw(va); /* xdp_frame data area */
	net_prefetch(va + rx_headroom);

	mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
	sinfo = xdp_get_shared_info_from_buff(&xdp);
	truesize = 0;

	cqe_bcnt -= frag_consumed_bytes;
	frag_info++;
	wi++;

	while (cqe_bcnt) {
		skb_frag_t *frag;

		au = wi->au;

		frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);

		addr = page_pool_get_dma_addr(au->page);
		dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
					frag_consumed_bytes, rq->buff.map_dir);

		if (!xdp_buff_has_frags(&xdp)) {
			/* Init on the first fragment to avoid cold cache access
			 * when possible.
			 */
			sinfo->nr_frags = 0;
			sinfo->xdp_frags_size = 0;
			xdp_buff_set_frags_flag(&xdp);
		}

		frag = &sinfo->frags[sinfo->nr_frags++];
		__skb_frag_set_page(frag, au->page);
		skb_frag_off_set(frag, wi->offset);
		skb_frag_size_set(frag, frag_consumed_bytes);

		if (page_is_pfmemalloc(au->page))
			xdp_buff_set_frag_pfmemalloc(&xdp);

		sinfo->xdp_frags_size += frag_consumed_bytes;
		truesize += frag_info->frag_stride;

		cqe_bcnt -= frag_consumed_bytes;
		frag_info++;
		wi++;
	}

	au = head_wi->au;

	prog = rcu_dereference(rq->xdp_prog);
	if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
		if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
			int i;

			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
				mlx5e_put_rx_frag(rq, &head_wi[i], true);
		}
		return NULL; /* page/packet was consumed by XDP */
	}

	skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
				     xdp.data - xdp.data_hard_start,
				     xdp.data_end - xdp.data,
				     xdp.data - xdp.data_meta);
	if (unlikely(!skb))
		return NULL;

	page_ref_inc(au->page);

	if (unlikely(xdp_buff_has_frags(&xdp))) {
		int i;

		/* sinfo->nr_frags is reset by build_skb, calculate again. */
		xdp_update_skb_shared_info(skb, wi - head_wi - 1,
					   sinfo->xdp_frags_size, truesize,
					   xdp_buff_is_frag_pfmemalloc(&xdp));

		for (i = 0; i < sinfo->nr_frags; i++) {
			skb_frag_t *frag = &sinfo->frags[i];

			page_ref_inc(skb_frag_page(frag));
		}
	}

	return skb;
}

static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe;
	struct mlx5e_priv *priv = rq->priv;

	if (cqe_syndrome_needs_recover(err_cqe->syndrome) &&
	    !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) {
		mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe);
		queue_work(priv->wq, &rq->recover_work);
	}
}

static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	trigger_report(rq, cqe);
	rq->stats->wqe_err++;
}

static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct mlx5e_wqe_frag_info *wi;
	struct sk_buff *skb;
	u32 cqe_bcnt;
	u16 ci;

	ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
	wi = get_frag(rq, ci);
	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		mlx5e_handle_rx_err_cqe(rq, cqe);
		goto free_wqe;
	}

	skb = INDIRECT_CALL_3(rq->wqe.skb_from_cqe,
			      mlx5e_skb_from_cqe_linear,
			      mlx5e_skb_from_cqe_nonlinear,
			      mlx5e_xsk_skb_from_cqe_linear,
			      rq, wi, cqe_bcnt);
	if (!skb) {
		/* probably for XDP */
		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
			/* do not return page to cache,
			 * it will be returned on XDP_TX completion.
			 */
			goto wq_cyc_pop;
		}
		goto free_wqe;
	}

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	if (mlx5e_cqe_regb_chain(cqe))
		if (!mlx5e_tc_update_skb(cqe, skb)) {
			dev_kfree_skb_any(skb);
			goto free_wqe;
		}

	napi_gro_receive(rq->cq.napi, skb);

free_wqe:
	mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
	mlx5_wq_cyc_pop(wq);
}

#ifdef CONFIG_MLX5_ESWITCH
static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct net_device *netdev = rq->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep = rpriv->rep;
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct mlx5e_wqe_frag_info *wi;
	struct sk_buff *skb;
	u32 cqe_bcnt;
	u16 ci;

	ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
	wi = get_frag(rq, ci);
	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		mlx5e_handle_rx_err_cqe(rq, cqe);
		goto free_wqe;
	}

	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
			      mlx5e_skb_from_cqe_linear,
			      mlx5e_skb_from_cqe_nonlinear,
			      rq, wi, cqe_bcnt);
	if (!skb) {
		/* probably for XDP */
		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
			/* do not return page to cache,
			 * it will be returned on XDP_TX completion.
			 */
			goto wq_cyc_pop;
		}
		goto free_wqe;
	}

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	if (rep->vlan && skb_vlan_tag_present(skb))
		skb_vlan_pop(skb);

	mlx5e_rep_tc_receive(cqe, rq, skb);

free_wqe:
	mlx5e_free_rx_wqe(rq, wi, true);
wq_cyc_pop:
	mlx5_wq_cyc_pop(wq);
}

static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 cstrides    = mpwrq_get_cqe_consumed_strides(cqe);
	u16 wqe_id      = be16_to_cpu(cqe->wqe_id);
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
	u16 stride_ix   = mpwrq_get_cqe_stride_index(cqe);
	u32 wqe_offset  = stride_ix << rq->mpwqe.log_stride_sz;
	u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
	u32 page_idx    = wqe_offset >> rq->mpwqe.page_shift;
	struct mlx5e_rx_wqe_ll *wqe;
	struct mlx5_wq_ll *wq;
	struct sk_buff *skb;
	u16 cqe_bcnt;

	wi->consumed_strides += cstrides;

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		mlx5e_handle_rx_err_cqe(rq, cqe);
		goto mpwrq_cqe_out;
	}

	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
		struct mlx5e_rq_stats *stats = rq->stats;

		stats->mpwqe_filler_cqes++;
		stats->mpwqe_filler_strides += cstrides;
		goto mpwrq_cqe_out;
	}

	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

	skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
			      mlx5e_skb_from_cqe_mpwrq_linear,
			      mlx5e_skb_from_cqe_mpwrq_nonlinear,
			      rq, wi, cqe_bcnt, head_offset, page_idx);
	if (!skb)
		goto mpwrq_cqe_out;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	mlx5e_rep_tc_receive(cqe, rq, skb);

mpwrq_cqe_out:
	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
		return;

	wq  = &rq->mpwqe.wq;
	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
	mlx5e_free_rx_mpwqe(rq, wi, true);
	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
	.handle_rx_cqe       = mlx5e_handle_rx_cqe_rep,
	.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq_rep,
};
#endif

static void
mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
		    union mlx5e_alloc_unit *au, u32 data_bcnt, u32 data_offset)
{
	net_prefetchw(skb->data);

	while (data_bcnt) {
		/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
		u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
		unsigned int truesize;

		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
			truesize = pg_consumed_bytes;
		else
			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));

		mlx5e_add_skb_frag(rq, skb, au, data_offset,
				   pg_consumed_bytes, truesize);

		data_bcnt -= pg_consumed_bytes;
		data_offset = 0;
		au++;
	}
}

static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
				   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
	u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
	u32 frag_offset = head_offset + headlen;
	u32 byte_cnt    = cqe_bcnt - headlen;
	union mlx5e_alloc_unit *head_au = au;
	struct sk_buff *skb;
	dma_addr_t addr;

	skb = napi_alloc_skb(rq->cq.napi,
			     ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	net_prefetchw(skb->data);

	/* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */
	if (unlikely(frag_offset >= PAGE_SIZE)) {
		au++;
		frag_offset -= PAGE_SIZE;
	}

	mlx5e_fill_skb_data(skb, rq, au, byte_cnt, frag_offset);
	/* copy header */
	addr = page_pool_get_dma_addr(head_au->page);
	mlx5e_copy_skb_header(rq, skb, head_au->page, addr,
			      head_offset, head_offset, headlen);
	/* skb linear part was allocated with headlen and aligned to long */
	skb->tail += headlen;
	skb->len  += headlen;

	return skb;
}
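
/* The non-linear MPWRQ path above splits a packet at MLX5E_RX_MAX_HEAD:
 * up to that many bytes are memcpy'ed into the skb linear area (cheap for
 * header-sized amounts, and it keeps the headers contiguous for the stack),
 * while the remainder is attached as page fragments with truesize rounded
 * up to the stride size, so that socket memory accounting reflects the
 * strides actually consumed on the RQ.
 */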
static struct sk_buff *
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
				u16 cqe_bcnt, u32 head_offset, u32 page_idx)
{
	union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
	u16 rx_headroom = rq->buff.headroom;
	struct bpf_prog *prog;
	struct sk_buff *skb;
	u32 metasize = 0;
	void *va, *data;
	dma_addr_t addr;
	u32 frag_size;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	va = page_address(au->page) + head_offset;
	data = va + rx_headroom;
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);

	addr = page_pool_get_dma_addr(au->page);
	dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset,
				      frag_size, rq->buff.map_dir);
	net_prefetch(data);

	prog = rcu_dereference(rq->xdp_prog);
	if (prog) {
		struct xdp_buff xdp;

		net_prefetchw(va); /* xdp_frame data area */
		mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
		if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
			if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
				__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
			return NULL; /* page/packet was consumed by XDP */
		}

		rx_headroom = xdp.data - xdp.data_hard_start;
		metasize = xdp.data - xdp.data_meta;
		cqe_bcnt = xdp.data_end - xdp.data;
	}
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
	if (unlikely(!skb))
		return NULL;

	/* queue up for recycling/reuse */
	page_ref_inc(au->page);

	return skb;
}

static struct sk_buff *
mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
			  struct mlx5_cqe64 *cqe, u16 header_index)
{
	struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
	u16 head_offset = head->addr & (PAGE_SIZE - 1);
	u16 head_size = cqe->shampo.header_size;
	u16 rx_headroom = rq->buff.headroom;
	struct sk_buff *skb = NULL;
	void *hdr, *data;
	u32 frag_size;

	hdr = page_address(head->page) + head_offset;
	data = hdr + rx_headroom;
	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size);

	if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
		/* build SKB around header */
		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, rq->buff.map_dir);
		prefetchw(hdr);
		prefetch(data);
		skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size, 0);

		if (unlikely(!skb))
			return NULL;

		/* queue up for recycling/reuse */
		page_ref_inc(head->page);

	} else {
		/* allocate SKB and copy header for large header */
		rq->stats->gro_large_hds++;
		skb = napi_alloc_skb(rq->cq.napi,
				     ALIGN(head_size, sizeof(long)));
		if (unlikely(!skb)) {
			rq->stats->buff_alloc_err++;
			return NULL;
		}

		prefetchw(skb->data);
		mlx5e_copy_skb_header(rq, skb, head->page, head->addr,
				      head_offset + rx_headroom,
				      rx_headroom, head_size);
		/* skb linear part was allocated with head_size and aligned to long */
		skb->tail += head_size;
		skb->len += head_size;
	}
	return skb;
}

static void
mlx5e_shampo_align_fragment(struct sk_buff *skb, u8 log_stride_sz)
{
	skb_frag_t *last_frag = &skb_shinfo(skb)->frags[skb_shinfo(skb)->nr_frags - 1];
	unsigned int frag_size = skb_frag_size(last_frag);
	unsigned int frag_truesize;

	frag_truesize = ALIGN(frag_size, BIT(log_stride_sz));
	skb->truesize += frag_truesize - frag_size;
}
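
/* Deliver the SKB currently being aggregated by HW GRO (SHAMPO) to the
 * stack: round the last fragment's truesize up to the stride size, fix up
 * the merged headers when more than one segment was coalesced, and clear
 * the per-queue aggregation slot for the next session.
 */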
static void
mlx5e_shampo_flush_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, bool match)
{
	struct sk_buff *skb = rq->hw_gro_data->skb;
	struct mlx5e_rq_stats *stats = rq->stats;

	stats->gro_skbs++;
	if (likely(skb_shinfo(skb)->nr_frags))
		mlx5e_shampo_align_fragment(skb, rq->mpwqe.log_stride_sz);
	if (NAPI_GRO_CB(skb)->count > 1)
		mlx5e_shampo_update_hdr(rq, cqe, match);
	napi_gro_receive(rq->cq.napi, skb);
	rq->hw_gro_data->skb = NULL;
}

static bool
mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt)
{
	int nr_frags = skb_shinfo(skb)->nr_frags;

	return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE;
}

static void
mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
{
	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
	u64 addr = shampo->info[header_index].addr;

	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
		shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
		mlx5e_page_release_dynamic(rq, shampo->info[header_index].page, true);
	}
	bitmap_clear(shampo->bitmap, header_index, 1);
}

static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
	u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe);
	u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset);
	u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
	u32 data_offset = wqe_offset & (PAGE_SIZE - 1);
	u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);
	u16 wqe_id = be16_to_cpu(cqe->wqe_id);
	u32 page_idx = wqe_offset >> PAGE_SHIFT;
	u16 head_size = cqe->shampo.header_size;
	struct sk_buff **skb = &rq->hw_gro_data->skb;
	bool flush = cqe->shampo.flush;
	bool match = cqe->shampo.match;
	struct mlx5e_rq_stats *stats = rq->stats;
	struct mlx5e_rx_wqe_ll *wqe;
	union mlx5e_alloc_unit *au;
	struct mlx5e_mpw_info *wi;
	struct mlx5_wq_ll *wq;

	wi = mlx5e_get_mpw_info(rq, wqe_id);
	wi->consumed_strides += cstrides;

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		mlx5e_handle_rx_err_cqe(rq, cqe);
		goto mpwrq_cqe_out;
	}

	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
		stats->mpwqe_filler_cqes++;
		stats->mpwqe_filler_strides += cstrides;
		goto mpwrq_cqe_out;
	}

	stats->gro_match_packets += match;

	if (*skb && (!match || !(mlx5e_hw_gro_skb_has_enough_space(*skb, data_bcnt)))) {
		match = false;
		mlx5e_shampo_flush_skb(rq, cqe, match);
	}

	if (!*skb) {
		if (likely(head_size))
			*skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
		else
			*skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
								  page_idx);
		if (unlikely(!*skb))
			goto free_hd_entry;

		NAPI_GRO_CB(*skb)->count = 1;
		skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size;
	} else {
		NAPI_GRO_CB(*skb)->count++;
		if (NAPI_GRO_CB(*skb)->count == 2 &&
		    rq->hw_gro_data->fk.basic.n_proto == htons(ETH_P_IP)) {
			void *hd_addr = mlx5e_shampo_get_packet_hd(rq, header_index);
			int nhoff = ETH_HLEN + rq->hw_gro_data->fk.control.thoff -
				    sizeof(struct iphdr);
			struct iphdr *iph = (struct iphdr *)(hd_addr + nhoff);

			rq->hw_gro_data->second_ip_id = ntohs(iph->id);
		}
	}

	if (likely(head_size)) {
		au = &wi->alloc_units[page_idx];
		mlx5e_fill_skb_data(*skb, rq, au, data_bcnt, data_offset);
	}
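
	/* Data fragments for this CQE are now attached to the aggregated SKB;
	 * finish per-CQE processing and, if the HW flagged the end of the GRO
	 * session (flush), push the SKB to the stack.
	 */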
	mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb);
	if (flush)
		mlx5e_shampo_flush_skb(rq, cqe, match);
free_hd_entry:
	mlx5e_free_rx_shampo_hd_entry(rq, header_index);
mpwrq_cqe_out:
	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
		return;

	wq = &rq->mpwqe.wq;
	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
	mlx5e_free_rx_mpwqe(rq, wi, true);
	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe);
	u16 wqe_id = be16_to_cpu(cqe->wqe_id);
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, wqe_id);
	u16 stride_ix = mpwrq_get_cqe_stride_index(cqe);
	u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz;
	u32 head_offset = wqe_offset & ((1 << rq->mpwqe.page_shift) - 1);
	u32 page_idx = wqe_offset >> rq->mpwqe.page_shift;
	struct mlx5e_rx_wqe_ll *wqe;
	struct mlx5_wq_ll *wq;
	struct sk_buff *skb;
	u16 cqe_bcnt;

	wi->consumed_strides += cstrides;

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		mlx5e_handle_rx_err_cqe(rq, cqe);
		goto mpwrq_cqe_out;
	}

	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
		struct mlx5e_rq_stats *stats = rq->stats;

		stats->mpwqe_filler_cqes++;
		stats->mpwqe_filler_strides += cstrides;
		goto mpwrq_cqe_out;
	}

	cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe);

	skb = INDIRECT_CALL_3(rq->mpwqe.skb_from_cqe_mpwrq,
			      mlx5e_skb_from_cqe_mpwrq_linear,
			      mlx5e_skb_from_cqe_mpwrq_nonlinear,
			      mlx5e_xsk_skb_from_cqe_mpwrq_linear,
			      rq, wi, cqe_bcnt, head_offset, page_idx);
	if (!skb)
		goto mpwrq_cqe_out;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);

	if (mlx5e_cqe_regb_chain(cqe))
		if (!mlx5e_tc_update_skb(cqe, skb)) {
			dev_kfree_skb_any(skb);
			goto mpwrq_cqe_out;
		}

	napi_gro_receive(rq->cq.napi, skb);

mpwrq_cqe_out:
	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
		return;

	wq = &rq->mpwqe.wq;
	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
	mlx5e_free_rx_mpwqe(rq, wi, true);
	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
}

int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
{
	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
	struct mlx5_cqwq *cqwq = &cq->wq;
	struct mlx5_cqe64 *cqe;
	int work_done = 0;

	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
		return 0;

	if (rq->cqd.left) {
		work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
		if (work_done >= budget)
			goto out;
	}

	cqe = mlx5_cqwq_get_cqe(cqwq);
	if (!cqe) {
		if (unlikely(work_done))
			goto out;
		return 0;
	}

	do {
		if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
			work_done +=
				mlx5e_decompress_cqes_start(rq, cqwq,
							    budget - work_done);
			continue;
		}

		mlx5_cqwq_pop(cqwq);

		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
				mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
				rq, cqe);
	} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));

out:
	if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) && rq->hw_gro_data->skb)
		mlx5e_shampo_flush_skb(rq, NULL, false);
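
	/* An XDP program may have queued XDP_TX/XDP_REDIRECT work during this
	 * poll; complete it now (e.g. ring doorbells) once per NAPI cycle
	 * instead of per packet.
	 */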
	if (rcu_access_pointer(rq->xdp_prog))
		mlx5e_xdp_rx_poll_complete(rq);

	mlx5_cqwq_update_db_record(cqwq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	return work_done;
}

#ifdef CONFIG_MLX5_CORE_IPOIB

#define MLX5_IB_GRH_SGID_OFFSET 8
#define MLX5_IB_GRH_DGID_OFFSET 24
#define MLX5_GID_SIZE           16

static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
					 struct mlx5_cqe64 *cqe,
					 u32 cqe_bcnt,
					 struct sk_buff *skb)
{
	struct hwtstamp_config *tstamp;
	struct mlx5e_rq_stats *stats;
	struct net_device *netdev;
	struct mlx5e_priv *priv;
	char *pseudo_header;
	u32 flags_rqpn;
	u32 qpn;
	u8 *dgid;
	u8 g;

	qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
	netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);

	/* No mapping present, cannot process SKB. This might happen if a child
	 * interface is going down while having unprocessed CQEs on the parent RQ.
	 */
	if (unlikely(!netdev)) {
		/* TODO: add drop counters support */
		skb->dev = NULL;
		pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
		return;
	}

	priv = mlx5i_epriv(netdev);
	tstamp = &priv->tstamp;
	stats = rq->stats;

	flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
	g = (flags_rqpn >> 28) & 3;
	dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
	if ((!g) || dgid[0] != 0xff)
		skb->pkt_type = PACKET_HOST;
	else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0)
		skb->pkt_type = PACKET_BROADCAST;
	else
		skb->pkt_type = PACKET_MULTICAST;

	/* Drop packets that this interface sent, i.e. multicast packets
	 * that the HCA has replicated.
	 */
	if (g && (qpn == (flags_rqpn & 0xffffff)) &&
	    (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
		    MLX5_GID_SIZE) == 0)) {
		skb->dev = NULL;
		return;
	}

	skb_pull(skb, MLX5_IB_GRH_BYTES);

	skb->protocol = *((__be16 *)(skb->data));

	if (netdev->features & NETIF_F_RXCSUM) {
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
		stats->csum_complete++;
	} else {
		skb->ip_summed = CHECKSUM_NONE;
		stats->csum_none++;
	}

	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
		skb_hwtstamps(skb)->hwtstamp = mlx5e_cqe_ts_to_ns(rq->ptp_cyc2time,
								  rq->clock, get_cqe_ts(cqe));
	skb_record_rx_queue(skb, rq->ix);

	if (likely(netdev->features & NETIF_F_RXHASH))
		mlx5e_skb_set_hash(cqe, skb);

	/* 20 bytes of IPoIB header and 4 bytes of encap */
	pseudo_header = skb_push(skb, MLX5_IPOIB_PSEUDO_LEN);
	memset(pseudo_header, 0, MLX5_IPOIB_PSEUDO_LEN);
	skb_reset_mac_header(skb);
	skb_pull(skb, MLX5_IPOIB_HARD_LEN);

	skb->dev = netdev;

	stats->packets++;
	stats->bytes += cqe_bcnt;
}
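
/* IPoIB RX CQE handler for the cyclic RQ. Unlike the Ethernet path, the SKB
 * may still be dropped by mlx5i_complete_rx_cqe() (skb->dev left NULL), e.g.
 * when the destination child netdev cannot be resolved.
 */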
static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct mlx5e_wqe_frag_info *wi;
	struct sk_buff *skb;
	u32 cqe_bcnt;
	u16 ci;

	ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
	wi = get_frag(rq, ci);
	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		rq->stats->wqe_err++;
		goto wq_free_wqe;
	}

	skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
			      mlx5e_skb_from_cqe_linear,
			      mlx5e_skb_from_cqe_nonlinear,
			      rq, wi, cqe_bcnt);
	if (!skb)
		goto wq_free_wqe;

	mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
	if (unlikely(!skb->dev)) {
		dev_kfree_skb_any(skb);
		goto wq_free_wqe;
	}
	napi_gro_receive(rq->cq.napi, skb);

wq_free_wqe:
	mlx5e_free_rx_wqe(rq, wi, true);
	mlx5_wq_cyc_pop(wq);
}

const struct mlx5e_rx_handlers mlx5i_rx_handlers = {
	.handle_rx_cqe = mlx5i_handle_rx_cqe,
	.handle_rx_cqe_mpwqe = NULL, /* Not supported */
};
#endif /* CONFIG_MLX5_CORE_IPOIB */

int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool xsk)
{
	struct net_device *netdev = rq->netdev;
	struct mlx5_core_dev *mdev = rq->mdev;
	struct mlx5e_priv *priv = rq->priv;

	switch (rq->wq_type) {
	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
		rq->mpwqe.skb_from_cqe_mpwrq = xsk ?
			mlx5e_xsk_skb_from_cqe_mpwrq_linear :
			mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ?
				mlx5e_skb_from_cqe_mpwrq_linear :
				mlx5e_skb_from_cqe_mpwrq_nonlinear;
		rq->post_wqes = mlx5e_post_rx_mpwqes;
		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;

		if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
			if (!rq->handle_rx_cqe) {
				netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
				return -EINVAL;
			}
		} else {
			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
			if (!rq->handle_rx_cqe) {
				netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
				return -EINVAL;
			}
		}

		break;
	default: /* MLX5_WQ_TYPE_CYCLIC */
		rq->wqe.skb_from_cqe = xsk ?
			mlx5e_xsk_skb_from_cqe_linear :
			mlx5e_rx_is_linear_skb(mdev, params, NULL) ?
				mlx5e_skb_from_cqe_linear :
				mlx5e_skb_from_cqe_nonlinear;
		rq->post_wqes = mlx5e_post_rx_wqes;
		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
		rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe;
		if (!rq->handle_rx_cqe) {
			netdev_err(netdev, "RX handler of RQ is not set\n");
			return -EINVAL;
		}
	}

	return 0;
}

static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
	struct mlx5e_priv *priv = netdev_priv(rq->netdev);
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct mlx5e_wqe_frag_info *wi;
	struct devlink_port *dl_port;
	struct sk_buff *skb;
	u32 cqe_bcnt;
	u16 trap_id;
	u16 ci;

	trap_id = get_cqe_flow_tag(cqe);
	ci = mlx5_wq_cyc_ctr2ix(wq, be16_to_cpu(cqe->wqe_counter));
	wi = get_frag(rq, ci);
	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
		rq->stats->wqe_err++;
		goto free_wqe;
	}

	skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
	if (!skb)
		goto free_wqe;

	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
	skb_push(skb, ETH_HLEN);

	dl_port = mlx5e_devlink_get_dl_port(priv);
	mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port);
	dev_kfree_skb_any(skb);

free_wqe:
	mlx5e_free_rx_wqe(rq, wi, false);
	mlx5_wq_cyc_pop(wq);
}
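
/* Trap RQs reuse the cyclic RQ datapath but hand packets to devlink trap
 * reporting instead of the stack, so the handlers are assigned directly
 * rather than taken from the profile's rx_handlers table.
 */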
void mlx5e_rq_set_trap_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params)
{
	rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(rq->mdev, params, NULL) ?
			       mlx5e_skb_from_cqe_linear :
			       mlx5e_skb_from_cqe_nonlinear;
	rq->post_wqes = mlx5e_post_rx_wqes;
	rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
	rq->handle_rx_cqe = mlx5e_trap_handle_rx_cqe;
}