/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 * (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct xdp_buff *xdp)
{
	struct page *page = virt_to_page(xdp->data);
	struct mlx5e_xmit_data_frags xdptxdf = {};
	struct mlx5e_xmit_data *xdptxd;
	struct mlx5e_xdp_info xdpi;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;
	int i;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd = &xdptxdf.xd;
	xdptxd->data = xdpf->data;
	xdptxd->len = xdpf->len;
	xdptxd->has_frags = xdp_frame_has_frags(xdpf);

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even in case this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as it was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;

		if (unlikely(xdptxd->has_frags))
			return false;

		dma_addr = dma_map_single(sq->pdev, xdptxd->data, xdptxd->len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd->dma_addr = dma_addr;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = dma_addr;

		if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
					      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
			return false;

		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		return true;
	}

	/* Driver assumes that xdp_convert_buff_to_frame returns an xdp_frame
	 * that points to the same memory region as the original xdp_buff. This
	 * allows mapping the memory only once and using the DMA_BIDIRECTIONAL
	 * mode.
	 */

	xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
	xdpi.page.rq = rq;

	dma_addr = page_pool_get_dma_addr(page) + (xdpf->data - (void *)xdpf);
	dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd->len, DMA_BIDIRECTIONAL);

	if (xdptxd->has_frags) {
		xdptxdf.sinfo = xdp_get_shared_info_from_frame(xdpf);

		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];
			dma_addr_t addr;
			u32 len;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);
			len = skb_frag_size(frag);
			dma_sync_single_for_device(sq->pdev, addr, len,
						   DMA_BIDIRECTIONAL);
		}
	}

	xdptxd->dma_addr = dma_addr;

	if (unlikely(!INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, xdptxd, 0)))
		return false;

	xdpi.page.page = page;
	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);

	if (xdptxd->has_frags) {
		for (i = 0; i < xdptxdf.sinfo->nr_frags; i++) {
			skb_frag_t *frag = &xdptxdf.sinfo->frags[i];

			xdpi.page.page = skb_frag_page(frag);
			mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		}
	}

	return true;
}

static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;

	if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp)))
		return -ENODATA;

	*timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time,
					_ctx->rq->clock, get_cqe_ts(_ctx->cqe));
	return 0;
}

/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4 bits */
#define RSS_TYPE_MAX_TABLE	16 /* 4 bits, max 16 entries */
#define RSS_L4			GENMASK(1, 0)
#define RSS_L3			GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */

/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4, sorted numerically */
enum mlx5_rss_hash_type {
	RSS_TYPE_NO_HASH	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L3_IPV4	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV4_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV4_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV4_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
	RSS_TYPE_L3_IPV6	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
	RSS_TYPE_L4_IPV6_TCP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
	RSS_TYPE_L4_IPV6_UDP	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
	RSS_TYPE_L4_IPV6_IPSEC	= (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
				   FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};

/* Invalid combinations simply return zero, which allows skipping boundary checks */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
	[RSS_TYPE_NO_HASH]	 = XDP_RSS_TYPE_NONE,
	[1]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[2]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[3]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[RSS_TYPE_L3_IPV4]	 = XDP_RSS_TYPE_L3_IPV4,
	[RSS_TYPE_L4_IPV4_TCP]	 = XDP_RSS_TYPE_L4_IPV4_TCP,
	[RSS_TYPE_L4_IPV4_UDP]	 = XDP_RSS_TYPE_L4_IPV4_UDP,
	[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
	[RSS_TYPE_L3_IPV6]	 = XDP_RSS_TYPE_L3_IPV6,
	[RSS_TYPE_L4_IPV6_TCP]	 = XDP_RSS_TYPE_L4_IPV6_TCP,
	[RSS_TYPE_L4_IPV6_UDP]	 = XDP_RSS_TYPE_L4_IPV6_UDP,
	[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
	[12]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[13]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[14]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
	[15]			 = XDP_RSS_TYPE_NONE, /* Implicit zero */
};

static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
			     enum xdp_rss_hash_type *rss_type)
{
	const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
	const struct mlx5_cqe64 *cqe = _ctx->cqe;
	u32 hash_type, l4_type, ip_type, lookup;

	if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
		return -ENODATA;

	*hash = be32_to_cpu(cqe->rss_hash_result);

	hash_type = cqe->rss_hash_type;
	BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
	ip_type = hash_type & CQE_RSS_HTYPE_IP;
	l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
	lookup = ip_type | l4_type;
	*rss_type = mlx5_xdp_rss_type[lookup];

	return 0;
}

const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
	.xmo_rx_timestamp	= mlx5e_xdp_rx_timestamp,
	.xmo_rx_hash		= mlx5e_xdp_rx_hash,
};

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
		      struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
{
	struct xdp_buff *xdp = &mxbuf->xdp;
	u32 act;
	int err;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, the page refcount is 1 here */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(rq->netdev, prog, act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
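	/* Contiguous WQEBBs left until the SQ frag edge; a WQE must not wrap
	 * across it.
	 */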
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);

	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
	};

	stats->mpwqe++;
}

void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     sq->stop_room))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result);

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
			   int check_result)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (xdptxd->has_frags) {
		/* MPWQE is enabled, but a multi-buffer packet is queued for
		 * transmission. MPWQE can't send fragmented packets, so close
		 * the current session and fall back to a regular WQE.
		 */
		if (unlikely(sq->mpwqe.wqe))
			mlx5e_xdp_mpwqe_complete(sq);
		return mlx5e_xmit_xdp_frame(sq, xdptxd, 0);
	}

	if (unlikely(xdptxd->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

	if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs)))
		mlx5e_xdp_mpwqe_complete(sq);

	stats->xmit++;
	return true;
}

static int mlx5e_xmit_xdp_frame_check_stop_room(struct mlx5e_xdpsq *sq, int stop_room)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, stop_room))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	return mlx5e_xmit_xdp_frame_check_stop_room(sq, 1);
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
		     int check_result)
{
	struct mlx5e_xmit_data_frags *xdptxdf =
		container_of(xdptxd, struct mlx5e_xmit_data_frags, xd);
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5e_tx_wqe *wqe;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;
	u16 ds_cnt, inline_hdr_sz;
	u8 num_wqebbs = 1;
	int num_frags = 0;
	u16 pi;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE)
		ds_cnt++;

	/* check_result must be 0 if sinfo is passed. */
	if (!check_result) {
		int stop_room = 1;

		if (xdptxd->has_frags) {
			ds_cnt += xdptxdf->sinfo->nr_frags;
			num_frags = xdptxdf->sinfo->nr_frags;
			num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
			/* Assuming MLX5_CAP_GEN(mdev, max_wqe_sz_sq) is big
			 * enough to hold all fragments.
			 */
			stop_room = MLX5E_STOP_ROOM(num_wqebbs);
		}

		check_result = mlx5e_xmit_xdp_frame_check_stop_room(sq, stop_room);
	}
	if (unlikely(check_result < 0))
		return false;

	pi = mlx5e_xdpsq_get_next_pi(sq, num_wqebbs);
	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	net_prefetchw(wqe);

	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	inline_hdr_sz = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start));
		memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start),
		       MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start));
		dma_len -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
		u8 num_pkts = 1 + num_frags;
		int i;

		memset(&cseg->trailer, 0, sizeof(cseg->trailer));
		memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer));

		eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
		dseg->lkey = sq->mkey_be;

		for (i = 0; i < num_frags; i++) {
			skb_frag_t *frag = &xdptxdf->sinfo->frags[i];
			dma_addr_t addr;

			addr = page_pool_get_dma_addr(skb_frag_page(frag)) +
				skb_frag_off(frag);

			dseg++;
			dseg->addr = cpu_to_be64(addr);
			dseg->byte_count = cpu_to_be32(skb_frag_size(frag));
			dseg->lkey = sq->mkey_be;
		}

		cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);

		sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) {
			.num_wqebbs = num_wqebbs,
			.num_pkts = num_pkts,
		};

		sq->pc += num_wqebbs;
	} else {
		cseg->fm_ce_se = 0;

		sq->pc++;
	}

	sq->doorbell_cseg = cseg;

	stats->xmit++;
	return true;
}

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  struct xdp_frame_bulk *bq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_bulk(xdpi.frame.xdpf, bq);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			page_pool_put_defragged_page(xdpi.page.rq->page_pool,
						     xdpi.page.page, -1, true);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct xdp_frame_bulk bq;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	xdp_frame_bulk_init(&bq);

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	xdp_flush_frame_bulk(&bq);

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	struct xdp_frame_bulk bq;
	u32 xsk_frames = 0;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* needed for xdp_return_frame_bulk */

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
	}

	xdp_flush_frame_bulk(&bq);
	rcu_read_unlock();

	if (xsk_frames)
		xsk_tx_completed(sq->xsk_pool, xsk_frames);
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int nxmit = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xmit_data xdptxd = {};
		struct mlx5e_xdp_info xdpi;
		bool ret;

		xdptxd.data = xdpf->data;
		xdptxd.len = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr)))
			break;

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, &xdptxd, 0);
		if (unlikely(!ret)) {
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			break;
		}
		mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi);
		nxmit++;
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return nxmit;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

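	/* Ring the doorbell once for all XDP_TX WQEs posted during this RX
	 * poll, if any, instead of once per packet.
	 */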
	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}