/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include "en/xdp.h"
#include "en/params.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 * (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
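
/* Worked example of (2) above, with illustrative (not authoritative) values:
 * assuming PAGE_SIZE == 4096, S == 320 (a typical x86_64 value of
 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info))), hr == 256 and
 * hard_mtu == 22 (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN), formula (2) gives
 *   max_mtu = 4096 - 320 - 256 - 22 = 3498.
 * SKB_MAX_HEAD(hr) expands to PAGE_SIZE - S - hr = 3520, and
 * MLX5E_HW2SW_MTU() then subtracts hard_mtu, yielding the same 3498.
 */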

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
	struct mlx5e_xdp_xmit_data xdptxd;
	struct mlx5e_xdp_info xdpi;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd.data = xdpf->data;
	xdptxd.len = xdpf->len;

	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even if this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as it was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;

		dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd.dma_addr = dma_addr;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = dma_addr;
	} else {
		/* Driver assumes that convert_to_xdp_frame returns an xdp_frame
		 * that points to the same memory region as the original
		 * xdp_buff. This allows mapping the memory only once and using
		 * the DMA_BIDIRECTIONAL mode.
		 */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;

		dma_addr = di->addr + (xdpf->data - (void *)xdpf);
		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
					   DMA_TO_DEVICE);

		xdptxd.dma_addr = dma_addr;
		xdpi.page.rq = rq;
		xdpi.page.di = *di;
	}

	return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0);
}

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	struct xdp_umem *umem = rq->umem;
	struct xdp_buff xdp;
	u32 act;
	int err;

	if (!prog)
		return false;

	xdp.data = va + *rx_headroom;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.data_hard_start = va;
	if (xsk)
		xdp.handle = di->xsk.handle;
	xdp.rxq = &rq->xdp_rxq;
	xdp.frame_sz = rq->buff.frame0_sz;

	act = bpf_prog_run_xdp(prog, &xdp);
	if (xsk) {
		u64 off = xdp.data - xdp.data_hard_start;

		xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
	}
	switch (act) {
	case XDP_PASS:
		*rx_headroom = xdp.data - xdp.data_hard_start;
		*len = xdp.data_end - xdp.data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, page refcnt == 1 here */
		err = xdp_do_redirect(rq->netdev, &xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		if (!xsk)
			mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		/* fall through */
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}
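
/* Illustrative sketch of the NOP fill, with assumed geometry (64-byte WQEBBs
 * in 4 KiB pages, so 64 WQEBBs per page): a caller at pi == 62 asking for
 * size == 4 (say) sees contig_wqebbs == 2 before the page edge. The loop
 * above posts 2 NOPs, sq->pc advances past the edge, and the recomputed
 * pi == 64 starts the next page, so the 4-WQEBB WQE never straddles pages.
 */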

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	prefetchw(session->wqe->data);
	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
	session->pkt_count = 0;

	mlx5e_xdp_update_inline_state(sq);

	stats->mpwqe++;
}

void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};

static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     MLX5E_XDPSQ_STOP_ROOM))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}

static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
				       struct mlx5e_xdp_xmit_data *xdptxd,
				       struct mlx5e_xdp_info *xdpi,
				       int check_result)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(xdptxd->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one
		 * dseg, and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

	if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
		     session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}
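
/* Worked WQEBB arithmetic for mlx5e_xdp_mpwqe_complete(), under the usual
 * mlx5 sizes (64-byte WQEBB, 16-byte data segment, hence
 * MLX5_SEND_WQEBB_NUM_DS == 4): a session closed with ds_count == 9 takes
 * DIV_ROUND_UP(9, 4) == 3 WQEBBs, so wi->num_wqebbs == 3 and sq->pc
 * advances by 3.
 */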

static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
				 struct mlx5e_xdp_xmit_data *xdptxd,
				 struct mlx5e_xdp_info *xdpi,
				 int check_result)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check(sq);
	if (unlikely(check_result < 0))
		return false;

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  bool recycle)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame(xdpi.frame.xdpf);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}
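
/* Note on the xdpi FIFO pairing (a reading of the code above, not a spec
 * statement): every successful xmit_xdp_frame{,_mpwqe}() pushes exactly one
 * mlx5e_xdp_info per packet, and mlx5e_free_xdpsq_desc() pops wi->num_pkts
 * entries on completion, so pushes and pops stay balanced per WQE.
 */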

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	if (xsk_frames)
		xsk_umem_complete_tx(sq->umem, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	u32 xsk_frames = 0;

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
	}

	if (xsk_frames)
		xsk_umem_complete_tx(sq->umem, xsk_frames);
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_xmit_data xdptxd;
		struct mlx5e_xdp_info xdpi;

		xdptxd.data = xdpf->data;
		xdptxd.len = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;

		if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) {
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}
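
/* Doorbell batching, informally (inferred from the two flush paths above):
 * descriptors are only written while packets are posted; the doorbell is
 * rung once per NAPI cycle (mlx5e_xdp_rx_poll_complete) or once per
 * ndo_xdp_xmit call with XDP_XMIT_FLUSH, amortizing the MMIO write over the
 * whole batch rather than paying for one per packet.
 */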

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
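
/* Usage sketch (caller and condition are assumptions, not taken from this
 * file): channel setup is expected to pick the path once, e.g.
 *
 *	mlx5e_set_xmit_fp(sq, MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE));
 *
 * so the per-packet hot path pays only an indirect call instead of
 * re-checking whether Enhanced Multi-Packet WQEs are enabled.
 */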