/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
	int hr = mlx5e_get_linear_rq_headroom(params, xsk);

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE       (1)
	 * (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                       (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
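
/* Worked instance of formula (2) above (illustrative values, not taken from
 * this file): on a typical x86_64 build with 4 KB pages,
 * sizeof(struct skb_shared_info) is 320 bytes, which SKB_DATA_ALIGN leaves
 * at 320 (already a multiple of the 64-byte cacheline), and the XDP linear
 * RQ headroom hr is XDP_PACKET_HEADROOM = 256 bytes:
 *
 *	max hw_mtu = SKB_MAX_HEAD(256) = 4096 - 320 - 256 = 3520
 *
 * MLX5E_HW2SW_MTU then subtracts the hard MTU overhead (Ethernet header,
 * VLAN tag and FCS, 14 + 4 + 4 = 22 bytes), yielding the ~3498-byte MTU
 * ceiling commonly seen for linear XDP on mlx5. The exact numbers depend on
 * the architecture and kernel configuration.
 */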

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
	struct mlx5e_xdp_xmit_data xdptxd;
	struct mlx5e_xdp_info xdpi;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;

	xdpf = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd.data = xdpf->data;
	xdptxd.len  = xdpf->len;

	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even if this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as that was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;

		dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd.dma_addr     = dma_addr;
		xdpi.frame.xdpf     = xdpf;
		xdpi.frame.dma_addr = dma_addr;
	} else {
		/* The driver assumes that xdp_convert_buff_to_frame returns
		 * an xdp_frame that points to the same memory region as
		 * the original xdp_buff. This allows mapping the memory only
		 * once and using DMA_BIDIRECTIONAL mode.
		 */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;

		dma_addr = di->addr + (xdpf->data - (void *)xdpf);
		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
					   DMA_TO_DEVICE);

		xdptxd.dma_addr = dma_addr;
		xdpi.page.rq    = rq;
		xdpi.page.di    = *di;
	}

	return INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
			       mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
}

/* Returns true if the packet was consumed by XDP. */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      u32 *len, struct xdp_buff *xdp)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	u32 act;
	int err;

	if (!prog)
		return false;

	act = bpf_prog_run_xdp(prog, xdp);
	switch (act) {
	case XDP_PASS:
		*len = xdp->data_end - xdp->data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, the page refcount is 1 here. */
		err = xdp_do_redirect(rq->netdev, xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
			mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		fallthrough;
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}
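
/* A minimal sketch of how the RX path is expected to consume the verdict
 * above (hypothetical caller shape; the real call sites live in en_rx.c):
 *
 *	if (mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp))
 *		return NULL; // consumed by XDP (TX, REDIRECT or DROP)
 *	// false: XDP_PASS or no program attached; build an skb of
 *	// cqe_bcnt bytes, which mlx5e_xdp_handle may have shrunk if the
 *	// program adjusted xdp->data / xdp->data_end.
 */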

static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi, contig_wqebbs;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
	if (unlikely(contig_wqebbs < size)) {
		struct mlx5e_xdp_wqe_info *wi, *edge_wi;

		wi = &sq->db.wqe_info[pi];
		edge_wi = wi + contig_wqebbs;

		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
		for (; wi < edge_wi; wi++) {
			*wi = (struct mlx5e_xdp_wqe_info) {
				.num_wqebbs = 1,
				.num_pkts = 0,
			};
			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
		}
		sq->stats->nops += contig_wqebbs;

		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	}

	return pi;
}

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	u16 pi;

	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	prefetchw(session->wqe->data);
	session->ds_count  = MLX5E_XDP_TX_EMPTY_DS_COUNT;
	session->pkt_count = 0;

	mlx5e_xdp_update_inline_state(sq);

	stats->mpwqe++;
}

void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts   = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}
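
/* Worked example for the WQEBB accounting above (illustrative values, based
 * on the usual mlx5 constants: a 64-byte WQEBB holds MLX5_SEND_WQEBB_NUM_DS
 * = 4 data segments of 16 bytes each): a session closed with ds_count = 10
 * occupies DIV_ROUND_UP(10, 4) = 3 WQEBBs, so sq->pc advances by 3 and the
 * next WQE starts on a fresh WQEBB boundary.
 */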

enum {
	MLX5E_XDP_CHECK_OK = 1,
	MLX5E_XDP_CHECK_START_MPWQE = 2,
};

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!sq->mpwqe.wqe)) {
		const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);

		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     stop_room))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			sq->stats->full++;
			return -EBUSY;
		}

		return MLX5E_XDP_CHECK_START_MPWQE;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
			   struct mlx5e_xdp_info *xdpi, int check_result)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(xdptxd->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq);
	if (unlikely(check_result < 0))
		return false;

	if (check_result == MLX5E_XDP_CHECK_START_MPWQE) {
		/* Start the session when nothing can fail, so it's guaranteed
		 * that if there is an active session, it has at least one dseg,
		 * and it's safe to complete it at any time.
		 */
		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

	if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
		     session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		sq->stats->full++;
		return -EBUSY;
	}

	return MLX5E_XDP_CHECK_OK;
}

INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
		     struct mlx5e_xdp_info *xdpi, int check_result)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (!check_result)
		check_result = mlx5e_xmit_xdp_frame_check(sq);
	if (unlikely(check_result < 0))
		return false;

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len  -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr       = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}
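
/* Illustrative layout for the inline path above, assuming
 * MLX5E_XDP_MIN_INLINE covers an Ethernet header plus a VLAN tag
 * (14 + 4 = 18 bytes): for a 100-byte frame, the first 18 bytes are copied
 * into eseg->inline_hdr, and the single data segment then points at
 * dma_addr + 18 with byte_count = 82. The NIC reassembles the frame from
 * the inline part and the DMA part; inlining the L2 header lets the HW
 * apply loopback/steering decisions without fetching the payload first.
 */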

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  u32 *xsk_frames,
				  bool recycle)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame(xdpi.frame.xdpf);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle);
			break;
		case MLX5E_XDP_XMIT_MODE_XSK:
			/* AF_XDP send */
			(*xsk_frames)++;
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 xsk_frames = 0;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		struct mlx5e_xdp_wqe_info *wi;
		u16 wqe_counter, ci;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));
			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
					     (struct mlx5_err_cqe *)cqe);
			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
		}
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	if (xsk_frames)
		xsk_umem_complete_tx(sq->umem, xsk_frames);

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	u32 xsk_frames = 0;

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false);
	}

	if (xsk_frames)
		xsk_umem_complete_tx(sq->umem, xsk_frames);
}

int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_xmit_data xdptxd;
		struct mlx5e_xdp_info xdpi;
		bool ret;

		xdptxd.data = xdpf->data;
		xdptxd.len  = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;

		ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
				      mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
		if (unlikely(!ret)) {
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}
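
/* Usage note (the wiring lives outside this file): mlx5e_xdp_xmit serves as
 * the .ndo_xdp_xmit callback, invoked by the core XDP_REDIRECT machinery
 * when frames are redirected towards this netdev. A sketch of the contract
 * as implemented above (hypothetical caller, illustrative only):
 *
 *	sent = mlx5e_xdp_xmit(dev, n, frames, XDP_XMIT_FLUSH);
 *	// sent == n - drops on success, or a negative errno; frames that
 *	// failed to map or enqueue were already returned via
 *	// xdp_return_frame_rx_napi(), so the caller must not free them.
 */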

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame_check = is_mpw ?
		mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check;
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
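
/* Design note (assumed context, not from this file): keeping the xmit
 * function in a pointer lets mlx5e_set_xmit_fp switch between the MPWQE and
 * plain SEND paths at SQ (re)configuration time, while the hot paths above
 * dispatch through INDIRECT_CALL_2, e.g.:
 *
 *	INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe,
 *			mlx5e_xmit_xdp_frame, sq, &xdptxd, &xdpi, 0);
 *
 * which compares the pointer against the two known targets and calls the
 * match directly, avoiding a retpoline-penalized indirect call on the
 * per-packet path.
 */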