/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include "en/xdp.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
{
	int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;

	/* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 * The condition checked in mlx5e_rx_is_linear_skb is:
	 *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
	 *   (Note that hw_mtu == sw_mtu + hard_mtu.)
	 * What is returned from this function is:
	 *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
	 * After assigning sw_mtu := max_mtu, the left side of (1) turns to
	 * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
	 * because both PAGE_SIZE and S are already aligned. Any number greater
	 * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
	 * so max_mtu is the maximum MTU allowed.
	 */
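	/* Illustrative example (assumed values, configuration dependent):
	 * with 4 KB pages, XDP_PACKET_HEADROOM == 256 and S == 320 (a
	 * typical x86_64 value), (2) gives
	 *   max_mtu = 4096 - 320 - 256 - NET_IP_ALIGN - hard_mtu.
	 */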

	return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
		    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
	struct mlx5e_xdp_xmit_data xdptxd;
	struct mlx5e_xdp_info xdpi;
	struct xdp_frame *xdpf;
	dma_addr_t dma_addr;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return false;

	xdptxd.data = xdpf->data;
	xdptxd.len = xdpf->len;

	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
		/* The xdp_buff was in the UMEM and was copied into a newly
		 * allocated page. The UMEM page was returned via the ZCA, and
		 * this new page has to be mapped at this point and has to be
		 * unmapped and returned via xdp_return_frame on completion.
		 */

		/* Prevent double recycling of the UMEM page. Even if this
		 * function returns false, the xdp_buff shouldn't be recycled,
		 * as it was already done in xdp_convert_zc_to_xdp_frame.
		 */
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;

		dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len,
					  DMA_TO_DEVICE);
		if (dma_mapping_error(sq->pdev, dma_addr)) {
			xdp_return_frame(xdpf);
			return false;
		}

		xdptxd.dma_addr = dma_addr;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = dma_addr;
	} else {
		/* The driver assumes that convert_to_xdp_frame returns an
		 * xdp_frame that points to the same memory region as the
		 * original xdp_buff. This allows mapping the memory only once
		 * and using the DMA_BIDIRECTIONAL mode.
		 */

		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;

		dma_addr = di->addr + (xdpf->data - (void *)xdpf);
		dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len,
					   DMA_TO_DEVICE);

		xdptxd.dma_addr = dma_addr;
		xdpi.page.rq = rq;
		xdpi.page.di = *di;
	}

	return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi);
}

/* Returns true if the packet was consumed by XDP. */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      void *va, u16 *rx_headroom, u32 *len)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	struct xdp_buff xdp;
	u32 act;
	int err;

	if (!prog)
		return false;

	xdp.data = va + *rx_headroom;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.data_hard_start = va;
	xdp.rxq = &rq->xdp_rxq;

	act = bpf_prog_run_xdp(prog, &xdp);
	switch (act) {
	case XDP_PASS:
		*rx_headroom = xdp.data - xdp.data_hard_start;
		*len = xdp.data_end - xdp.data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* When XDP is enabled, the page refcount is 1 here */
		err = xdp_do_redirect(rq->netdev, &xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		/* fall through */
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}

static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;
	struct mlx5_wq_cyc *wq = &sq->wq;
	u8 wqebbs;
	u16 pi;

	mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);

	prefetchw(session->wqe->data);
	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
	session->pkt_count = 0;
	session->complete = 0;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of the Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
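/* For example, when L1_CACHE_BYTES < 128 the bound above is 15 WQEBBs,
 * so max_ds_count is capped at 15 * MLX5_SEND_WQEBB_NUM_DS == 60, which
 * fits the 6-bit DS field (maximum 63), whereas the full 16 * 4 == 64
 * would not.
 */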

	wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
		       MLX5E_XDP_MPW_MAX_WQEBBS);

	session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;

	mlx5e_xdp_update_inline_state(sq);

	stats->mpwqe++;
}

static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_pkts = session->pkt_count;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
				       struct mlx5e_xdp_xmit_data *xdptxd,
				       struct mlx5e_xdp_info *xdpi)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	if (unlikely(xdptxd->len > sq->hw_mtu)) {
		stats->err++;
		return false;
	}

	if (unlikely(!session->wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     MLX5_SEND_WQE_MAX_WQEBBS))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			stats->full++;
			return false;
		}

		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

	if (unlikely(session->complete ||
		     session->ds_count == session->max_ds_count))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}
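
/* Non-multi-packet TX path: each xdp_frame is sent in its own SEND WQE.
 * When the SQ's minimal inline mode requires it, the first
 * MLX5E_XDP_MIN_INLINE bytes of the frame are copied into the WQE's eth
 * segment and the DMA address is advanced past them.
 */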
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
				 struct mlx5e_xdp_xmit_data *xdptxd,
				 struct mlx5e_xdp_info *xdpi)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	dma_addr_t dma_addr = xdptxd->dma_addr;
	u32 dma_len = xdptxd->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		stats->full++;
		return false;
	}

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
				  struct mlx5e_xdp_wqe_info *wi,
				  bool recycle)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	u16 i;

	for (i = 0; i < wi->num_pkts; i++) {
		struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

		switch (xdpi.mode) {
		case MLX5E_XDP_XMIT_MODE_FRAME:
			/* XDP_TX from the XSK RQ and XDP_REDIRECT */
			dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
					 xdpi.frame.xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame(xdpi.frame.xdpf);
			break;
		case MLX5E_XDP_XMIT_MODE_PAGE:
			/* XDP_TX from the regular RQ */
			mlx5e_page_release(xdpi.page.rq, &xdpi.page.di, recycle);
			break;
		default:
			WARN_ON_ONCE(true);
		}
	}
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		u16 wqe_counter;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));

		do {
			struct mlx5e_xdp_wqe_info *wi;
			u16 ci;

			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			mlx5e_free_xdpsq_desc(sq, wi, true);
		} while (!last_wqe);
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		mlx5e_free_xdpsq_desc(sq, wi, false);
	}
}
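
/* Batch transmit of redirected frames (the driver's ndo_xdp_xmit callback).
 * Frames are mapped and queued on the XDP SQ of the channel matching the
 * current CPU; frames that cannot be mapped or enqueued are dropped, and
 * the number of successfully queued frames is returned. XDP_XMIT_FLUSH
 * closes any open MPWQE session and rings the doorbell.
 */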
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_xmit_data xdptxd;
		struct mlx5e_xdp_info xdpi;

		xdptxd.data = xdpf->data;
		xdptxd.len = xdpf->len;
		xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
						 xdptxd.len, DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
		xdpi.frame.xdpf = xdpf;
		xdpi.frame.dma_addr = xdptxd.dma_addr;

		if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi))) {
			dma_unmap_single(sq->pdev, xdptxd.dma_addr,
					 xdptxd.len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame = is_mpw ?
			     mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}