/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include "en/xdp.h"

static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
		    struct xdp_buff *xdp)
{
	struct mlx5e_xdp_info xdpi;

	xdpi.xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpi.xdpf))
		return false;
	xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf);
	dma_sync_single_for_device(sq->pdev, xdpi.dma_addr,
				   xdpi.xdpf->len, DMA_TO_DEVICE);
	xdpi.di = *di;

	return sq->xmit_xdp_frame(sq, &xdpi);
}

/* Returns true if the packet was consumed by XDP. */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
		      void *va, u16 *rx_headroom, u32 *len)
{
	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
	struct xdp_buff xdp;
	u32 act;
	int err;

	if (!prog)
		return false;

	xdp.data = va + *rx_headroom;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + *len;
	xdp.data_hard_start = va;
	xdp.rxq = &rq->xdp_rxq;

	act = bpf_prog_run_xdp(prog, &xdp);
	switch (act) {
	case XDP_PASS:
		*rx_headroom = xdp.data - xdp.data_hard_start;
		*len = xdp.data_end - xdp.data;
		return false;
	case XDP_TX:
		if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp)))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
		return true;
	case XDP_REDIRECT:
		/* With XDP enabled, the page refcount is 1 at this point. */
		err = xdp_do_redirect(rq->netdev, &xdp, prog);
		if (unlikely(err))
			goto xdp_abort;
		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
		mlx5e_page_dma_unmap(rq, di);
		rq->stats->xdp_redirect++;
		return true;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
xdp_abort:
		trace_xdp_exception(rq->netdev, prog, act);
		/* fall through */
	case XDP_DROP:
		rq->stats->xdp_drop++;
		return true;
	}
}
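/* MPWQE (multi-packet WQE) TX path: instead of posting one SEND WQE per
 * packet, data segments are accumulated into a single ENHANCED_MPSW WQE
 * "session".  mlx5e_xdp_mpwqe_session_start() below fetches a fresh WQE
 * and seeds ds_count with the segments an empty WQE already occupies;
 * mlx5e_xdp_mpwqe_complete() fills in the control segment and closes
 * the session once max_ds_count data segments have been added.
 *
 * Worked example, using the values from the comment above the
 * MLX5E_XDP_MPW_MAX_WQEBBS definition below: a full WQE of 16 WQEBBs
 * with 4 DS each would carry 64 descriptor segments, overflowing the
 * 6-bit DS field (maximum 63); capping one or two WQEBBs below the
 * maximum yields at most 15 * 4 == 60 segments and lets a full-session
 * WQE stay cache-aligned.
 */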
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wq_cyc *wq = &sq->wq;
	u8 wqebbs;
	u16 pi;

	mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);

	prefetchw(session->wqe->data);
	session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;

	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

	/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
	 * (16 * 4 == 64) does not fit in the 6-bit DS field of the Ctrl
	 * Segment.  We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to
	 * let a full-session WQE be cache-aligned.
	 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif

	wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
		       MLX5E_XDP_MPW_MAX_WQEBBS);

	session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
}

static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
	u16 ds_count = session->ds_count;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

	cseg->opmod_idx_opcode =
		cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
	wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;

	sq->pc += wi->num_wqebbs;

	sq->doorbell_cseg = cseg;

	session->wqe = NULL; /* Close session */
}

static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
				       struct mlx5e_xdp_info *xdpi)
{
	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
	struct mlx5e_xdpsq_stats *stats = sq->stats;

	dma_addr_t dma_addr = xdpi->dma_addr;
	struct xdp_frame *xdpf = xdpi->xdpf;
	unsigned int dma_len = xdpf->len;

	if (unlikely(sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!session->wqe)) {
		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
						     MLX5_SEND_WQE_MAX_WQEBBS))) {
			/* SQ is full, ring doorbell */
			mlx5e_xmit_xdp_doorbell(sq);
			stats->full++;
			return false;
		}

		mlx5e_xdp_mpwqe_session_start(sq);
	}

	mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);

	if (unlikely(session->ds_count == session->max_ds_count))
		mlx5e_xdp_mpwqe_complete(sq);

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}
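/* Legacy (non-MPWQE) TX path: one SEND WQE per frame.  When the SQ's
 * minimum inline mode requires it, the first MLX5E_XDP_MIN_INLINE bytes
 * of the frame are copied into the WQE's ethernet segment and the DMA
 * address/length are advanced past them; frames shorter than
 * MLX5E_XDP_MIN_INLINE are rejected up front.
 */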
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg = wqe->data;

	struct xdp_frame *xdpf = xdpi->xdpf;
	dma_addr_t dma_addr = xdpi->dma_addr;
	unsigned int dma_len = xdpf->len;

	struct mlx5e_xdpsq_stats *stats = sq->stats;

	prefetchw(wqe);

	if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
		stats->err++;
		return false;
	}

	if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
		/* SQ is full, ring doorbell */
		mlx5e_xmit_xdp_doorbell(sq);
		stats->full++;
		return false;
	}

	cseg->fm_ce_se = 0;

	/* copy the inline part if required */
	if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
		memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE);
		eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
		dma_len -= MLX5E_XDP_MIN_INLINE;
		dma_addr += MLX5E_XDP_MIN_INLINE;
		dseg++;
	}

	/* write the dma part */
	dseg->addr = cpu_to_be64(dma_addr);
	dseg->byte_count = cpu_to_be32(dma_len);

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

	sq->pc++;

	sq->doorbell_cseg = cseg;

	mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
	stats->xmit++;
	return true;
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo;
	struct mlx5e_xdpsq *sq;
	struct mlx5_cqe64 *cqe;
	bool is_redirect;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_xdpsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	is_redirect = !rq;
	xdpi_fifo = &sq->db.xdpi_fifo;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	i = 0;
	do {
		u16 wqe_counter;
		bool last_wqe;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
			netdev_WARN_ONCE(sq->channel->netdev,
					 "Bad OP in XDPSQ CQE: 0x%x\n",
					 get_cqe_opcode(cqe));

		do {
			struct mlx5e_xdp_wqe_info *wi;
			u16 ci, j;

			last_wqe = (sqcc == wqe_counter);
			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			for (j = 0; j < wi->num_ds; j++) {
				struct mlx5e_xdp_info xdpi =
					mlx5e_xdpi_fifo_pop(xdpi_fifo);

				if (is_redirect) {
					/* Unmap before freeing the frame:
					 * xdpf->len must not be read after
					 * xdp_return_frame().
					 */
					dma_unmap_single(sq->pdev, xdpi.dma_addr,
							 xdpi.xdpf->len, DMA_TO_DEVICE);
					xdp_return_frame(xdpi.xdpf);
				} else {
					/* Recycle RX page */
					mlx5e_page_release(rq, &xdpi.di, true);
				}
			}
		} while (!last_wqe);
	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	sq->stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->cc = sqcc;
	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq)
{
	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
	bool is_redirect = !rq;

	while (sq->cc != sq->pc) {
		struct mlx5e_xdp_wqe_info *wi;
		u16 ci, i;

		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
		wi = &sq->db.wqe_info[ci];

		sq->cc += wi->num_wqebbs;

		for (i = 0; i < wi->num_ds; i++) {
			struct mlx5e_xdp_info xdpi =
				mlx5e_xdpi_fifo_pop(xdpi_fifo);

			if (is_redirect) {
				/* Unmap before freeing the frame, as above. */
				dma_unmap_single(sq->pdev, xdpi.dma_addr,
						 xdpi.xdpf->len, DMA_TO_DEVICE);
				xdp_return_frame(xdpi.xdpf);
			} else {
				/* Recycle RX page */
				mlx5e_page_release(rq, &xdpi.di, false);
			}
		}
	}
}
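/* ndo_xdp_xmit entry point: transmits frames redirected here from other
 * devices via XDP_REDIRECT.  Unlike the XDP_TX path, which reuses the RX
 * DMA mapping, each frame is DMA-mapped on the fly, and the per-CPU
 * channel SQ is picked by smp_processor_id().  Returns the number of
 * frames actually queued; frames that could not be mapped or posted are
 * freed and counted against the return value.
 */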
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
		   u32 flags)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_xdpsq *sq;
	int drops = 0;
	int sq_num;
	int i;

	/* this flag is sufficient, no need to test internal sq state */
	if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
		return -ENETDOWN;

	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
		return -EINVAL;

	sq_num = smp_processor_id();

	if (unlikely(sq_num >= priv->channels.num))
		return -ENXIO;

	sq = &priv->channels.c[sq_num]->xdpsq;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];
		struct mlx5e_xdp_info xdpi;

		xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len,
					       DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) {
			xdp_return_frame_rx_napi(xdpf);
			drops++;
			continue;
		}

		xdpi.xdpf = xdpf;

		if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
			dma_unmap_single(sq->pdev, xdpi.dma_addr,
					 xdpf->len, DMA_TO_DEVICE);
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	if (flags & XDP_XMIT_FLUSH) {
		if (sq->mpwqe.wqe)
			mlx5e_xdp_mpwqe_complete(sq);
		mlx5e_xmit_xdp_doorbell(sq);
	}

	return n - drops;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
	struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;

	if (xdpsq->mpwqe.wqe)
		mlx5e_xdp_mpwqe_complete(xdpsq);

	mlx5e_xmit_xdp_doorbell(xdpsq);

	if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
		xdp_do_flush_map();
		__clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
	}
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
	sq->xmit_xdp_frame = is_mpw ?
		mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}