/*
 * Copyright (c) 2018, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/bpf_trace.h>
#include "en/xdp.h"

int mlx5e_xdp_max_mtu(struct mlx5e_params *params)
{
        int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM;

        /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
         * The condition checked in mlx5e_rx_is_linear_skb is:
         *   SKB_DATA_ALIGN(sw_mtu + hard_mtu + hr) + S <= PAGE_SIZE         (1)
         * (Note that hw_mtu == sw_mtu + hard_mtu.)
         * What is returned from this function is:
         *   max_mtu = PAGE_SIZE - S - hr - hard_mtu                         (2)
         * After assigning sw_mtu := max_mtu, the left side of (1) turns to
         * SKB_DATA_ALIGN(PAGE_SIZE - S) + S, which is equal to PAGE_SIZE,
         * because both PAGE_SIZE and S are already aligned. Any number greater
         * than max_mtu would make the left side of (1) greater than PAGE_SIZE,
         * so max_mtu is the maximum MTU allowed.
         */

        return MLX5E_HW2SW_MTU(params, SKB_MAX_HEAD(hr));
}
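/* Worked example (illustrative only; the concrete numbers are assumptions
 * for a typical x86_64 configuration, not values taken from this driver):
 * with PAGE_SIZE == 4096 and
 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) == 320 (64-byte cache
 * lines), SKB_MAX_HEAD(hr) evaluates to 4096 - 320 - hr, and
 * MLX5E_HW2SW_MTU() then subtracts the device's hard_mtu (L2 header
 * overhead).  The returned max_mtu is therefore the largest sw_mtu for
 * which condition (1) above still holds, with the left-hand side landing
 * exactly on PAGE_SIZE.
 */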
static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
                    struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
        struct mlx5e_xdp_xmit_data xdptxd;
        struct mlx5e_xdp_info xdpi;
        struct xdp_frame *xdpf;
        dma_addr_t dma_addr;

        xdpf = convert_to_xdp_frame(xdp);
        if (unlikely(!xdpf))
                return false;

        xdptxd.data = xdpf->data;
        xdptxd.len = xdpf->len;

        xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;

        dma_addr = di->addr + (xdpf->data - (void *)xdpf);
        dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, DMA_TO_DEVICE);

        xdptxd.dma_addr = dma_addr;
        xdpi.page.rq = rq;
        xdpi.page.di = *di;

        return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi);
}

/* returns true if packet was consumed by xdp */
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                      void *va, u16 *rx_headroom, u32 *len)
{
        struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
        struct xdp_buff xdp;
        u32 act;
        int err;

        if (!prog)
                return false;

        xdp.data = va + *rx_headroom;
        xdp_set_data_meta_invalid(&xdp);
        xdp.data_end = xdp.data + *len;
        xdp.data_hard_start = va;
        xdp.rxq = &rq->xdp_rxq;

        act = bpf_prog_run_xdp(prog, &xdp);
        switch (act) {
        case XDP_PASS:
                *rx_headroom = xdp.data - xdp.data_hard_start;
                *len = xdp.data_end - xdp.data;
                return false;
        case XDP_TX:
                if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
                        goto xdp_abort;
                __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
                return true;
        case XDP_REDIRECT:
                /* With XDP enabled, the page refcount is 1 at this point. */
                err = xdp_do_redirect(rq->netdev, &xdp, prog);
                if (unlikely(err))
                        goto xdp_abort;
                __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
                __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
                mlx5e_page_dma_unmap(rq, di);
                rq->stats->xdp_redirect++;
                return true;
        default:
                bpf_warn_invalid_xdp_action(act);
                /* fall through */
        case XDP_ABORTED:
xdp_abort:
                trace_xdp_exception(rq->netdev, prog, act);
                /* fall through */
        case XDP_DROP:
                rq->stats->xdp_drop++;
                return true;
        }
}
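/* Illustrative note (an assumption about the core XDP helpers, not a
 * statement from this driver): convert_to_xdp_frame() is expected to place
 * the struct xdp_frame inside the buffer's headroom, at xdp->data_hard_start.
 * That is why mlx5e_xmit_xdp_buff() above can recover the payload's DMA
 * address as di->addr + (xdpf->data - (void *)xdpf): the offset of the data
 * from the frame struct equals its offset from the start of the DMA-mapped
 * RX buffer described by di.
 */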
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
        struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
        struct mlx5e_xdpsq_stats *stats = sq->stats;
        struct mlx5_wq_cyc *wq = &sq->wq;
        u8 wqebbs;
        u16 pi;

        mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);

        prefetchw(session->wqe->data);
        session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
        session->pkt_count = 0;
        session->complete = 0;

        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);

/* The product of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif

        wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
                       MLX5E_XDP_MPW_MAX_WQEBBS);

        session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;

        mlx5e_xdp_update_inline_state(sq);

        stats->mpwqe++;
}

static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
        struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
        u16 ds_count = session->ds_count;
        u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];

        cseg->opmod_idx_opcode =
                cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
        cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

        wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
        wi->num_pkts = session->pkt_count;

        sq->pc += wi->num_wqebbs;

        sq->doorbell_cseg = cseg;

        session->wqe = NULL; /* Close session */
}
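/* Illustrative arithmetic (the constants below are assumptions spelled out
 * for clarity, not quoted from the mlx5 headers): with 64-byte WQEBBs,
 * MLX5_SEND_WQE_MAX_WQEBBS == 16 and MLX5_SEND_WQEBB_NUM_DS == 4, a full
 * 16-WQEBB multi-packet WQE would need 64 data segments, one more than the
 * 6-bit DS field can express (max 63).  Capping the session at 15 WQEBBs
 * (or 14 when cache lines are 128 bytes, so that 14 * 64 bytes remains a
 * multiple of the cache line) keeps max_ds_count at 60 (or 56) and the
 * full-session WQE cache-aligned, as the comment above describes.
 */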
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
                                       struct mlx5e_xdp_xmit_data *xdptxd,
                                       struct mlx5e_xdp_info *xdpi)
{
        struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
        struct mlx5e_xdpsq_stats *stats = sq->stats;

        if (unlikely(xdptxd->len > sq->hw_mtu)) {
                stats->err++;
                return false;
        }

        if (unlikely(!session->wqe)) {
                if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
                                                     MLX5_SEND_WQE_MAX_WQEBBS))) {
                        /* SQ is full, ring doorbell */
                        mlx5e_xmit_xdp_doorbell(sq);
                        stats->full++;
                        return false;
                }

                mlx5e_xdp_mpwqe_session_start(sq);
        }

        mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

        if (unlikely(session->complete ||
                     session->ds_count == session->max_ds_count))
                mlx5e_xdp_mpwqe_complete(sq);

        mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
        stats->xmit++;
        return true;
}

static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
                                 struct mlx5e_xdp_xmit_data *xdptxd,
                                 struct mlx5e_xdp_info *xdpi)
{
        struct mlx5_wq_cyc *wq = &sq->wq;
        u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

        struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
        struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
        struct mlx5_wqe_data_seg *dseg = wqe->data;

        dma_addr_t dma_addr = xdptxd->dma_addr;
        u32 dma_len = xdptxd->len;

        struct mlx5e_xdpsq_stats *stats = sq->stats;

        prefetchw(wqe);

        if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || sq->hw_mtu < dma_len)) {
                stats->err++;
                return false;
        }

        if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) {
                /* SQ is full, ring doorbell */
                mlx5e_xmit_xdp_doorbell(sq);
                stats->full++;
                return false;
        }

        cseg->fm_ce_se = 0;

        /* copy the inline part if required */
        if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
                memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE);
                eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
                dma_len -= MLX5E_XDP_MIN_INLINE;
                dma_addr += MLX5E_XDP_MIN_INLINE;
                dseg++;
        }

        /* write the dma part */
        dseg->addr = cpu_to_be64(dma_addr);
        dseg->byte_count = cpu_to_be32(dma_len);

        cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);

        sq->pc++;

        sq->doorbell_cseg = cseg;

        mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
        stats->xmit++;
        return true;
}

static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
                                  struct mlx5e_xdp_wqe_info *wi,
                                  bool recycle)
{
        struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
        u16 i;

        for (i = 0; i < wi->num_pkts; i++) {
                struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);

                switch (xdpi.mode) {
                case MLX5E_XDP_XMIT_MODE_FRAME:
                        /* XDP_REDIRECT */
                        dma_unmap_single(sq->pdev, xdpi.frame.dma_addr,
                                         xdpi.frame.xdpf->len, DMA_TO_DEVICE);
                        xdp_return_frame(xdpi.frame.xdpf);
                        break;
                case MLX5E_XDP_XMIT_MODE_PAGE:
                        /* XDP_TX */
                        mlx5e_page_release(xdpi.page.rq, &xdpi.page.di, recycle);
                        break;
                default:
                        WARN_ON_ONCE(true);
                }
        }
}

bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
{
        struct mlx5e_xdpsq *sq;
        struct mlx5_cqe64 *cqe;
        u16 sqcc;
        int i;

        sq = container_of(cq, struct mlx5e_xdpsq, cq);

        if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
                return false;

        cqe = mlx5_cqwq_get_cqe(&cq->wq);
        if (!cqe)
                return false;

        /* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
         * otherwise a cq overrun may occur
         */
        sqcc = sq->cc;

        i = 0;
        do {
                u16 wqe_counter;
                bool last_wqe;

                mlx5_cqwq_pop(&cq->wq);

                wqe_counter = be16_to_cpu(cqe->wqe_counter);

                if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
                        netdev_WARN_ONCE(sq->channel->netdev,
                                         "Bad OP in XDPSQ CQE: 0x%x\n",
                                         get_cqe_opcode(cqe));

                do {
                        struct mlx5e_xdp_wqe_info *wi;
                        u16 ci;

                        last_wqe = (sqcc == wqe_counter);
                        ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
                        wi = &sq->db.wqe_info[ci];

                        sqcc += wi->num_wqebbs;

                        mlx5e_free_xdpsq_desc(sq, wi, true);
                } while (!last_wqe);
        } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

        sq->stats->cqes += i;

        mlx5_cqwq_update_db_record(&cq->wq);

        /* ensure cq space is freed before enabling more cqes */
        wmb();

        sq->cc = sqcc;
        return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
{
        while (sq->cc != sq->pc) {
                struct mlx5e_xdp_wqe_info *wi;
                u16 ci;

                ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
                wi = &sq->db.wqe_info[ci];

                sq->cc += wi->num_wqebbs;

                mlx5e_free_xdpsq_desc(sq, wi, false);
        }
}
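/* Descriptive note (not part of the original driver commentary):
 * mlx5e_free_xdpsq_desc() runs with recycle == true from the CQE completion
 * path above, where pages used for XDP_TX may be handed back to the RQ page
 * cache, and with recycle == false from mlx5e_free_xdpsq_descs(), the
 * teardown path, where the queue is being drained and recycling no longer
 * applies.
 */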
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
                   u32 flags)
{
        struct mlx5e_priv *priv = netdev_priv(dev);
        struct mlx5e_xdpsq *sq;
        int drops = 0;
        int sq_num;
        int i;

        /* this flag is sufficient, no need to test internal sq state */
        if (unlikely(!mlx5e_xdp_tx_is_enabled(priv)))
                return -ENETDOWN;

        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
                return -EINVAL;

        sq_num = smp_processor_id();

        if (unlikely(sq_num >= priv->channels.num))
                return -ENXIO;

        sq = &priv->channels.c[sq_num]->xdpsq;

        for (i = 0; i < n; i++) {
                struct xdp_frame *xdpf = frames[i];
                struct mlx5e_xdp_xmit_data xdptxd;
                struct mlx5e_xdp_info xdpi;

                xdptxd.data = xdpf->data;
                xdptxd.len = xdpf->len;
                xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data,
                                                 xdptxd.len, DMA_TO_DEVICE);

                if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) {
                        xdp_return_frame_rx_napi(xdpf);
                        drops++;
                        continue;
                }

                xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME;
                xdpi.frame.xdpf = xdpf;
                xdpi.frame.dma_addr = xdptxd.dma_addr;

                if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi))) {
                        dma_unmap_single(sq->pdev, xdptxd.dma_addr,
                                         xdptxd.len, DMA_TO_DEVICE);
                        xdp_return_frame_rx_napi(xdpf);
                        drops++;
                }
        }

        if (flags & XDP_XMIT_FLUSH) {
                if (sq->mpwqe.wqe)
                        mlx5e_xdp_mpwqe_complete(sq);
                mlx5e_xmit_xdp_doorbell(sq);
        }

        return n - drops;
}

void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
        struct mlx5e_xdpsq *xdpsq = rq->xdpsq;

        if (xdpsq->mpwqe.wqe)
                mlx5e_xdp_mpwqe_complete(xdpsq);

        mlx5e_xmit_xdp_doorbell(xdpsq);

        if (test_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags)) {
                xdp_do_flush_map();
                __clear_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
        }
}

void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
        sq->xmit_xdp_frame = is_mpw ?
                mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}