// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
	batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!wi->alloc_units[batch].xsk))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

		for (i = 0; i < batch; i++) {
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

		for (i = 0; i < batch; i++) {
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
		}
	}

	bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

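	/* Fill the UMR WQE control segment: the WQE index is taken from the
	 * ICOSQ producer counter, and xlt_offset selects this MPWQE's
	 * translation entries within the UMR mkey.
	 */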
	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep it in sync with mlx5e_mpwrq_umr_entry_size. */
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(wi->alloc_units[batch].xsk);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

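/* Fill up to wqe_bulk legacy RQ WQEs with XSK frames. The cyclic WQ may wrap
 * around its end, so the request is split into at most two contiguous ranges
 * for xsk_buff_alloc_batch. Returns the number of WQEs actually filled.
 */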
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* mlx5e_init_frags_partition creates a 1:1 mapping between
	 * rq->wqe.frags and rq->wqe.alloc_units, which allows us to
	 * allocate XDP buffers straight into alloc_units.
	 */
	BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) !=
		     sizeof(rq->wqe.alloc_units[0].xsk));
	buffs = (struct xdp_buff **)rq->wqe.alloc_units;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return alloc;
}

int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		frag->au->xsk = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!frag->au->xsk))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
	}

	return wqe_bulk;
}

static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	xsk_buff_set_size(xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
	net_prefetch(xdp->data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, xdp);
}

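/* Same flow as above, but for the legacy (cyclic) RQ: run the XDP program on
 * the XSK frame and, on XDP_PASS, copy the data into a newly allocated SKB.
 */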
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      u32 cqe_bcnt)
{
	struct xdp_buff *xdp = wi->au->xsk;
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	xsk_buff_set_size(xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
	net_prefetch(xdp->data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
		return NULL; /* page/packet was consumed by XDP */

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, xdp);
}