/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include "en.h"

#define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
			    MLX5E_SQ_NOPS_ROOM)

void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
{
	struct mlx5_wq_cyc *wq = &sq->wq;

	u16 pi = sq->pc & wq->sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;

	memset(cseg, 0, sizeof(*cseg));

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);

	sq->skb[pi] = NULL;
	sq->pc++;
	sq->stats.nop++;

	if (notify_hw) {
		cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
		mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
	}
}

static inline void mlx5e_tx_dma_unmap(struct device *pdev,
				      struct mlx5e_sq_dma *dma)
{
	switch (dma->type) {
	case MLX5E_DMA_MAP_SINGLE:
		dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
		break;
	case MLX5E_DMA_MAP_PAGE:
		dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE);
		break;
	default:
		WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n");
	}
}

static inline void mlx5e_dma_push(struct mlx5e_sq *sq,
				  dma_addr_t addr,
				  u32 size,
				  enum mlx5e_dma_map_type map_type)
{
	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
	sq->dma_fifo_pc++;
}

static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
{
	return &sq->dma_fifo[i & sq->dma_fifo_mask];
}

static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
{
	int i;

	for (i = 0; i < num_dma; i++) {
		struct mlx5e_sq_dma *last_pushed_dma =
			mlx5e_dma_get(sq, --sq->dma_fifo_pc);

		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
	}
}
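
/* Select the txq for an skb: the fallback hash picks a channel; when
 * traffic classes are configured, the VLAN priority bits pick the tc,
 * and the (channel, tc) pair is resolved via channeltc_to_txq_map.
 */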
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
		       void *accel_priv, select_queue_fallback_t fallback)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	int channel_ix = fallback(dev, skb);
	int up = 0;

	if (!netdev_get_num_tc(dev))
		return channel_ix;

	if (skb_vlan_tag_present(skb))
		up = skb->vlan_tci >> VLAN_PRIO_SHIFT;

	/* channel_ix can be larger than num_channels since
	 * dev->num_real_tx_queues = num_channels * num_tc
	 */
	if (channel_ix >= priv->params.num_channels)
		channel_ix = reciprocal_scale(channel_ix,
					      priv->params.num_channels);

	return priv->channeltc_to_txq_map[channel_ix][up];
}

static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}

static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
	struct flow_keys keys;

	if (skb_transport_header_was_set(skb))
		return skb_transport_offset(skb);
	else if (skb_flow_dissect_flow_keys(skb, &keys, 0))
		return keys.control.thoff;
	else
		return mlx5e_skb_l2_header_offset(skb);
}

static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
						 struct sk_buff *skb)
{
	int hlen;

	switch (mode) {
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
			hlen += VLAN_HLEN;
		return hlen;
	case MLX5_INLINE_MODE_IP:
		/* When transport header is set to zero, it means no transport
		 * header. When transport header is set to 0xff's, it means
		 * transport header wasn't set.
		 */
		if (skb_transport_offset(skb))
			return mlx5e_skb_l3_header_offset(skb);
		/* fall through */
	case MLX5_INLINE_MODE_L2:
	default:
		return mlx5e_skb_l2_header_offset(skb);
	}
}
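
/* Decide how many header bytes to copy inline into the WQE. When this
 * send may use the write-combining doorbell ("bf", the device's blue
 * flame path, cf. MLX5E_SQ_STATE_BF_ENABLE) and the whole linear part
 * fits within max_inline, inline all of it; otherwise inline only the
 * minimum required by the configured inline mode.
 */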
static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
					    struct sk_buff *skb, bool bf)
{
	/* Some NIC TX decisions, e.g. loopback, are based on the packet
	 * headers and occur before the data gather.
	 * Therefore these headers must be copied into the WQE.
	 */
	if (bf) {
		u16 ihs = skb_headlen(skb);

		if (skb_vlan_tag_present(skb))
			ihs += VLAN_HLEN;

		if (ihs <= sq->max_inline)
			return skb_headlen(skb);
	}
	return mlx5e_calc_min_inline(sq->min_inline_mode, skb);
}

static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
					    unsigned int *skb_len,
					    unsigned int len)
{
	*skb_len -= len;
	*skb_data += len;
}

static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs,
				     unsigned char **skb_data,
				     unsigned int *skb_len)
{
	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
	int cpy1_sz = 2 * ETH_ALEN;
	int cpy2_sz = ihs - cpy1_sz;

	memcpy(vhdr, *skb_data, cpy1_sz);
	mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy1_sz);
	vhdr->h_vlan_proto = skb->vlan_proto;
	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
	memcpy(&vhdr->h_vlan_encapsulated_proto, *skb_data, cpy2_sz);
	mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz);
}
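
/* Build and post one send WQE: set checksum/LSO flags in the eth
 * segment, copy the inline headers (re-inserting the VLAN tag by hand,
 * since the stack keeps it in skb metadata), DMA-map the linear part
 * and each frag into data segments, then ring the doorbell unless more
 * packets are expected (xmit_more) and the queue is still running.
 */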
static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
{
	struct mlx5_wq_cyc *wq = &sq->wq;

	u16 pi = sq->pc & wq->sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi];

	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
	struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
	struct mlx5_wqe_data_seg *dseg;

	unsigned char *skb_data = skb->data;
	unsigned int skb_len = skb->len;
	u8 opcode = MLX5_OPCODE_SEND;
	dma_addr_t dma_addr = 0;
	unsigned int num_bytes;
	bool bf = false;
	u16 headlen;
	u16 ds_cnt;
	u16 ihs;
	int i;

	memset(wqe, 0, sizeof(*wqe));

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats.csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
		}
	} else
		sq->stats.csum_none++;

	if (sq->cc != sq->prev_cc) {
		sq->prev_cc = sq->cc;
		sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0;
	}

	if (skb_is_gso(skb)) {
		eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
		opcode = MLX5_OPCODE_LSO;

		if (skb->encapsulation) {
			ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
			sq->stats.tso_inner_packets++;
			sq->stats.tso_inner_bytes += skb->len - ihs;
		} else {
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
			sq->stats.tso_packets++;
			sq->stats.tso_bytes += skb->len - ihs;
		}

		num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
	} else {
		bf = sq->bf_budget &&
		     !skb->xmit_more &&
		     !skb_shinfo(skb)->nr_frags;
		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
		num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
	}

	wi->num_bytes = num_bytes;

	if (skb_vlan_tag_present(skb)) {
		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
				  &skb_len);
		ihs += VLAN_HLEN;
	} else {
		memcpy(eseg->inline_hdr_start, skb_data, ihs);
		mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
	}

	eseg->inline_hdr_sz = cpu_to_be16(ihs);

	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
	ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr_start),
			       MLX5_SEND_WQE_DS);
	dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;

	wi->num_dma = 0;

	headlen = skb_len - skb->data_len;
	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		wi->num_dma++;

		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		wi->num_dma++;

		dseg++;
	}

	ds_cnt += wi->num_dma;

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);

	sq->skb[pi] = skb;

	wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += wi->num_wqebbs;

	netdev_tx_sent_queue(sq->txq, wi->num_bytes);

	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;

	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats.stopped++;
	}

	if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
		int bf_sz = 0;

		if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state))
			bf_sz = wi->num_wqebbs << 3;

		cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
		mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz);
	}

	/* fill sq edge with nops to avoid wqe wrap around */
	while ((sq->pc & wq->sz_m1) > sq->edge)
		mlx5e_send_nop(sq, false);

	if (bf)
		sq->bf_budget--;

	sq->stats.packets++;
	sq->stats.bytes += num_bytes;
	return NETDEV_TX_OK;

dma_unmap_wqe_err:
	sq->stats.dropped++;
	mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);

	dev_kfree_skb_any(skb);

	return NETDEV_TX_OK;
}

netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)];

	return mlx5e_sq_xmit(sq, skb);
}
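
/* Reclaim every WQE still outstanding between cc and pc, unmapping its
 * DMA and freeing its skb. This walks the ring directly rather than
 * the CQ, so it is presumably only safe once the hardware has stopped
 * using the queue (e.g. teardown after a TX timeout).
 */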
void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	struct sk_buff *skb;
	u16 ci;
	int i;

	while (sq->cc != sq->pc) {
		ci = sq->cc & sq->wq.sz_m1;
		skb = sq->skb[ci];
		wi = &sq->wqe_info[ci];

		if (!skb) { /* nop */
			sq->cc++;
			continue;
		}

		for (i = 0; i < wi->num_dma; i++) {
			struct mlx5e_sq_dma *dma =
				mlx5e_dma_get(sq, sq->dma_fifo_cc++);

			mlx5e_tx_dma_unmap(sq->pdev, dma);
		}

		dev_kfree_skb_any(skb);
		sq->cc += wi->num_wqebbs;
	}
}

bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq *sq;
	u32 dma_fifo_cc;
	u32 nbytes;
	u16 npkts;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_sq, cq);

	if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
		return false;

	npkts = 0;
	nbytes = 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	/* avoid dirtying sq cache line every cqe */
	dma_fifo_cc = sq->dma_fifo_cc;

	for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
		struct mlx5_cqe64 *cqe;
		u16 wqe_counter;
		bool last_wqe;

		cqe = mlx5e_get_cqe(cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			struct mlx5e_tx_wqe_info *wi;
			struct sk_buff *skb;
			u16 ci;
			int j;

			last_wqe = (sqcc == wqe_counter);

			ci = sqcc & sq->wq.sz_m1;
			skb = sq->skb[ci];
			wi = &sq->wqe_info[ci];

			if (unlikely(!skb)) { /* nop */
				sqcc++;
				continue;
			}

			if (unlikely(skb_shinfo(skb)->tx_flags &
				     SKBTX_HW_TSTAMP)) {
				struct skb_shared_hwtstamps hwts = {};

				mlx5e_fill_hwstamp(sq->tstamp,
						   get_cqe_ts(cqe), &hwts);
				skb_tstamp_tx(skb, &hwts);
			}

			for (j = 0; j < wi->num_dma; j++) {
				struct mlx5e_sq_dma *dma =
					mlx5e_dma_get(sq, dma_fifo_cc++);

				mlx5e_tx_dma_unmap(sq->pdev, dma);
			}

			npkts++;
			nbytes += wi->num_bytes;
			sqcc += wi->num_wqebbs;
			napi_consume_skb(skb, napi_budget);
		} while (!last_wqe);
	}

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);

	if (netif_tx_queue_stopped(sq->txq) &&
	    mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) &&
	    likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) {
		netif_tx_wake_queue(sq->txq);
		sq->stats.wake++;
	}

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}