/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/ptp.h"

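/* Unwind the last @num_dma entries pushed to the SQ DMA fifo and unmap them.
 * Used to clean up after a failed attempt to build a WQE.
 */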
static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
	int i;

	for (i = 0; i < num_dma; i++) {
		struct mlx5e_sq_dma *last_pushed_dma =
			mlx5e_dma_get(sq, --sq->dma_fifo_pc);

		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
	}
}

static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}

static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb))
		return skb_transport_offset(skb);
	else
		return mlx5e_skb_l2_header_offset(skb);
}

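/* Return the number of header bytes that must be inlined in the WQE for the
 * given inline mode, capped by the linear part of the skb.
 */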
static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
					struct sk_buff *skb)
{
	u16 hlen;

	switch (mode) {
	case MLX5_INLINE_MODE_NONE:
		return 0;
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
			hlen += VLAN_HLEN;
		break;
	case MLX5_INLINE_MODE_IP:
		hlen = mlx5e_skb_l3_header_offset(skb);
		break;
	case MLX5_INLINE_MODE_L2:
	default:
		hlen = mlx5e_skb_l2_header_offset(skb);
	}
	return min_t(u16, hlen, skb_headlen(skb));
}

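/* Copy the first @ihs bytes of the packet into the inline header, inserting
 * the VLAN tag from the skb right after the MAC addresses.
 */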
static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
	int cpy1_sz = 2 * ETH_ALEN;
	int cpy2_sz = ihs - cpy1_sz;

	memcpy(&vhdr->addrs, skb->data, cpy1_sz);
	vhdr->h_vlan_proto = skb->vlan_proto;
	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
	memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
}

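/* Set the L3/L4 checksum offload flags in the Ethernet segment. IPsec handling
 * takes precedence; TLS-offloaded skbs request full checksum even without
 * CHECKSUM_PARTIAL.
 */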
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			    struct mlx5e_accel_tx_state *accel,
			    struct mlx5_wqe_eth_seg *eseg)
{
	if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
		return;

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats->csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			sq->stats->csum_partial++;
		}
#ifdef CONFIG_MLX5_EN_TLS
	} else if (unlikely(accel && accel->tls.tls_tisn)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		sq->stats->csum_partial++;
#endif
	} else
		sq->stats->csum_none++;
}

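/* Compute the inline header size (ihs) for a GSO skb: up to the end of the
 * inner/outer TCP or UDP header, updating TSO statistics on the way.
 */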
static inline u16
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb)
{
	struct mlx5e_sq_stats *stats = sq->stats;
	u16 ihs;

	if (skb->encapsulation) {
		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
		stats->tso_inner_packets++;
		stats->tso_inner_bytes += skb->len - ihs;
	} else {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
		else
			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
		stats->tso_packets++;
		stats->tso_bytes += skb->len - ihs;
	}

	return ihs;
}

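/* DMA-map the remaining linear part of the skb and all page fragments, filling
 * one data segment per mapping. Returns the number of data segments built, or
 * -ENOMEM after unwinding the mappings on failure.
 */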
static inline int
mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			unsigned char *skb_data, u16 headlen,
			struct mlx5_wqe_data_seg *dseg)
{
	dma_addr_t dma_addr = 0;
	u8 num_dma          = 0;
	int i;

	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		num_dma++;
		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr       = cpu_to_be64(dma_addr);
		dseg->lkey       = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		num_dma++;
		dseg++;
	}

	return num_dma;

dma_unmap_wqe_err:
	mlx5e_dma_unmap_wqe_err(sq, num_dma);
	return -ENOMEM;
}

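/* Per-skb transmit attributes (opcode, MSS, inline header size, wire bytes)
 * and the derived WQE layout (data segment counts and WQEBB count).
 */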
struct mlx5e_tx_attr {
	u32 num_bytes;
	u16 headlen;
	u16 ihs;
	__be16 mss;
	u16 insz;
	u8 opcode;
};

struct mlx5e_tx_wqe_attr {
	u16 ds_cnt;
	u16 ds_cnt_inl;
	u16 ds_cnt_ids;
	u8 num_wqebbs;
};

static u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			 struct mlx5e_accel_tx_state *accel)
{
	u8 mode;

#ifdef CONFIG_MLX5_EN_TLS
	if (accel && accel->tls.tls_tisn)
		return MLX5_INLINE_MODE_TCP_UDP;
#endif

	mode = sq->min_inline_mode;

	if (skb_vlan_tag_present(skb) &&
	    test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
		mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);

	return mode;
}

static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
				  struct mlx5e_accel_tx_state *accel,
				  struct mlx5e_tx_attr *attr)
{
	struct mlx5e_sq_stats *stats = sq->stats;

	if (skb_is_gso(skb)) {
		u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb);

		*attr = (struct mlx5e_tx_attr) {
			.opcode    = MLX5_OPCODE_LSO,
			.mss       = cpu_to_be16(skb_shinfo(skb)->gso_size),
			.ihs       = ihs,
			.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
			.headlen   = skb_headlen(skb) - ihs,
		};

		stats->packets += skb_shinfo(skb)->gso_segs;
	} else {
		u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
		u16 ihs = mlx5e_calc_min_inline(mode, skb);

		*attr = (struct mlx5e_tx_attr) {
			.opcode    = MLX5_OPCODE_SEND,
			.mss       = cpu_to_be16(0),
			.ihs       = ihs,
			.num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
			.headlen   = skb_headlen(skb) - ihs,
		};

		stats->packets++;
	}

	attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
	stats->bytes += attr->num_bytes;
}

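/* Translate the TX attributes into the WQE layout: count the data segments
 * needed for the inline header, accel ids and gather list, and derive the
 * number of WQE basic blocks.
 */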
static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
	u16 ds_cnt_inl = 0;
	u16 ds_cnt_ids = 0;

	if (attr->insz)
		ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
					  MLX5_SEND_WQE_DS);

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		if (skb_vlan_tag_present(skb))
			inl += VLAN_HLEN;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt     = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.ds_cnt_ids = ds_cnt_ids,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
}

static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats->stopped++;
	}
}

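/* Finalize the WQE: record the wqe_info for completion handling, fill the
 * control segment, advance the producer counter, stop the queue if it ran out
 * of room, and ring the doorbell unless xmit_more allows batching.
 */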
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		     const struct mlx5e_tx_attr *attr,
		     const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
		     bool xmit_more)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	bool send_doorbell;

	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = skb,
		.num_bytes = attr->num_bytes,
		.num_dma = num_dma,
		.num_wqebbs = wqe_attr->num_wqebbs,
		.num_fifo_pkts = 0,
	};

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);

	mlx5e_tx_skb_update_hwts_flags(skb);

	sq->pc += wi->num_wqebbs;

	mlx5e_tx_check_stop(sq);

	if (unlikely(sq->ptpsq)) {
		mlx5e_skb_cb_hwtstamp_init(skb);
		mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
		skb_get(skb);
	}

	send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
	if (send_doorbell)
		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}

static void
mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		  const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
		  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg  *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;

	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi   = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg =  wqe->data;

	eseg->mss = attr->mss;

	if (attr->ihs) {
		if (skb_vlan_tag_present(skb)) {
			eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
			mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
			stats->added_vlan_packets++;
		} else {
			eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
			memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
		}
		dseg += wqe_attr->ds_cnt_inl;
	} else if (skb_vlan_tag_present(skb)) {
		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
		stats->added_vlan_packets++;
	}

	dseg += wqe_attr->ds_cnt_ids;
	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
					  attr->headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}

static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
	return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
	       !attr->insz;
}

static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;

	/* Assumes the session is already running and has at least one packet. */
	return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
}

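/* Open a new enhanced MPWQE session: reserve room for a maximal multi-packet
 * WQE and copy the shared Ethernet segment into it.
 */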
static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
					 struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = 0,
	};

	memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);

	sq->stats->mpwqe_blks++;
}

static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
{
	return sq->mpwqe.wqe;
}

static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_data_seg *dseg;

	dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;

	session->pkt_count++;
	session->bytes_count += txd->len;

	dseg->addr = cpu_to_be64(txd->dma_addr);
	dseg->byte_count = cpu_to_be32(txd->len);
	dseg->lkey = sq->mkey_be;
	session->ds_count++;

	sq->stats->mpwqe_pkts++;
}

static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	u8 ds_count = session->ds_count;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 pi;

	cseg = &session->wqe->ctrl;
	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
	wi = &sq->db.wqe_info[pi];
	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = NULL,
		.num_bytes = session->bytes_count,
		.num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
		.num_dma = session->pkt_count,
		.num_fifo_pkts = session->pkt_count,
	};

	sq->pc += wi->num_wqebbs;

	session->wqe = NULL;

	mlx5e_tx_check_stop(sq);

	return cseg;
}

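/* Transmit a linear skb through the current MPWQE session: map its data,
 * append a data segment, and close the session (ringing the doorbell) when it
 * is full or when the stack asks for it.
 */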
static void
mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		    struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_xmit_data txd;

	if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
		mlx5e_tx_mpwqe_session_complete(sq);
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	}

	sq->stats->xmit_more += xmit_more;

	txd.data = skb->data;
	txd.len = skb->len;

	txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
		goto err_unmap;
	mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);

	mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);

	mlx5e_tx_mpwqe_add_dseg(sq, &txd);

	mlx5e_tx_skb_update_hwts_flags(skb);

	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
			mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	} else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
		/* Might stop the queue, but we were asked to ring the doorbell anyway. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	}

	return;

err_unmap:
	mlx5e_dma_unmap_wqe_err(sq, 1);
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
}

void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
{
	/* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
	if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
		mlx5e_tx_mpwqe_session_complete(sq);
}

static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
	mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
}

netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_accel_tx_state accel = {};
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	struct mlx5e_txqsq *sq;
	u16 pi;

	/* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
	 * queue being changed is disabled, and smp_wmb guarantees that the
	 * changes are visible before mlx5e_xmit tries to read from txq2sq. It
	 * guarantees that the value of txq2sq[qid] doesn't change while
	 * mlx5e_xmit is running on queue number qid. smp_wmb is paired with
	 * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
	 */
	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
	if (unlikely(!sq)) {
		/* Two cases when sq can be NULL:
		 * 1. The HTB node is registered, and mlx5e_select_queue
		 * selected its queue ID, but the SQ itself is not yet created.
		 * 2. HTB SQ creation failed. Similar to the previous case, but
		 * the SQ won't be created.
		 */
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* May send SKBs and WQEs. */
	if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
		return NETDEV_TX_OK;

	mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);

	if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
			struct mlx5_wqe_eth_seg eseg = {};

			mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
			return NETDEV_TX_OK;
		}

		mlx5e_tx_mpwqe_ensure_complete(sq);
	}

	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	/* May update the WQE, but may not post other WQEs. */
	mlx5e_accel_tx_finish(sq, wqe, &accel,
			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
	mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());

	return NETDEV_TX_OK;
}

void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
}

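/* Unmap the DMA addresses recorded for a completed wqe_info, consuming entries
 * from the DMA fifo starting at *dma_fifo_cc.
 */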
static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
				  u32 *dma_fifo_cc)
{
	int i;

	for (i = 0; i < wi->num_dma; i++) {
		struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);

		mlx5e_tx_dma_unmap(sq->pdev, dma);
	}
}

static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			      struct mlx5_cqe64 *cqe, int napi_budget)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		struct skb_shared_hwtstamps hwts = {};
		u64 ts = get_cqe_ts(cqe);

		hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
		if (sq->ptpsq)
			mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
						      hwts.hwtstamp, sq->ptpsq->cq_stats);
		else
			skb_tstamp_tx(skb, &hwts);
	}

	napi_consume_skb(skb, napi_budget);
}

static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++) {
		struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);

		mlx5e_consume_skb(sq, skb, cqe, napi_budget);
	}
}

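/* Poll the TX completion queue: release skbs and DMA mappings for completed
 * WQEs, report completed packets/bytes to BQL, wake the queue if it was
 * stopped, and trigger recovery on error CQEs. Returns true if the polling
 * budget was exhausted.
 */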
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq_stats *stats;
	struct mlx5e_txqsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 dma_fifo_cc;
	u32 nbytes;
	u16 npkts;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_txqsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	stats = sq->stats;

	npkts = 0;
	nbytes = 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	/* avoid dirtying sq cache line every cqe */
	dma_fifo_cc = sq->dma_fifo_cc;

	i = 0;
	do {
		struct mlx5e_tx_wqe_info *wi;
		u16 wqe_counter;
		bool last_wqe;
		u16 ci;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			if (likely(wi->skb)) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);

				npkts++;
				nbytes += wi->num_bytes;
				continue;
			}

			if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
									       &dma_fifo_cc)))
				continue;

			if (wi->num_fifo_pkts) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);

				npkts += wi->num_fifo_pkts;
				nbytes += wi->num_bytes;
			}
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
					      &sq->state)) {
				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
						     (struct mlx5_err_cqe *)cqe);
				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
				queue_work(cq->priv->wq, &sq->recover_work);
			}
			stats->cqe_err++;
		}

	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);

	if (netif_tx_queue_stopped(sq->txq) &&
	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
		netif_tx_wake_queue(sq->txq);
		stats->wake++;
	}

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++)
		dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
}

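/* Release all outstanding descriptors of an SQ without going through the
 * completion queue, e.g. on teardown or recovery.
 */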
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	u32 dma_fifo_cc, nbytes = 0;
	u16 ci, sqcc, npkts = 0;

	sqcc = sq->cc;
	dma_fifo_cc = sq->dma_fifo_cc;

	while (sqcc != sq->pc) {
		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];

		sqcc += wi->num_wqebbs;

		if (likely(wi->skb)) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			dev_kfree_skb_any(wi->skb);

			npkts++;
			nbytes += wi->num_bytes;
			continue;
		}

		if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
			continue;

		if (wi->num_fifo_pkts) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);

			npkts += wi->num_fifo_pkts;
			nbytes += wi->num_bytes;
		}
	}

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
}

#ifdef CONFIG_MLX5_CORE_IPOIB
static inline void
mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
			   struct mlx5_wqe_datagram_seg *dseg)
{
	memcpy(&dseg->av, av, sizeof(struct mlx5_av));
	dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
	dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}

static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
				   const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
	u16 ds_cnt_inl = 0;

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;

	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt     = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

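/* IPoIB transmit path: like mlx5e_sq_xmit_wqe(), but the WQE also carries a
 * datagram segment with the address vector, destination QPN and Q_Key.
 */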
void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5i_tx_wqe *wqe;

	struct mlx5_wqe_datagram_seg *datagram;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg  *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;

	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);

	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi       = &sq->db.wqe_info[pi];
	cseg     = &wqe->ctrl;
	datagram = &wqe->datagram;
	eseg     = &wqe->eth;
	dseg     =  wqe->data;

	mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);

	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);

	eseg->mss = attr.mss;

	if (attr.ihs) {
		memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);
		eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
		dseg += wqe_attr.ds_cnt_inl;
	}

	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
					  attr.headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
}
#endif