1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31 
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 				u32 td_tag)
34 {
35 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41 
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
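/* max number of 1 ms waits for Tx descriptors to free up when programming
 * a Flow Director filter
 */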
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: the Flow Director filter parameters for the programming descriptor
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 			     struct i40e_pf *pf, bool add)
53 {
54 	struct i40e_filter_program_desc *fdir_desc;
55 	struct i40e_tx_buffer *tx_buf, *first;
56 	struct i40e_tx_desc *tx_desc;
57 	struct i40e_ring *tx_ring;
58 	unsigned int fpt, dcc;
59 	struct i40e_vsi *vsi;
60 	struct device *dev;
61 	dma_addr_t dma;
62 	u32 td_cmd = 0;
63 	u16 delay = 0;
64 	u16 i;
65 
66 	/* find existing FDIR VSI */
67 	vsi = NULL;
68 	for (i = 0; i < pf->num_alloc_vsi; i++)
69 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 			vsi = pf->vsi[i];
71 	if (!vsi)
72 		return -ENOENT;
73 
74 	tx_ring = vsi->tx_rings[0];
75 	dev = tx_ring->dev;
76 
77 	/* we need two descriptors to add/del a filter and we can wait */
78 	do {
79 		if (I40E_DESC_UNUSED(tx_ring) > 1)
80 			break;
81 		msleep_interruptible(1);
82 		delay++;
83 	} while (delay < I40E_FD_CLEAN_DELAY);
84 
85 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 		return -EAGAIN;
87 
88 	dma = dma_map_single(dev, raw_packet,
89 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 	if (dma_mapping_error(dev, dma))
91 		goto dma_fail;
92 
93 	/* grab the next descriptor */
94 	i = tx_ring->next_to_use;
95 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 	first = &tx_ring->tx_bi[i];
97 	memset(first, 0, sizeof(struct i40e_tx_buffer));
98 
99 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100 
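	/* build the qindex_flex_ptype_vsi word of the filter program
	 * descriptor: destination queue, flexible payload offset, PCTYPE
	 * and destination VSI
	 */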
101 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
103 
104 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106 
107 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109 
110 	/* Use LAN VSI Id if not programmed by user */
111 	if (fdir_data->dest_vsi == 0)
112 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 	else
115 		fpt |= ((u32)fdir_data->dest_vsi <<
116 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118 
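	/* build the dtype_cmd_cntindex word: add vs. remove command,
	 * destination control, FD status reporting and optional counter index
	 */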
119 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120 
121 	if (add)
122 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 	else
125 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127 
128 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 	       I40E_TXD_FLTR_QW1_DEST_MASK;
130 
131 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133 
134 	if (fdir_data->cnt_index != 0) {
135 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 		dcc |= ((u32)fdir_data->cnt_index <<
137 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 	}
140 
141 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 	fdir_desc->rsvd = cpu_to_le32(0);
143 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145 
146 	/* Now program a dummy descriptor */
147 	i = tx_ring->next_to_use;
148 	tx_desc = I40E_TX_DESC(tx_ring, i);
149 	tx_buf = &tx_ring->tx_bi[i];
150 
151 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152 
153 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154 
155 	/* record length, and DMA address */
156 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 	dma_unmap_addr_set(tx_buf, dma, dma);
158 
159 	tx_desc->buffer_addr = cpu_to_le64(dma);
160 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161 
162 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 	tx_buf->raw_buf = (void *)raw_packet;
164 
165 	tx_desc->cmd_type_offset_bsz =
166 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167 
168 	/* Force memory writes to complete before letting h/w
169 	 * know there are new descriptors to fetch.
170 	 */
171 	wmb();
172 
173 	/* Mark the data descriptor to be watched */
174 	first->next_to_watch = tx_desc;
175 
176 	writel(tx_ring->next_to_use, tx_ring->tail);
177 	return 0;
178 
179 dma_fail:
180 	return -1;
181 }
182 
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
185 /**
186  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187  * @vsi: pointer to the targeted VSI
188  * @fd_data: the flow director data required for the FDir descriptor
189  * @add: true adds a filter, false removes it
190  *
191  * Returns 0 if the filters were successfully added or removed
192  **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 				   struct i40e_fdir_filter *fd_data,
195 				   bool add)
196 {
197 	struct i40e_pf *pf = vsi->back;
198 	struct udphdr *udp;
199 	struct iphdr *ip;
200 	bool err = false;
201 	u8 *raw_packet;
202 	int ret;
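	/* Dummy UDP/IPv4 frame: Ethernet header (EtherType 0x0800), a minimal
	 * IPv4 header (protocol 0x11 = UDP) and a UDP header; addresses and
	 * ports are filled in from fd_data below.
	 */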
203 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206 
207 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208 	if (!raw_packet)
209 		return -ENOMEM;
210 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211 
212 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 	      + sizeof(struct iphdr));
215 
216 	ip->daddr = fd_data->dst_ip[0];
217 	udp->dest = fd_data->dst_port;
218 	ip->saddr = fd_data->src_ip[0];
219 	udp->source = fd_data->src_port;
220 
221 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223 	if (ret) {
224 		dev_info(&pf->pdev->dev,
225 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 			 fd_data->pctype, fd_data->fd_id, ret);
227 		err = true;
228 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229 		if (add)
230 			dev_info(&pf->pdev->dev,
231 				 "Filter OK for PCTYPE %d loc = %d\n",
232 				 fd_data->pctype, fd_data->fd_id);
233 		else
234 			dev_info(&pf->pdev->dev,
235 				 "Filter deleted for PCTYPE %d loc = %d\n",
236 				 fd_data->pctype, fd_data->fd_id);
237 	}
238 	return err ? -EOPNOTSUPP : 0;
239 }
240 
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
242 /**
243  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244  * @vsi: pointer to the targeted VSI
245  * @fd_data: the flow director data required for the FDir descriptor
246  * @add: true adds a filter, false removes it
247  *
248  * Returns 0 if the filters were successfully added or removed
249  **/
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 				   struct i40e_fdir_filter *fd_data,
252 				   bool add)
253 {
254 	struct i40e_pf *pf = vsi->back;
255 	struct tcphdr *tcp;
256 	struct iphdr *ip;
257 	bool err = false;
258 	u8 *raw_packet;
259 	int ret;
260 	/* Dummy TCP/IPv4 packet; addresses and ports are filled in from fd_data below */
261 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 		0x0, 0x72, 0, 0, 0, 0};
265 
266 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
267 	if (!raw_packet)
268 		return -ENOMEM;
269 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
270 
271 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 	      + sizeof(struct iphdr));
274 
275 	ip->daddr = fd_data->dst_ip[0];
276 	tcp->dest = fd_data->dst_port;
277 	ip->saddr = fd_data->src_ip[0];
278 	tcp->source = fd_data->src_port;
279 
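	/* Sideband TCP/IPv4 rules take precedence over ATR: ATR is turned off
	 * while any sideband TCP rule is installed and re-enabled once the
	 * last such rule is removed.
	 */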
280 	if (add) {
281 		pf->fd_tcp_rule++;
282 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
286 		}
287 	} else {
288 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 				  (pf->fd_tcp_rule - 1) : 0;
290 		if (pf->fd_tcp_rule == 0) {
291 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
294 		}
295 	}
296 
297 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
299 
300 	if (ret) {
301 		dev_info(&pf->pdev->dev,
302 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 			 fd_data->pctype, fd_data->fd_id, ret);
304 		err = true;
305 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
306 		if (add)
307 			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
308 				 fd_data->pctype, fd_data->fd_id);
309 		else
310 			dev_info(&pf->pdev->dev,
311 				 "Filter deleted for PCTYPE %d loc = %d\n",
312 				 fd_data->pctype, fd_data->fd_id);
313 	}
314 
315 	return err ? -EOPNOTSUPP : 0;
316 }
317 
318 /**
319  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320  * a specific flow spec
321  * @vsi: pointer to the targeted VSI
322  * @fd_data: the flow director data required for the FDir descriptor
323  * @add: true adds a filter, false removes it
324  *
325  * Always returns -EOPNOTSUPP
326  **/
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 				    struct i40e_fdir_filter *fd_data,
329 				    bool add)
330 {
331 	return -EOPNOTSUPP;
332 }
333 
334 #define I40E_IP_DUMMY_PACKET_LEN 34
335 /**
336  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337  * a specific flow spec
338  * @vsi: pointer to the targeted VSI
339  * @fd_data: the flow director data required for the FDir descriptor
340  * @add: true adds a filter, false removes it
341  *
342  * Returns 0 if the filters were successfully added or removed
343  **/
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 				  struct i40e_fdir_filter *fd_data,
346 				  bool add)
347 {
348 	struct i40e_pf *pf = vsi->back;
349 	struct iphdr *ip;
350 	bool err = false;
351 	u8 *raw_packet;
352 	int ret;
353 	int i;
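	/* Dummy IPv4-only frame: Ethernet header plus a minimal IPv4 header;
	 * the same frame is reused for each IPv4 PCTYPE programmed below.
	 */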
354 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
356 		0, 0, 0, 0};
357 
358 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
360 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
361 		if (!raw_packet)
362 			return -ENOMEM;
363 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
365 
366 		ip->saddr = fd_data->src_ip[0];
367 		ip->daddr = fd_data->dst_ip[0];
368 		ip->protocol = 0;
369 
370 		fd_data->pctype = i;
371 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
372 
373 		if (ret) {
374 			dev_info(&pf->pdev->dev,
375 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 				 fd_data->pctype, fd_data->fd_id, ret);
377 			err = true;
378 		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
379 			if (add)
380 				dev_info(&pf->pdev->dev,
381 					 "Filter OK for PCTYPE %d loc = %d\n",
382 					 fd_data->pctype, fd_data->fd_id);
383 			else
384 				dev_info(&pf->pdev->dev,
385 					 "Filter deleted for PCTYPE %d loc = %d\n",
386 					 fd_data->pctype, fd_data->fd_id);
387 		}
388 	}
389 
390 	return err ? -EOPNOTSUPP : 0;
391 }
392 
393 /**
394  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395  * @vsi: pointer to the targeted VSI
396  * @input: the Flow Director filter to add or remove
397  * @add: true adds a filter, false removes it
398  *
399  **/
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 		      struct i40e_fdir_filter *input, bool add)
402 {
403 	struct i40e_pf *pf = vsi->back;
404 	int ret;
405 
406 	switch (input->flow_type & ~FLOW_EXT) {
407 	case TCP_V4_FLOW:
408 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
409 		break;
410 	case UDP_V4_FLOW:
411 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
412 		break;
413 	case SCTP_V4_FLOW:
414 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
415 		break;
416 	case IPV4_FLOW:
417 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
418 		break;
419 	case IP_USER_FLOW:
420 		switch (input->ip4_proto) {
421 		case IPPROTO_TCP:
422 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
423 			break;
424 		case IPPROTO_UDP:
425 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
426 			break;
427 		case IPPROTO_SCTP:
428 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
429 			break;
430 		default:
431 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
432 			break;
433 		}
434 		break;
435 	default:
436 		dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
437 			 input->flow_type);
438 		ret = -EINVAL;
439 	}
440 
441 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
442 	return ret;
443 }
444 
445 /**
446  * i40e_fd_handle_status - check the Programming Status for FD
447  * @rx_ring: the Rx ring for this descriptor
448  * @rx_desc: the Rx descriptor for programming status, not a packet descriptor.
449  * @prog_id: the id originally used for programming
450  *
451  * This is used to verify whether the FD programming or invalidation
452  * requested by SW to the HW was successful, and to take action accordingly.
453  **/
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 				  union i40e_rx_desc *rx_desc, u8 prog_id)
456 {
457 	struct i40e_pf *pf = rx_ring->vsi->back;
458 	struct pci_dev *pdev = pf->pdev;
459 	u32 fcnt_prog, fcnt_avail;
460 	u32 error;
461 	u64 qw;
462 
463 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
466 
467 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
470 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 				 rx_desc->wb.qword0.hi_dword.fd_id);
472 
473 		/* Check if the programming error is for ATR.
474 		 * If so, auto disable ATR and set a state for
475 		 * flush in progress. Next time we come here if flush is in
476 		 * progress do nothing, once flush is complete the state will
477 		 * be cleared.
478 		 */
479 		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
480 			return;
481 
482 		pf->fd_add_err++;
483 		/* store the current atr filter count */
484 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
485 
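		/* a reported fd_id of zero is treated as an ATR (automatically
		 * added) filter failure rather than a sideband rule failure
		 */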
486 		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
487 		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
488 			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
489 			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
490 		}
491 
492 		/* filter programming failed most likely due to table full */
493 		fcnt_prog = i40e_get_global_fd_count(pf);
494 		fcnt_avail = pf->fdir_pf_filter_count;
495 		/* If ATR is running fcnt_prog can quickly change,
496 		 * if we are very close to full, it makes sense to disable
497 		 * FD ATR/SB and then re-enable it when there is room.
498 		 */
499 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
500 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
501 			    !(pf->auto_disable_flags &
502 				     I40E_FLAG_FD_SB_ENABLED)) {
503 				if (I40E_DEBUG_FD & pf->hw.debug_mask)
504 					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 				pf->auto_disable_flags |=
506 							I40E_FLAG_FD_SB_ENABLED;
507 			}
508 		} else {
509 			dev_info(&pdev->dev,
510 				"FD filter programming failed due to incorrect filter parameters\n");
511 		}
512 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
513 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
514 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
515 				 rx_desc->wb.qword0.hi_dword.fd_id);
516 	}
517 }
518 
519 /**
520  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
521  * @ring:      the ring that owns the buffer
522  * @tx_buffer: the buffer to free
523  **/
524 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
525 					    struct i40e_tx_buffer *tx_buffer)
526 {
527 	if (tx_buffer->skb) {
528 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
529 			kfree(tx_buffer->raw_buf);
530 		else
531 			dev_kfree_skb_any(tx_buffer->skb);
532 
533 		if (dma_unmap_len(tx_buffer, len))
534 			dma_unmap_single(ring->dev,
535 					 dma_unmap_addr(tx_buffer, dma),
536 					 dma_unmap_len(tx_buffer, len),
537 					 DMA_TO_DEVICE);
538 	} else if (dma_unmap_len(tx_buffer, len)) {
539 		dma_unmap_page(ring->dev,
540 			       dma_unmap_addr(tx_buffer, dma),
541 			       dma_unmap_len(tx_buffer, len),
542 			       DMA_TO_DEVICE);
543 	}
544 	tx_buffer->next_to_watch = NULL;
545 	tx_buffer->skb = NULL;
546 	dma_unmap_len_set(tx_buffer, len, 0);
547 	/* tx_buffer must be completely set up in the transmit path */
548 }
549 
550 /**
551  * i40e_clean_tx_ring - Free all Tx ring buffers
552  * @tx_ring: ring to be cleaned
553  **/
554 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
555 {
556 	unsigned long bi_size;
557 	u16 i;
558 
559 	/* ring already cleared, nothing to do */
560 	if (!tx_ring->tx_bi)
561 		return;
562 
563 	/* Free all the Tx ring sk_buffs */
564 	for (i = 0; i < tx_ring->count; i++)
565 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
566 
567 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
568 	memset(tx_ring->tx_bi, 0, bi_size);
569 
570 	/* Zero out the descriptor ring */
571 	memset(tx_ring->desc, 0, tx_ring->size);
572 
573 	tx_ring->next_to_use = 0;
574 	tx_ring->next_to_clean = 0;
575 
576 	if (!tx_ring->netdev)
577 		return;
578 
579 	/* cleanup Tx queue statistics */
580 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
581 						  tx_ring->queue_index));
582 }
583 
584 /**
585  * i40e_free_tx_resources - Free Tx resources per queue
586  * @tx_ring: Tx descriptor ring for a specific queue
587  *
588  * Free all transmit software resources
589  **/
590 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
591 {
592 	i40e_clean_tx_ring(tx_ring);
593 	kfree(tx_ring->tx_bi);
594 	tx_ring->tx_bi = NULL;
595 
596 	if (tx_ring->desc) {
597 		dma_free_coherent(tx_ring->dev, tx_ring->size,
598 				  tx_ring->desc, tx_ring->dma);
599 		tx_ring->desc = NULL;
600 	}
601 }
602 
603 /**
604  * i40e_get_head - Retrieve head from head writeback
605  * @tx_ring:  tx ring to fetch head of
606  *
607  * Returns value of Tx ring head based on value stored
608  * in head write-back location
609  **/
610 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
611 {
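	/* the head write-back location is a u32 placed immediately after the
	 * last descriptor in the ring (see i40e_setup_tx_descriptors())
	 */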
612 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
613 
614 	return le32_to_cpu(*(volatile __le32 *)head);
615 }
616 
617 /**
618  * i40e_get_tx_pending - how many Tx descriptors are not yet processed
619  * @ring: the ring of descriptors
620  *
621  * Since there is no access to the ring head register
622  * in XL710, we need to use our local copies
623  **/
624 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
625 {
626 	u32 head, tail;
627 
628 	head = i40e_get_head(ring);
629 	tail = readl(ring->tail);
630 
631 	if (head != tail)
632 		return (head < tail) ?
633 			tail - head : (tail + ring->count - head);
634 
635 	return 0;
636 }
637 
638 /**
639  * i40e_check_tx_hang - Is there a hang in the Tx queue
640  * @tx_ring: the ring of descriptors
641  **/
642 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
643 {
644 	u32 tx_done = tx_ring->stats.packets;
645 	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
646 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
647 	struct i40e_pf *pf = tx_ring->vsi->back;
648 	bool ret = false;
649 
650 	clear_check_for_tx_hang(tx_ring);
651 
652 	/* Check for a hung queue, but be thorough. This verifies
653 	 * that a transmit has been completed since the previous
654 	 * check AND there is at least one packet pending. The
655 	 * ARMED bit is set to indicate a potential hang. The
656 	 * bit is cleared if a pause frame is received to remove
657 	 * false hang detection due to PFC or 802.3x frames. By
658 	 * requiring this to fail twice we avoid races with
659 	 * PFC clearing the ARMED bit and conditions where we
660 	 * run the check_tx_hang logic with a transmit completion
661 	 * pending but without time to complete it yet.
662 	 */
663 	if ((tx_done_old == tx_done) && tx_pending) {
664 		/* make sure it is true for two checks in a row */
665 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
666 				       &tx_ring->state);
667 	} else if (tx_done_old == tx_done &&
668 		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
669 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
670 			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d\n",
671 				 tx_pending, tx_ring->queue_index);
672 		pf->tx_sluggish_count++;
673 	} else {
674 		/* update completed stats and disarm the hang check */
675 		tx_ring->tx_stats.tx_done_old = tx_done;
676 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
677 	}
678 
679 	return ret;
680 }
681 
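/* WB_STRIDE is used in i40e_clean_tx_irq() to decide when to force a
 * descriptor write-back: if the cleaned index does not end a group of
 * (WB_STRIDE + 1) descriptors, completed descriptors may be sitting in a
 * partially written-back cache line.
 */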
682 #define WB_STRIDE 0x3
683 
684 /**
685  * i40e_clean_tx_irq - Reclaim resources after transmit completes
686  * @tx_ring:  tx ring to clean
687  * @budget:   how many cleans we're allowed
688  *
689  * Returns true if there's any budget left (i.e. the clean is finished)
690  **/
691 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
692 {
693 	u16 i = tx_ring->next_to_clean;
694 	struct i40e_tx_buffer *tx_buf;
695 	struct i40e_tx_desc *tx_head;
696 	struct i40e_tx_desc *tx_desc;
697 	unsigned int total_packets = 0;
698 	unsigned int total_bytes = 0;
699 
700 	tx_buf = &tx_ring->tx_bi[i];
701 	tx_desc = I40E_TX_DESC(tx_ring, i);
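	/* keep i as a negative offset from the end of the ring so the wrap
	 * checks below reduce to a simple test against zero
	 */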
702 	i -= tx_ring->count;
703 
704 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
705 
706 	do {
707 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
708 
709 		/* if next_to_watch is not set then there is no work pending */
710 		if (!eop_desc)
711 			break;
712 
713 		/* prevent any other reads prior to eop_desc */
714 		read_barrier_depends();
715 
716 		/* we have caught up to head, no work left to do */
717 		if (tx_head == tx_desc)
718 			break;
719 
720 		/* clear next_to_watch to prevent false hangs */
721 		tx_buf->next_to_watch = NULL;
722 
723 		/* update the statistics for this packet */
724 		total_bytes += tx_buf->bytecount;
725 		total_packets += tx_buf->gso_segs;
726 
727 		/* free the skb */
728 		dev_consume_skb_any(tx_buf->skb);
729 
730 		/* unmap skb header data */
731 		dma_unmap_single(tx_ring->dev,
732 				 dma_unmap_addr(tx_buf, dma),
733 				 dma_unmap_len(tx_buf, len),
734 				 DMA_TO_DEVICE);
735 
736 		/* clear tx_buffer data */
737 		tx_buf->skb = NULL;
738 		dma_unmap_len_set(tx_buf, len, 0);
739 
740 		/* unmap remaining buffers */
741 		while (tx_desc != eop_desc) {
742 
743 			tx_buf++;
744 			tx_desc++;
745 			i++;
746 			if (unlikely(!i)) {
747 				i -= tx_ring->count;
748 				tx_buf = tx_ring->tx_bi;
749 				tx_desc = I40E_TX_DESC(tx_ring, 0);
750 			}
751 
752 			/* unmap any remaining paged data */
753 			if (dma_unmap_len(tx_buf, len)) {
754 				dma_unmap_page(tx_ring->dev,
755 					       dma_unmap_addr(tx_buf, dma),
756 					       dma_unmap_len(tx_buf, len),
757 					       DMA_TO_DEVICE);
758 				dma_unmap_len_set(tx_buf, len, 0);
759 			}
760 		}
761 
762 		/* move us one more past the eop_desc for start of next pkt */
763 		tx_buf++;
764 		tx_desc++;
765 		i++;
766 		if (unlikely(!i)) {
767 			i -= tx_ring->count;
768 			tx_buf = tx_ring->tx_bi;
769 			tx_desc = I40E_TX_DESC(tx_ring, 0);
770 		}
771 
772 		prefetch(tx_desc);
773 
774 		/* update budget accounting */
775 		budget--;
776 	} while (likely(budget));
777 
778 	i += tx_ring->count;
779 	tx_ring->next_to_clean = i;
780 	u64_stats_update_begin(&tx_ring->syncp);
781 	tx_ring->stats.bytes += total_bytes;
782 	tx_ring->stats.packets += total_packets;
783 	u64_stats_update_end(&tx_ring->syncp);
784 	tx_ring->q_vector->tx.total_bytes += total_bytes;
785 	tx_ring->q_vector->tx.total_packets += total_packets;
786 
787 	/* check to see if there are any non-cache aligned descriptors
788 	 * waiting to be written back, and kick the hardware to force
789 	 * them to be written back in case of napi polling
790 	 */
791 	if (budget &&
792 	    !((i & WB_STRIDE) == WB_STRIDE) &&
793 	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
794 	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
795 		tx_ring->arm_wb = true;
796 	else
797 		tx_ring->arm_wb = false;
798 
799 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
800 		/* schedule immediate reset if we believe we hung */
801 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
802 			 "  VSI                  <%d>\n"
803 			 "  Tx Queue             <%d>\n"
804 			 "  next_to_use          <%x>\n"
805 			 "  next_to_clean        <%x>\n",
806 			 tx_ring->vsi->seid,
807 			 tx_ring->queue_index,
808 			 tx_ring->next_to_use, i);
809 
810 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
811 
812 		dev_info(tx_ring->dev,
813 			 "tx hang detected on queue %d, reset requested\n",
814 			 tx_ring->queue_index);
815 
816 		/* do not fire the reset immediately, wait for the stack to
817 		 * decide we are truly stuck, also prevents every queue from
818 		 * simultaneously requesting a reset
819 		 */
820 
821 		/* the adapter is about to reset, no point in enabling polling */
822 		budget = 1;
823 	}
824 
825 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
826 						      tx_ring->queue_index),
827 				  total_packets, total_bytes);
828 
829 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
830 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
831 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
832 		/* Make sure that anybody stopping the queue after this
833 		 * sees the new next_to_clean.
834 		 */
835 		smp_mb();
836 		if (__netif_subqueue_stopped(tx_ring->netdev,
837 					     tx_ring->queue_index) &&
838 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
839 			netif_wake_subqueue(tx_ring->netdev,
840 					    tx_ring->queue_index);
841 			++tx_ring->tx_stats.restart_queue;
842 		}
843 	}
844 
845 	return !!budget;
846 }
847 
848 /**
849  * i40e_force_wb - Arm hardware to do a wb on non-cache-aligned descriptors
850  * @vsi: the VSI we care about
851  * @q_vector: the vector on which to force writeback
852  *
853  **/
854 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
855 {
856 	u16 flags = q_vector->tx.ring[0].flags;
857 
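	/* When the ring supports WB_ON_ITR, request a descriptor write-back on
	 * ITR expiry; otherwise trigger a software interrupt so the queue is
	 * serviced and pending descriptors get written back.
	 */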
858 	if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
859 		u32 val;
860 
861 		if (q_vector->arm_wb_state)
862 			return;
863 
864 		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
865 
866 		wr32(&vsi->back->hw,
867 		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
868 					 vsi->base_vector - 1),
869 		     val);
870 		q_vector->arm_wb_state = true;
871 	} else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
872 		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
873 			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
874 			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
875 			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
876 			  /* allow 00 to be written to the index */
877 
878 		wr32(&vsi->back->hw,
879 		     I40E_PFINT_DYN_CTLN(q_vector->v_idx +
880 					 vsi->base_vector - 1), val);
881 	} else {
882 		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
883 			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
884 			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
885 			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
886 			/* allow 00 to be written to the index */
887 
888 		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
889 	}
890 }
891 
892 /**
893  * i40e_set_new_dynamic_itr - Find new ITR level
894  * @rc: structure containing ring performance data
895  *
896  * Stores a new ITR value based on packets and byte counts during
897  * the last interrupt.  The advantage of per interrupt computation
898  * is faster updates and more accurate ITR for the current traffic
899  * pattern.  Constants in this function were computed based on
900  * theoretical maximum wire speed and thresholds were set based on
901  * testing data as well as attempting to minimize response time
902  * while increasing bulk throughput.
903  **/
904 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
905 {
906 	enum i40e_latency_range new_latency_range = rc->latency_range;
907 	u32 new_itr = rc->itr;
908 	int bytes_per_int;
909 
910 	if (rc->total_packets == 0 || !rc->itr)
911 		return;
912 
913 	/* simple throttle rate management
914 	 *   0-10MB/s   lowest (100000 ints/s)
915 	 *  10-20MB/s   low    (20000 ints/s)
916 	 *  20-1249MB/s bulk   (8000 ints/s)
917 	 */
918 	bytes_per_int = rc->total_bytes / rc->itr;
919 	switch (new_latency_range) {
920 	case I40E_LOWEST_LATENCY:
921 		if (bytes_per_int > 10)
922 			new_latency_range = I40E_LOW_LATENCY;
923 		break;
924 	case I40E_LOW_LATENCY:
925 		if (bytes_per_int > 20)
926 			new_latency_range = I40E_BULK_LATENCY;
927 		else if (bytes_per_int <= 10)
928 			new_latency_range = I40E_LOWEST_LATENCY;
929 		break;
930 	case I40E_BULK_LATENCY:
931 		if (bytes_per_int <= 20)
932 			new_latency_range = I40E_LOW_LATENCY;
933 		break;
934 	default:
935 		if (bytes_per_int <= 20)
936 			new_latency_range = I40E_LOW_LATENCY;
937 		break;
938 	}
939 	rc->latency_range = new_latency_range;
940 
941 	switch (new_latency_range) {
942 	case I40E_LOWEST_LATENCY:
943 		new_itr = I40E_ITR_100K;
944 		break;
945 	case I40E_LOW_LATENCY:
946 		new_itr = I40E_ITR_20K;
947 		break;
948 	case I40E_BULK_LATENCY:
949 		new_itr = I40E_ITR_8K;
950 		break;
951 	default:
952 		break;
953 	}
954 
955 	if (new_itr != rc->itr)
956 		rc->itr = new_itr;
957 
958 	rc->total_bytes = 0;
959 	rc->total_packets = 0;
960 }
961 
962 /**
963  * i40e_clean_programming_status - clean the programming status descriptor
964  * @rx_ring: the rx ring that has this descriptor
965  * @rx_desc: the rx descriptor written back by HW
966  *
967  * Flow Director handles FD_FILTER_STATUS to check whether its filter
968  * programming succeeded and takes action accordingly. FCoE handles its
969  * context/filter programming/invalidation status and takes action.
970  *
971  **/
972 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
973 					  union i40e_rx_desc *rx_desc)
974 {
975 	u64 qw;
976 	u8 id;
977 
978 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
979 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
980 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
981 
982 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
983 		i40e_fd_handle_status(rx_ring, rx_desc, id);
984 #ifdef I40E_FCOE
985 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
986 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
987 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
988 #endif
989 }
990 
991 /**
992  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
993  * @tx_ring: the tx ring to set up
994  *
995  * Return 0 on success, negative on error
996  **/
997 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
998 {
999 	struct device *dev = tx_ring->dev;
1000 	int bi_size;
1001 
1002 	if (!dev)
1003 		return -ENOMEM;
1004 
1005 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1006 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1007 	if (!tx_ring->tx_bi)
1008 		goto err;
1009 
1010 	/* round up to nearest 4K */
1011 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
1012 	/* add u32 for head writeback; the 4K alignment below also
1013 	 * guarantees this is at least one cache line in size
1014 	 */
1015 	tx_ring->size += sizeof(u32);
1016 	tx_ring->size = ALIGN(tx_ring->size, 4096);
1017 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1018 					   &tx_ring->dma, GFP_KERNEL);
1019 	if (!tx_ring->desc) {
1020 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1021 			 tx_ring->size);
1022 		goto err;
1023 	}
1024 
1025 	tx_ring->next_to_use = 0;
1026 	tx_ring->next_to_clean = 0;
1027 	return 0;
1028 
1029 err:
1030 	kfree(tx_ring->tx_bi);
1031 	tx_ring->tx_bi = NULL;
1032 	return -ENOMEM;
1033 }
1034 
1035 /**
1036  * i40e_clean_rx_ring - Free Rx buffers
1037  * @rx_ring: ring to be cleaned
1038  **/
1039 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1040 {
1041 	struct device *dev = rx_ring->dev;
1042 	struct i40e_rx_buffer *rx_bi;
1043 	unsigned long bi_size;
1044 	u16 i;
1045 
1046 	/* ring already cleared, nothing to do */
1047 	if (!rx_ring->rx_bi)
1048 		return;
1049 
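	/* in packet-split mode all header buffers come from a single coherent
	 * allocation (see i40e_alloc_rx_headers()), so free it once via the
	 * first buffer and then clear every per-buffer pointer
	 */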
1050 	if (ring_is_ps_enabled(rx_ring)) {
1051 		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1052 
1053 		rx_bi = &rx_ring->rx_bi[0];
1054 		if (rx_bi->hdr_buf) {
1055 			dma_free_coherent(dev,
1056 					  bufsz,
1057 					  rx_bi->hdr_buf,
1058 					  rx_bi->dma);
1059 			for (i = 0; i < rx_ring->count; i++) {
1060 				rx_bi = &rx_ring->rx_bi[i];
1061 				rx_bi->dma = 0;
1062 				rx_bi->hdr_buf = NULL;
1063 			}
1064 		}
1065 	}
1066 	/* Free all the Rx ring sk_buffs */
1067 	for (i = 0; i < rx_ring->count; i++) {
1068 		rx_bi = &rx_ring->rx_bi[i];
1069 		if (rx_bi->dma) {
1070 			dma_unmap_single(dev,
1071 					 rx_bi->dma,
1072 					 rx_ring->rx_buf_len,
1073 					 DMA_FROM_DEVICE);
1074 			rx_bi->dma = 0;
1075 		}
1076 		if (rx_bi->skb) {
1077 			dev_kfree_skb(rx_bi->skb);
1078 			rx_bi->skb = NULL;
1079 		}
1080 		if (rx_bi->page) {
1081 			if (rx_bi->page_dma) {
1082 				dma_unmap_page(dev,
1083 					       rx_bi->page_dma,
1084 					       PAGE_SIZE / 2,
1085 					       DMA_FROM_DEVICE);
1086 				rx_bi->page_dma = 0;
1087 			}
1088 			__free_page(rx_bi->page);
1089 			rx_bi->page = NULL;
1090 			rx_bi->page_offset = 0;
1091 		}
1092 	}
1093 
1094 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1095 	memset(rx_ring->rx_bi, 0, bi_size);
1096 
1097 	/* Zero out the descriptor ring */
1098 	memset(rx_ring->desc, 0, rx_ring->size);
1099 
1100 	rx_ring->next_to_clean = 0;
1101 	rx_ring->next_to_use = 0;
1102 }
1103 
1104 /**
1105  * i40e_free_rx_resources - Free Rx resources
1106  * @rx_ring: ring to clean the resources from
1107  *
1108  * Free all receive software resources
1109  **/
1110 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1111 {
1112 	i40e_clean_rx_ring(rx_ring);
1113 	kfree(rx_ring->rx_bi);
1114 	rx_ring->rx_bi = NULL;
1115 
1116 	if (rx_ring->desc) {
1117 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1118 				  rx_ring->desc, rx_ring->dma);
1119 		rx_ring->desc = NULL;
1120 	}
1121 }
1122 
1123 /**
1124  * i40e_alloc_rx_headers - allocate rx header buffers
1125  * @rx_ring: ring to alloc buffers
1126  *
1127  * Allocate rx header buffers for the entire ring. As these are static,
1128  * this is only called when setting up a new ring.
1129  **/
1130 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1131 {
1132 	struct device *dev = rx_ring->dev;
1133 	struct i40e_rx_buffer *rx_bi;
1134 	dma_addr_t dma;
1135 	void *buffer;
1136 	int buf_size;
1137 	int i;
1138 
1139 	if (rx_ring->rx_bi[0].hdr_buf)
1140 		return;
1141 	/* Make sure the buffers don't cross cache line boundaries. */
1142 	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1143 	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1144 				    &dma, GFP_KERNEL);
1145 	if (!buffer)
1146 		return;
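	/* carve the single coherent allocation into per-descriptor slices */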
1147 	for (i = 0; i < rx_ring->count; i++) {
1148 		rx_bi = &rx_ring->rx_bi[i];
1149 		rx_bi->dma = dma + (i * buf_size);
1150 		rx_bi->hdr_buf = buffer + (i * buf_size);
1151 	}
1152 }
1153 
1154 /**
1155  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1156  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1157  *
1158  * Returns 0 on success, negative on failure
1159  **/
1160 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1161 {
1162 	struct device *dev = rx_ring->dev;
1163 	int bi_size;
1164 
1165 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1166 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1167 	if (!rx_ring->rx_bi)
1168 		goto err;
1169 
1170 	u64_stats_init(&rx_ring->syncp);
1171 
1172 	/* Round up to nearest 4K */
1173 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1174 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1175 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1176 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1177 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1178 					   &rx_ring->dma, GFP_KERNEL);
1179 
1180 	if (!rx_ring->desc) {
1181 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1182 			 rx_ring->size);
1183 		goto err;
1184 	}
1185 
1186 	rx_ring->next_to_clean = 0;
1187 	rx_ring->next_to_use = 0;
1188 
1189 	return 0;
1190 err:
1191 	kfree(rx_ring->rx_bi);
1192 	rx_ring->rx_bi = NULL;
1193 	return -ENOMEM;
1194 }
1195 
1196 /**
1197  * i40e_release_rx_desc - Record the new next_to_use value and bump the tail
1198  * @rx_ring: ring to bump
1199  * @val: new next_to_use value to write to the tail register
1200  **/
1201 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1202 {
1203 	rx_ring->next_to_use = val;
1204 	/* Force memory writes to complete before letting h/w
1205 	 * know there are new descriptors to fetch.  (Only
1206 	 * applicable for weak-ordered memory model archs,
1207 	 * such as IA-64).
1208 	 */
1209 	wmb();
1210 	writel(val, rx_ring->tail);
1211 }
1212 
1213 /**
1214  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1215  * @rx_ring: ring to place buffers on
1216  * @cleaned_count: number of buffers to replace
1217  **/
1218 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1219 {
1220 	u16 i = rx_ring->next_to_use;
1221 	union i40e_rx_desc *rx_desc;
1222 	struct i40e_rx_buffer *bi;
1223 
1224 	/* do nothing if no valid netdev defined */
1225 	if (!rx_ring->netdev || !cleaned_count)
1226 		return;
1227 
1228 	while (cleaned_count--) {
1229 		rx_desc = I40E_RX_DESC(rx_ring, i);
1230 		bi = &rx_ring->rx_bi[i];
1231 
1232 		if (bi->skb) /* desc is in use */
1233 			goto no_buffers;
1234 		if (!bi->page) {
1235 			bi->page = alloc_page(GFP_ATOMIC);
1236 			if (!bi->page) {
1237 				rx_ring->rx_stats.alloc_page_failed++;
1238 				goto no_buffers;
1239 			}
1240 		}
1241 
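		/* each page is shared between two half-page buffers; flip the
		 * offset and map the half that is not currently in use
		 */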
1242 		if (!bi->page_dma) {
1243 			/* use a half page if we're re-using */
1244 			bi->page_offset ^= PAGE_SIZE / 2;
1245 			bi->page_dma = dma_map_page(rx_ring->dev,
1246 						    bi->page,
1247 						    bi->page_offset,
1248 						    PAGE_SIZE / 2,
1249 						    DMA_FROM_DEVICE);
1250 			if (dma_mapping_error(rx_ring->dev,
1251 					      bi->page_dma)) {
1252 				rx_ring->rx_stats.alloc_page_failed++;
1253 				bi->page_dma = 0;
1254 				goto no_buffers;
1255 			}
1256 		}
1257 
1258 		dma_sync_single_range_for_device(rx_ring->dev,
1259 						 bi->dma,
1260 						 0,
1261 						 rx_ring->rx_hdr_len,
1262 						 DMA_FROM_DEVICE);
1263 		/* Refresh the desc even if buffer_addrs didn't change
1264 		 * because each write-back erases this info.
1265 		 */
1266 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1267 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1268 		i++;
1269 		if (i == rx_ring->count)
1270 			i = 0;
1271 	}
1272 
1273 no_buffers:
1274 	if (rx_ring->next_to_use != i)
1275 		i40e_release_rx_desc(rx_ring, i);
1276 }
1277 
1278 /**
1279  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1280  * @rx_ring: ring to place buffers on
1281  * @cleaned_count: number of buffers to replace
1282  **/
1283 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1284 {
1285 	u16 i = rx_ring->next_to_use;
1286 	union i40e_rx_desc *rx_desc;
1287 	struct i40e_rx_buffer *bi;
1288 	struct sk_buff *skb;
1289 
1290 	/* do nothing if no valid netdev defined */
1291 	if (!rx_ring->netdev || !cleaned_count)
1292 		return;
1293 
1294 	while (cleaned_count--) {
1295 		rx_desc = I40E_RX_DESC(rx_ring, i);
1296 		bi = &rx_ring->rx_bi[i];
1297 		skb = bi->skb;
1298 
1299 		if (!skb) {
1300 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1301 							rx_ring->rx_buf_len);
1302 			if (!skb) {
1303 				rx_ring->rx_stats.alloc_buff_failed++;
1304 				goto no_buffers;
1305 			}
1306 			/* initialize queue mapping */
1307 			skb_record_rx_queue(skb, rx_ring->queue_index);
1308 			bi->skb = skb;
1309 		}
1310 
1311 		if (!bi->dma) {
1312 			bi->dma = dma_map_single(rx_ring->dev,
1313 						 skb->data,
1314 						 rx_ring->rx_buf_len,
1315 						 DMA_FROM_DEVICE);
1316 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1317 				rx_ring->rx_stats.alloc_buff_failed++;
1318 				bi->dma = 0;
1319 				goto no_buffers;
1320 			}
1321 		}
1322 
1323 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1324 		rx_desc->read.hdr_addr = 0;
1325 		i++;
1326 		if (i == rx_ring->count)
1327 			i = 0;
1328 	}
1329 
1330 no_buffers:
1331 	if (rx_ring->next_to_use != i)
1332 		i40e_release_rx_desc(rx_ring, i);
1333 }
1334 
1335 /**
1336  * i40e_receive_skb - Send a completed packet up the stack
1337  * @rx_ring:  rx ring in play
1338  * @skb: packet to send up
1339  * @vlan_tag: vlan tag for packet
1340  **/
1341 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1342 			     struct sk_buff *skb, u16 vlan_tag)
1343 {
1344 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1345 	struct i40e_vsi *vsi = rx_ring->vsi;
1346 	u64 flags = vsi->back->flags;
1347 
1348 	if (vlan_tag & VLAN_VID_MASK)
1349 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1350 
1351 	if (flags & I40E_FLAG_IN_NETPOLL)
1352 		netif_rx(skb);
1353 	else
1354 		napi_gro_receive(&q_vector->napi, skb);
1355 }
1356 
1357 /**
1358  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1359  * @vsi: the VSI we care about
1360  * @skb: skb currently being received and modified
1361  * @rx_status: status value of last descriptor in packet
1362  * @rx_error: error value of last descriptor in packet
1363  * @rx_ptype: ptype value of last descriptor in packet
1364  **/
1365 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1366 				    struct sk_buff *skb,
1367 				    u32 rx_status,
1368 				    u32 rx_error,
1369 				    u16 rx_ptype)
1370 {
1371 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1372 	bool ipv4 = false, ipv6 = false;
1373 	bool ipv4_tunnel, ipv6_tunnel;
1374 	__wsum rx_udp_csum;
1375 	struct iphdr *iph;
1376 	__sum16 csum;
1377 
1378 	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1379 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1380 	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1381 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1382 
1383 	skb->ip_summed = CHECKSUM_NONE;
1384 
1385 	/* Rx csum enabled and ip headers found? */
1386 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1387 		return;
1388 
1389 	/* did the hardware decode the packet and checksum? */
1390 	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1391 		return;
1392 
1393 	/* both known and outer_ip must be set for the below code to work */
1394 	if (!(decoded.known && decoded.outer_ip))
1395 		return;
1396 
1397 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1398 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1399 		ipv4 = true;
1400 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1401 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1402 		ipv6 = true;
1403 
1404 	if (ipv4 &&
1405 	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1406 			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1407 		goto checksum_fail;
1408 
1409 	/* likely incorrect csum if alternate IP extension headers found */
1410 	if (ipv6 &&
1411 	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1412 		/* don't increment checksum err here, non-fatal err */
1413 		return;
1414 
1415 	/* there was some L4 error, count error and punt packet to the stack */
1416 	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1417 		goto checksum_fail;
1418 
1419 	/* handle packets that were not able to be checksummed due
1420 	 * to arrival speed; in this case the stack can compute
1421 	 * the csum.
1422 	 */
1423 	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1424 		return;
1425 
1426 	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
1427 	 * it in the driver, hardware does not do it for us.
1428 	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1429 	 * so the total length of IPv4 header is IHL*4 bytes
1430 	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1431 	 */
1432 	if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
1433 	    (ipv4_tunnel)) {
1434 		skb->transport_header = skb->mac_header +
1435 					sizeof(struct ethhdr) +
1436 					(ip_hdr(skb)->ihl * 4);
1437 
1438 		/* Add 4 bytes for VLAN tagged packets */
1439 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1440 					  skb->protocol == htons(ETH_P_8021AD))
1441 					  ? VLAN_HLEN : 0;
1442 
1443 		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1444 		    (udp_hdr(skb)->check != 0)) {
1445 			rx_udp_csum = udp_csum(skb);
1446 			iph = ip_hdr(skb);
1447 			csum = csum_tcpudp_magic(
1448 					iph->saddr, iph->daddr,
1449 					(skb->len - skb_transport_offset(skb)),
1450 					IPPROTO_UDP, rx_udp_csum);
1451 
1452 			if (udp_hdr(skb)->check != csum)
1453 				goto checksum_fail;
1454 
1455 		} /* else it's GRE and so no outer UDP header */
1456 	}
1457 
1458 	skb->ip_summed = CHECKSUM_UNNECESSARY;
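	/* for tunneled packets report one additional verified checksum level */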
1459 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1460 
1461 	return;
1462 
1463 checksum_fail:
1464 	vsi->back->hw_csum_rx_error++;
1465 }
1466 
1467 /**
1468  * i40e_rx_hash - returns the hash value from the Rx descriptor
1469  * @ring: descriptor ring
1470  * @rx_desc: specific descriptor
1471  **/
1472 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1473 			       union i40e_rx_desc *rx_desc)
1474 {
1475 	const __le64 rss_mask =
1476 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1477 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1478 
1479 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1480 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1481 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1482 	else
1483 		return 0;
1484 }
1485 
1486 /**
1487  * i40e_ptype_to_hash - get a hash type
1488  * @ptype: the ptype value from the descriptor
1489  *
1490  * Returns a hash type to be used by skb_set_hash
1491  **/
1492 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1493 {
1494 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1495 
1496 	if (!decoded.known)
1497 		return PKT_HASH_TYPE_NONE;
1498 
1499 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1500 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1501 		return PKT_HASH_TYPE_L4;
1502 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1503 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1504 		return PKT_HASH_TYPE_L3;
1505 	else
1506 		return PKT_HASH_TYPE_L2;
1507 }
1508 
1509 /**
1510  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1511  * @rx_ring:  rx ring to clean
1512  * @budget:   how many cleans we're allowed
1513  *
1514  * Returns the number of packets cleaned
1515  **/
1516 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1517 {
1518 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1519 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1520 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1521 	const int current_node = numa_node_id();
1522 	struct i40e_vsi *vsi = rx_ring->vsi;
1523 	u16 i = rx_ring->next_to_clean;
1524 	union i40e_rx_desc *rx_desc;
1525 	u32 rx_error, rx_status;
1526 	u8 rx_ptype;
1527 	u64 qword;
1528 
1529 	if (budget <= 0)
1530 		return 0;
1531 
1532 	do {
1533 		struct i40e_rx_buffer *rx_bi;
1534 		struct sk_buff *skb;
1535 		u16 vlan_tag;
1536 		/* return some buffers to hardware, one at a time is too slow */
1537 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1538 			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1539 			cleaned_count = 0;
1540 		}
1541 
1542 		i = rx_ring->next_to_clean;
1543 		rx_desc = I40E_RX_DESC(rx_ring, i);
1544 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1545 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1546 			I40E_RXD_QW1_STATUS_SHIFT;
1547 
1548 		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1549 			break;
1550 
1551 		/* This memory barrier is needed to keep us from reading
1552 		 * any other fields out of the rx_desc until we know the
1553 		 * DD bit is set.
1554 		 */
1555 		dma_rmb();
1556 		if (i40e_rx_is_programming_status(qword)) {
1557 			i40e_clean_programming_status(rx_ring, rx_desc);
1558 			I40E_RX_INCREMENT(rx_ring, i);
1559 			continue;
1560 		}
1561 		rx_bi = &rx_ring->rx_bi[i];
1562 		skb = rx_bi->skb;
1563 		if (likely(!skb)) {
1564 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1565 							rx_ring->rx_hdr_len);
1566 			if (!skb) {
1567 				rx_ring->rx_stats.alloc_buff_failed++;
1568 				break;
1569 			}
1570 
1571 			/* initialize queue mapping */
1572 			skb_record_rx_queue(skb, rx_ring->queue_index);
1573 			/* we are reusing so sync this buffer for CPU use */
1574 			dma_sync_single_range_for_cpu(rx_ring->dev,
1575 						      rx_bi->dma,
1576 						      0,
1577 						      rx_ring->rx_hdr_len,
1578 						      DMA_FROM_DEVICE);
1579 		}
1580 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1581 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1582 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1583 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1584 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1585 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1586 
1587 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1588 			   I40E_RXD_QW1_ERROR_SHIFT;
1589 		rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1590 		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1591 
1592 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1593 			   I40E_RXD_QW1_PTYPE_SHIFT;
1594 		prefetch(rx_bi->page);
1595 		rx_bi->skb = NULL;
1596 		cleaned_count++;
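		/* if the header was split out (or overflowed the header
		 * buffer) copy it from the pre-allocated header buffer;
		 * otherwise pull the start of the packet from the data page
		 */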
1597 		if (rx_hbo || rx_sph) {
1598 			int len;
1599 			if (rx_hbo)
1600 				len = I40E_RX_HDR_SIZE;
1601 			else
1602 				len = rx_header_len;
1603 			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1604 		} else if (skb->len == 0) {
1605 			int len;
1606 
1607 			len = (rx_packet_len > skb_headlen(skb) ?
1608 				skb_headlen(skb) : rx_packet_len);
1609 			memcpy(__skb_put(skb, len),
1610 			       rx_bi->page + rx_bi->page_offset,
1611 			       len);
1612 			rx_bi->page_offset += len;
1613 			rx_packet_len -= len;
1614 		}
1615 
1616 		/* Get the rest of the data if this was a header split */
1617 		if (rx_packet_len) {
1618 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1619 					   rx_bi->page,
1620 					   rx_bi->page_offset,
1621 					   rx_packet_len);
1622 
1623 			skb->len += rx_packet_len;
1624 			skb->data_len += rx_packet_len;
1625 			skb->truesize += rx_packet_len;
1626 
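			/* recycle the page only if we are its sole user and it
			 * is local to this NUMA node; otherwise give the page
			 * to the stack and allocate a fresh one later
			 */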
1627 			if ((page_count(rx_bi->page) == 1) &&
1628 			    (page_to_nid(rx_bi->page) == current_node))
1629 				get_page(rx_bi->page);
1630 			else
1631 				rx_bi->page = NULL;
1632 
1633 			dma_unmap_page(rx_ring->dev,
1634 				       rx_bi->page_dma,
1635 				       PAGE_SIZE / 2,
1636 				       DMA_FROM_DEVICE);
1637 			rx_bi->page_dma = 0;
1638 		}
1639 		I40E_RX_INCREMENT(rx_ring, i);
1640 
1641 		if (unlikely(
1642 		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1643 			struct i40e_rx_buffer *next_buffer;
1644 
1645 			next_buffer = &rx_ring->rx_bi[i];
1646 			next_buffer->skb = skb;
1647 			rx_ring->rx_stats.non_eop_descs++;
1648 			continue;
1649 		}
1650 
1651 		/* ERR_MASK will only have valid bits if EOP set */
1652 		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1653 			dev_kfree_skb_any(skb);
1654 			continue;
1655 		}
1656 
1657 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1658 			     i40e_ptype_to_hash(rx_ptype));
1659 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1660 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1661 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1662 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1663 			rx_ring->last_rx_timestamp = jiffies;
1664 		}
1665 
1666 		/* probably a little skewed due to removing CRC */
1667 		total_rx_bytes += skb->len;
1668 		total_rx_packets++;
1669 
1670 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1671 
1672 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1673 
1674 		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1675 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1676 			 : 0;
1677 #ifdef I40E_FCOE
1678 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1679 			dev_kfree_skb_any(skb);
1680 			continue;
1681 		}
1682 #endif
1683 		skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1684 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1685 
1686 		rx_desc->wb.qword1.status_error_len = 0;
1687 
1688 	} while (likely(total_rx_packets < budget));
1689 
1690 	u64_stats_update_begin(&rx_ring->syncp);
1691 	rx_ring->stats.packets += total_rx_packets;
1692 	rx_ring->stats.bytes += total_rx_bytes;
1693 	u64_stats_update_end(&rx_ring->syncp);
1694 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1695 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1696 
1697 	return total_rx_packets;
1698 }
1699 
1700 /**
1701  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1702  * @rx_ring:  rx ring to clean
1703  * @budget:   how many cleans we're allowed
1704  *
1705  * Returns number of packets cleaned
1706  **/
1707 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1708 {
1709 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1710 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1711 	struct i40e_vsi *vsi = rx_ring->vsi;
1712 	union i40e_rx_desc *rx_desc;
1713 	u32 rx_error, rx_status;
1714 	u16 rx_packet_len;
1715 	u8 rx_ptype;
1716 	u64 qword;
1717 	u16 i;
1718 
1719 	do {
1720 		struct i40e_rx_buffer *rx_bi;
1721 		struct sk_buff *skb;
1722 		u16 vlan_tag;
1723 		/* return some buffers to hardware, one at a time is too slow */
1724 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1725 			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1726 			cleaned_count = 0;
1727 		}
1728 
1729 		i = rx_ring->next_to_clean;
1730 		rx_desc = I40E_RX_DESC(rx_ring, i);
1731 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1732 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1733 			I40E_RXD_QW1_STATUS_SHIFT;
1734 
1735 		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1736 			break;
1737 
1738 		/* This memory barrier is needed to keep us from reading
1739 		 * any other fields out of the rx_desc until we know the
1740 		 * DD bit is set.
1741 		 */
1742 		dma_rmb();
1743 
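		/* programming status descriptors report the result of a
		 * filter programming request (e.g. Flow Director) rather
		 * than a received packet
		 */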
1744 		if (i40e_rx_is_programming_status(qword)) {
1745 			i40e_clean_programming_status(rx_ring, rx_desc);
1746 			I40E_RX_INCREMENT(rx_ring, i);
1747 			continue;
1748 		}
1749 		rx_bi = &rx_ring->rx_bi[i];
1750 		skb = rx_bi->skb;
1751 		prefetch(skb->data);
1752 
1753 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1754 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1755 
1756 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1757 			   I40E_RXD_QW1_ERROR_SHIFT;
1758 		rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1759 
1760 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1761 			   I40E_RXD_QW1_PTYPE_SHIFT;
1762 		rx_bi->skb = NULL;
1763 		cleaned_count++;
1764 
		/* Get the header and possibly the whole packet.
		 * If this is an skb from a previous receive, dma will be 0.
		 */
1768 		skb_put(skb, rx_packet_len);
1769 		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1770 				 DMA_FROM_DEVICE);
1771 		rx_bi->dma = 0;
1772 
1773 		I40E_RX_INCREMENT(rx_ring, i);
1774 
1775 		if (unlikely(
1776 		    !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1777 			rx_ring->rx_stats.non_eop_descs++;
1778 			continue;
1779 		}
1780 
1781 		/* ERR_MASK will only have valid bits if EOP set */
1782 		if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1783 			dev_kfree_skb_any(skb);
1784 			/* TODO: shouldn't we increment a counter indicating the
1785 			 * drop?
1786 			 */
1787 			continue;
1788 		}
1789 
1790 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1791 			     i40e_ptype_to_hash(rx_ptype));
1792 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1793 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1794 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1795 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1796 			rx_ring->last_rx_timestamp = jiffies;
1797 		}
1798 
1799 		/* probably a little skewed due to removing CRC */
1800 		total_rx_bytes += skb->len;
1801 		total_rx_packets++;
1802 
1803 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1804 
1805 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1806 
1807 		vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1808 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1809 			 : 0;
1810 #ifdef I40E_FCOE
1811 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1812 			dev_kfree_skb_any(skb);
1813 			continue;
1814 		}
1815 #endif
1816 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1817 
1818 		rx_desc->wb.qword1.status_error_len = 0;
1819 	} while (likely(total_rx_packets < budget));
1820 
1821 	u64_stats_update_begin(&rx_ring->syncp);
1822 	rx_ring->stats.packets += total_rx_packets;
1823 	rx_ring->stats.bytes += total_rx_bytes;
1824 	u64_stats_update_end(&rx_ring->syncp);
1825 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1826 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1827 
1828 	return total_rx_packets;
1829 }
1830 
1831 /**
1832  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1833  * @vsi: the VSI we care about
1834  * @q_vector: q_vector for which itr is being updated and interrupt enabled
1835  *
1836  **/
1837 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1838 					  struct i40e_q_vector *q_vector)
1839 {
1840 	struct i40e_hw *hw = &vsi->back->hw;
1841 	u16 old_itr;
1842 	int vector;
1843 	u32 val;
1844 
1845 	vector = (q_vector->v_idx + vsi->base_vector);
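	/* the PFINT_DYN_CTLN register array is 0-based while MSI-X vector 0
	 * is reserved for the miscellaneous interrupt, hence the (vector - 1)
	 * indexing on the register writes below
	 */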
1846 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1847 		old_itr = q_vector->rx.itr;
1848 		i40e_set_new_dynamic_itr(&q_vector->rx);
1849 		if (old_itr != q_vector->rx.itr) {
1850 			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1851 			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1852 			(I40E_RX_ITR <<
1853 				I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1854 			(q_vector->rx.itr <<
1855 				I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1856 		} else {
1857 			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1858 			I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1859 			(I40E_ITR_NONE <<
1860 				I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1861 		}
1862 		if (!test_bit(__I40E_DOWN, &vsi->state))
1863 			wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
1864 	} else {
1865 		i40e_irq_dynamic_enable(vsi,
1866 					q_vector->v_idx + vsi->base_vector);
1867 	}
1868 	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1869 		old_itr = q_vector->tx.itr;
1870 		i40e_set_new_dynamic_itr(&q_vector->tx);
1871 		if (old_itr != q_vector->tx.itr) {
1872 			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1873 				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1874 				(I40E_TX_ITR <<
1875 				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1876 				(q_vector->tx.itr <<
1877 				   I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1878 		} else {
1879 			val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1880 				I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1881 				(I40E_ITR_NONE <<
1882 				   I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
1883 		}
1884 		if (!test_bit(__I40E_DOWN, &vsi->state))
1885 			wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->v_idx +
1886 			      vsi->base_vector - 1), val);
1887 	} else {
1888 		i40e_irq_dynamic_enable(vsi,
1889 					q_vector->v_idx + vsi->base_vector);
1890 	}
1891 }
1892 
1893 /**
1894  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1895  * @napi: napi struct with our devices info in it
1896  * @budget: amount of work driver is allowed to do this pass, in packets
1897  *
1898  * This function will clean all queues associated with a q_vector.
1899  *
1900  * Returns the amount of work done
1901  **/
1902 int i40e_napi_poll(struct napi_struct *napi, int budget)
1903 {
1904 	struct i40e_q_vector *q_vector =
1905 			       container_of(napi, struct i40e_q_vector, napi);
1906 	struct i40e_vsi *vsi = q_vector->vsi;
1907 	struct i40e_ring *ring;
1908 	bool clean_complete = true;
1909 	bool arm_wb = false;
1910 	int budget_per_ring;
1911 	int cleaned;
1912 
1913 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1914 		napi_complete(napi);
1915 		return 0;
1916 	}
1917 
1918 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1919 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1920 	 */
1921 	i40e_for_each_ring(ring, q_vector->tx) {
1922 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1923 		arm_wb |= ring->arm_wb;
1924 	}
1925 
1926 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1927 	 * allow the budget to go below 1 because that would exit polling early.
1928 	 */
1929 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1930 
1931 	i40e_for_each_ring(ring, q_vector->rx) {
1932 		if (ring_is_ps_enabled(ring))
1933 			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1934 		else
1935 			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1936 		/* if we didn't clean as many as budgeted, we must be done */
1937 		clean_complete &= (budget_per_ring != cleaned);
1938 	}
1939 
1940 	/* If work not completed, return budget and polling will return */
1941 	if (!clean_complete) {
1942 		if (arm_wb)
1943 			i40e_force_wb(vsi, q_vector);
1944 		return budget;
1945 	}
1946 
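	/* all rings were cleaned within budget; clear the write-back arming
	 * state when the device is using write-back on ITR
	 */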
1947 	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1948 		q_vector->arm_wb_state = false;
1949 
1950 	/* Work is done so exit the polling mode and re-enable the interrupt */
1951 	napi_complete(napi);
1952 	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1953 		i40e_update_enable_itr(vsi, q_vector);
1954 	} else { /* Legacy mode */
1955 		struct i40e_hw *hw = &vsi->back->hw;
1956 		/* We re-enable the queue 0 cause, but
1957 		 * don't worry about dynamic_enable
1958 		 * because we left it on for the other
1959 		 * possible interrupts during napi
1960 		 */
1961 		u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1962 			   I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1963 
1964 		wr32(hw, I40E_QINT_RQCTL(0), qval);
1965 		qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1966 		       I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1967 		wr32(hw, I40E_QINT_TQCTL(0), qval);
1968 		i40e_irq_dynamic_enable_icr0(vsi->back);
1969 	}
1970 	return 0;
1971 }
1972 
1973 /**
1974  * i40e_atr - Add a Flow Director ATR filter
1975  * @tx_ring:  ring to add programming descriptor to
1976  * @skb:      send buffer
1977  * @tx_flags: send tx flags
1978  * @protocol: wire protocol
1979  **/
1980 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1981 		     u32 tx_flags, __be16 protocol)
1982 {
1983 	struct i40e_filter_program_desc *fdir_desc;
1984 	struct i40e_pf *pf = tx_ring->vsi->back;
1985 	union {
1986 		unsigned char *network;
1987 		struct iphdr *ipv4;
1988 		struct ipv6hdr *ipv6;
1989 	} hdr;
1990 	struct tcphdr *th;
1991 	unsigned int hlen;
1992 	u32 flex_ptype, dtype_cmd;
1993 	u16 i;
1994 
1995 	/* make sure ATR is enabled */
1996 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1997 		return;
1998 
1999 	if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2000 		return;
2001 
2002 	/* if sampling is disabled do nothing */
2003 	if (!tx_ring->atr_sample_rate)
2004 		return;
2005 
2006 	if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2007 		return;
2008 
2009 	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
2010 		/* snag network header to get L4 type and address */
2011 		hdr.network = skb_network_header(skb);
2012 
		/* Currently only IPv4/IPv6 with TCP is supported;
		 * access ihl as a u8 to avoid unaligned access on ia64
		 */
2016 		if (tx_flags & I40E_TX_FLAGS_IPV4)
2017 			hlen = (hdr.network[0] & 0x0F) << 2;
2018 		else if (protocol == htons(ETH_P_IPV6))
2019 			hlen = sizeof(struct ipv6hdr);
2020 		else
2021 			return;
2022 	} else {
2023 		hdr.network = skb_inner_network_header(skb);
2024 		hlen = skb_inner_network_header_len(skb);
2025 	}
2026 
	/* Currently only IPv4/IPv6 with TCP is supported.
	 * Note: tx_flags gets modified to reflect inner protocols in
	 * i40e_tx_enable_csum() if encap is enabled.
	 */
2031 	if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
2032 	    (hdr.ipv4->protocol != IPPROTO_TCP))
2033 		return;
2034 	else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
2035 		 (hdr.ipv6->nexthdr != IPPROTO_TCP))
2036 		return;
2037 
2038 	th = (struct tcphdr *)(hdr.network + hlen);
2039 
2040 	/* Due to lack of space, no more new filters can be programmed */
2041 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2042 		return;
2043 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
2044 		/* HW ATR eviction will take care of removing filters on FIN
2045 		 * and RST packets.
2046 		 */
2047 		if (th->fin || th->rst)
2048 			return;
2049 	}
2050 
2051 	tx_ring->atr_count++;
2052 
2053 	/* sample on all syn/fin/rst packets or once every atr sample rate */
2054 	if (!th->fin &&
2055 	    !th->syn &&
2056 	    !th->rst &&
2057 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
2058 		return;
2059 
2060 	tx_ring->atr_count = 0;
2061 
2062 	/* grab the next descriptor */
2063 	i = tx_ring->next_to_use;
2064 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2065 
2066 	i++;
2067 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2068 
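	/* QW0 carries the queue index, packet type and destination VSI;
	 * QW1 carries the add/remove command, destination, FD status
	 * reporting mode and counter index
	 */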
2069 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2070 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
2071 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2072 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2073 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2074 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2075 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2076 
2077 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2078 
2079 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2080 
2081 	dtype_cmd |= (th->fin || th->rst) ?
2082 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2083 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2084 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2085 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2086 
2087 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2088 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
2089 
2090 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2091 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2092 
2093 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2094 	if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2095 		dtype_cmd |=
2096 			((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2097 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2098 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2099 	else
2100 		dtype_cmd |=
2101 			((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2102 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2103 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2104 
2105 	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
2106 		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2107 
2108 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2109 	fdir_desc->rsvd = cpu_to_le32(0);
2110 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2111 	fdir_desc->fd_id = cpu_to_le32(0);
2112 }
2113 
2114 /**
2115  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2116  * @skb:     send buffer
2117  * @tx_ring: ring to send buffer on
2118  * @flags:   the tx flags to be set
2119  *
 * Checks the skb and sets up the generic transmit flags related to VLAN
 * tagging for the HW, such as VLAN and DCB.
 *
 * Returns an error code to indicate the frame should be dropped upon error,
 * otherwise returns 0 to indicate the flags have been set properly.
2125  **/
2126 #ifdef I40E_FCOE
2127 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2128 				      struct i40e_ring *tx_ring,
2129 				      u32 *flags)
2130 #else
2131 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2132 					     struct i40e_ring *tx_ring,
2133 					     u32 *flags)
2134 #endif
2135 {
2136 	__be16 protocol = skb->protocol;
2137 	u32  tx_flags = 0;
2138 
2139 	if (protocol == htons(ETH_P_8021Q) &&
2140 	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2141 		/* When HW VLAN acceleration is turned off by the user the
2142 		 * stack sets the protocol to 8021q so that the driver
2143 		 * can take any steps required to support the SW only
2144 		 * VLAN handling.  In our case the driver doesn't need
2145 		 * to take any further steps so just set the protocol
2146 		 * to the encapsulated ethertype.
2147 		 */
2148 		skb->protocol = vlan_get_protocol(skb);
2149 		goto out;
2150 	}
2151 
2152 	/* if we have a HW VLAN tag being added, default to the HW one */
2153 	if (skb_vlan_tag_present(skb)) {
2154 		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2155 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2156 	/* else if it is a SW VLAN, check the next protocol and store the tag */
2157 	} else if (protocol == htons(ETH_P_8021Q)) {
2158 		struct vlan_hdr *vhdr, _vhdr;
2159 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2160 		if (!vhdr)
2161 			return -EINVAL;
2162 
2163 		protocol = vhdr->h_vlan_encapsulated_proto;
2164 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2165 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2166 	}
2167 
2168 	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2169 		goto out;
2170 
2171 	/* Insert 802.1p priority into VLAN header */
2172 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2173 	    (skb->priority != TC_PRIO_CONTROL)) {
2174 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2175 		tx_flags |= (skb->priority & 0x7) <<
2176 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2177 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2178 			struct vlan_ethhdr *vhdr;
2179 			int rc;
2180 
2181 			rc = skb_cow_head(skb, 0);
2182 			if (rc < 0)
2183 				return rc;
2184 			vhdr = (struct vlan_ethhdr *)skb->data;
2185 			vhdr->h_vlan_TCI = htons(tx_flags >>
2186 						 I40E_TX_FLAGS_VLAN_SHIFT);
2187 		} else {
2188 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2189 		}
2190 	}
2191 
2192 out:
2193 	*flags = tx_flags;
2194 	return 0;
2195 }
2196 
2197 /**
2198  * i40e_tso - set up the tso context descriptor
2199  * @tx_ring:  ptr to the ring to send
2200  * @skb:      ptr to the skb we're sending
 * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: Quad Word 1
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if TSO is in progress, or a negative
 * error code
2205  **/
2206 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2207 		    u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
2208 		    u32 *cd_tunneling)
2209 {
2210 	u32 cd_cmd, cd_tso_len, cd_mss;
2211 	struct ipv6hdr *ipv6h;
2212 	struct tcphdr *tcph;
2213 	struct iphdr *iph;
2214 	u32 l4len;
2215 	int err;
2216 
2217 	if (!skb_is_gso(skb))
2218 		return 0;
2219 
2220 	err = skb_cow_head(skb, 0);
2221 	if (err < 0)
2222 		return err;
2223 
2224 	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2225 	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2226 
2227 	if (iph->version == 4) {
2228 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2229 		iph->tot_len = 0;
2230 		iph->check = 0;
2231 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2232 						 0, IPPROTO_TCP, 0);
2233 	} else if (ipv6h->version == 6) {
2234 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2235 		ipv6h->payload_len = 0;
2236 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2237 					       0, IPPROTO_TCP, 0);
2238 	}
2239 
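	/* the header length covers everything through the end of the (inner)
	 * TCP header; the remainder of the skb is TSO payload
	 */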
2240 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2241 	*hdr_len = (skb->encapsulation
2242 		    ? (skb_inner_transport_header(skb) - skb->data)
2243 		    : skb_transport_offset(skb)) + l4len;
2244 
2245 	/* find the field values */
2246 	cd_cmd = I40E_TX_CTX_DESC_TSO;
2247 	cd_tso_len = skb->len - *hdr_len;
2248 	cd_mss = skb_shinfo(skb)->gso_size;
2249 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2250 				((u64)cd_tso_len <<
2251 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2252 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2253 	return 1;
2254 }
2255 
2256 /**
2257  * i40e_tsyn - set up the tsyn context descriptor
2258  * @tx_ring:  ptr to the ring to send
2259  * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: Quad Word 1
2261  *
2262  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2263  **/
2264 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2265 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2266 {
2267 	struct i40e_pf *pf;
2268 
2269 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2270 		return 0;
2271 
2272 	/* Tx timestamps cannot be sampled when doing TSO */
2273 	if (tx_flags & I40E_TX_FLAGS_TSO)
2274 		return 0;
2275 
2276 	/* only timestamp the outbound packet if the user has requested it and
2277 	 * we are not already transmitting a packet to be timestamped
2278 	 */
2279 	pf = i40e_netdev_to_pf(tx_ring->netdev);
2280 	if (!(pf->flags & I40E_FLAG_PTP))
2281 		return 0;
2282 
2283 	if (pf->ptp_tx &&
2284 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2285 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
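		/* hold a reference on the skb; it is released once the
		 * hardware Tx timestamp has been read back and delivered
		 */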
2286 		pf->ptp_tx_skb = skb_get(skb);
2287 	} else {
2288 		return 0;
2289 	}
2290 
2291 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2292 				I40E_TXD_CTX_QW1_CMD_SHIFT;
2293 
2294 	return 1;
2295 }
2296 
2297 /**
2298  * i40e_tx_enable_csum - Enable Tx checksum offloads
2299  * @skb: send buffer
2300  * @tx_flags: pointer to Tx flags currently set
2301  * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
2303  * @cd_tunneling: ptr to context desc bits
2304  **/
2305 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2306 				u32 *td_cmd, u32 *td_offset,
2307 				struct i40e_ring *tx_ring,
2308 				u32 *cd_tunneling)
2309 {
2310 	struct ipv6hdr *this_ipv6_hdr;
2311 	unsigned int this_tcp_hdrlen;
2312 	struct iphdr *this_ip_hdr;
2313 	u32 network_hdr_len;
2314 	u8 l4_hdr = 0;
2315 	struct udphdr *oudph;
2316 	struct iphdr *oiph;
2317 	u32 l4_tunnel = 0;
2318 
2319 	if (skb->encapsulation) {
2320 		switch (ip_hdr(skb)->protocol) {
2321 		case IPPROTO_UDP:
2322 			oudph = udp_hdr(skb);
2323 			oiph = ip_hdr(skb);
2324 			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2325 			*tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
2326 			break;
2327 		default:
2328 			return;
2329 		}
2330 		network_hdr_len = skb_inner_network_header_len(skb);
2331 		this_ip_hdr = inner_ip_hdr(skb);
2332 		this_ipv6_hdr = inner_ipv6_hdr(skb);
2333 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2334 
2335 		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2336 			if (*tx_flags & I40E_TX_FLAGS_TSO) {
2337 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2338 				ip_hdr(skb)->check = 0;
2339 			} else {
2340 				*cd_tunneling |=
2341 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2342 			}
2343 		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2344 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2345 			if (*tx_flags & I40E_TX_FLAGS_TSO)
2346 				ip_hdr(skb)->check = 0;
2347 		}
2348 
2349 		/* Now set the ctx descriptor fields */
2350 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2351 				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT      |
2352 				   l4_tunnel                             |
2353 				   ((skb_inner_network_offset(skb) -
2354 					skb_transport_offset(skb)) >> 1) <<
2355 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2356 		if (this_ip_hdr->version == 6) {
2357 			*tx_flags &= ~I40E_TX_FLAGS_IPV4;
2358 			*tx_flags |= I40E_TX_FLAGS_IPV6;
2359 		}
2360 		if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
2361 		    (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING)        &&
2362 		    (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
2363 			oudph->check = ~csum_tcpudp_magic(oiph->saddr,
2364 					oiph->daddr,
2365 					(skb->len - skb_transport_offset(skb)),
2366 					IPPROTO_UDP, 0);
2367 			*cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2368 		}
2369 	} else {
2370 		network_hdr_len = skb_network_header_len(skb);
2371 		this_ip_hdr = ip_hdr(skb);
2372 		this_ipv6_hdr = ipv6_hdr(skb);
2373 		this_tcp_hdrlen = tcp_hdrlen(skb);
2374 	}
2375 
2376 	/* Enable IP checksum offloads */
2377 	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2378 		l4_hdr = this_ip_hdr->protocol;
2379 		/* the stack computes the IP header already, the only time we
2380 		 * need the hardware to recompute it is in the case of TSO.
2381 		 */
2382 		if (*tx_flags & I40E_TX_FLAGS_TSO) {
2383 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2384 			this_ip_hdr->check = 0;
2385 		} else {
2386 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2387 		}
2388 		/* Now set the td_offset for IP header length */
2389 		*td_offset = (network_hdr_len >> 2) <<
2390 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2391 	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2392 		l4_hdr = this_ipv6_hdr->nexthdr;
2393 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2394 		/* Now set the td_offset for IP header length */
2395 		*td_offset = (network_hdr_len >> 2) <<
2396 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2397 	}
2398 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2399 	*td_offset |= (skb_network_offset(skb) >> 1) <<
2400 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2401 
2402 	/* Enable L4 checksum offloads */
2403 	switch (l4_hdr) {
2404 	case IPPROTO_TCP:
2405 		/* enable checksum offloads */
2406 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2407 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2408 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2409 		break;
2410 	case IPPROTO_SCTP:
2411 		/* enable SCTP checksum offload */
2412 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2413 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2414 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2415 		break;
2416 	case IPPROTO_UDP:
2417 		/* enable UDP checksum offload */
2418 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2419 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2420 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2421 		break;
2422 	default:
2423 		break;
2424 	}
2425 }
2426 
2427 /**
 * i40e_create_tx_ctx - Build the Tx context descriptor
2429  * @tx_ring:  ring to create the descriptor on
2430  * @cd_type_cmd_tso_mss: Quad Word 1
2431  * @cd_tunneling: Quad Word 0 - bits 0-31
2432  * @cd_l2tag2: Quad Word 0 - bits 32-63
2433  **/
2434 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2435 			       const u64 cd_type_cmd_tso_mss,
2436 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2437 {
2438 	struct i40e_tx_context_desc *context_desc;
2439 	int i = tx_ring->next_to_use;
2440 
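	/* if there is nothing to program, skip the context descriptor */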
2441 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2442 	    !cd_tunneling && !cd_l2tag2)
2443 		return;
2444 
2445 	/* grab the next descriptor */
2446 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2447 
2448 	i++;
2449 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2450 
2451 	/* cpu_to_le32 and assign to struct fields */
2452 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2453 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2454 	context_desc->rsvd = cpu_to_le16(0);
2455 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2456 }
2457 
2458 /**
2459  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2460  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to assure are available
2462  *
2463  * Returns -EBUSY if a stop is needed, else 0
2464  **/
2465 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2466 {
2467 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2468 	/* Memory barrier before checking head and tail */
2469 	smp_mb();
2470 
2471 	/* Check again in a case another CPU has just made room available. */
2472 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2473 		return -EBUSY;
2474 
2475 	/* A reprieve! - use start_queue because it doesn't call schedule */
2476 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2477 	++tx_ring->tx_stats.restart_queue;
2478 	return 0;
2479 }
2480 
2481 /**
2482  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2483  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to assure are available
2485  *
2486  * Returns 0 if stop is not needed
2487  **/
2488 #ifdef I40E_FCOE
2489 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2490 #else
2491 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2492 #endif
2493 {
2494 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2495 		return 0;
2496 	return __i40e_maybe_stop_tx(tx_ring, size);
2497 }
2498 
2499 /**
2500  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2501  * @skb:      send buffer
2502  * @tx_flags: collected send information
2503  *
2504  * Note: Our HW can't scatter-gather more than 8 fragments to build
2505  * a packet on the wire and so we need to figure out the cases where we
 * need to linearize the skb.
 *
 * Returns true if the skb needs to be linearized.
 **/
2508 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2509 {
2510 	struct skb_frag_struct *frag;
2511 	bool linearize = false;
2512 	unsigned int size = 0;
2513 	u16 num_frags;
2514 	u16 gso_segs;
2515 
2516 	num_frags = skb_shinfo(skb)->nr_frags;
2517 	gso_segs = skb_shinfo(skb)->gso_segs;
2518 
2519 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2520 		u16 j = 0;
2521 
2522 		if (num_frags < (I40E_MAX_BUFFER_TXD))
2523 			goto linearize_chk_done;
2524 		/* try the simple math, if we have too many frags per segment */
2525 		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2526 		    I40E_MAX_BUFFER_TXD) {
2527 			linearize = true;
2528 			goto linearize_chk_done;
2529 		}
2530 		frag = &skb_shinfo(skb)->frags[0];
2531 		/* we might still have more fragments per segment */
2532 		do {
2533 			size += skb_frag_size(frag);
2534 			frag++; j++;
2535 			if ((size >= skb_shinfo(skb)->gso_size) &&
2536 			    (j < I40E_MAX_BUFFER_TXD)) {
2537 				size = (size % skb_shinfo(skb)->gso_size);
2538 				j = (size) ? 1 : 0;
2539 			}
2540 			if (j == I40E_MAX_BUFFER_TXD) {
2541 				linearize = true;
2542 				break;
2543 			}
2544 			num_frags--;
2545 		} while (num_frags);
2546 	} else {
2547 		if (num_frags >= I40E_MAX_BUFFER_TXD)
2548 			linearize = true;
2549 	}
2550 
2551 linearize_chk_done:
2552 	return linearize;
2553 }
2554 
2555 /**
2556  * i40e_tx_map - Build the Tx descriptor
2557  * @tx_ring:  ring to send buffer on
2558  * @skb:      send buffer
2559  * @first:    first buffer info buffer to use
2560  * @tx_flags: collected send information
2561  * @hdr_len:  size of the packet header
2562  * @td_cmd:   the command field in the descriptor
2563  * @td_offset: offset for checksum or crc
2564  **/
2565 #ifdef I40E_FCOE
2566 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2567 			struct i40e_tx_buffer *first, u32 tx_flags,
2568 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2569 #else
2570 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2571 			       struct i40e_tx_buffer *first, u32 tx_flags,
2572 			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
2573 #endif
2574 {
2575 	unsigned int data_len = skb->data_len;
2576 	unsigned int size = skb_headlen(skb);
2577 	struct skb_frag_struct *frag;
2578 	struct i40e_tx_buffer *tx_bi;
2579 	struct i40e_tx_desc *tx_desc;
2580 	u16 i = tx_ring->next_to_use;
2581 	u32 td_tag = 0;
2582 	dma_addr_t dma;
2583 	u16 gso_segs;
2584 
2585 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2586 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2587 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2588 			 I40E_TX_FLAGS_VLAN_SHIFT;
2589 	}
2590 
2591 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2592 		gso_segs = skb_shinfo(skb)->gso_segs;
2593 	else
2594 		gso_segs = 1;
2595 
	/* total bytes on the wire: payload plus one copy of the headers
	 * for each gso segment
	 */
2597 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2598 	first->gso_segs = gso_segs;
2599 	first->skb = skb;
2600 	first->tx_flags = tx_flags;
2601 
2602 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2603 
2604 	tx_desc = I40E_TX_DESC(tx_ring, i);
2605 	tx_bi = first;
2606 
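	/* map the linear head first, then each page fragment; chunks larger
	 * than I40E_MAX_DATA_PER_TXD are split across multiple descriptors
	 * in the inner loop
	 */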
2607 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2608 		if (dma_mapping_error(tx_ring->dev, dma))
2609 			goto dma_error;
2610 
2611 		/* record length, and DMA address */
2612 		dma_unmap_len_set(tx_bi, len, size);
2613 		dma_unmap_addr_set(tx_bi, dma, dma);
2614 
2615 		tx_desc->buffer_addr = cpu_to_le64(dma);
2616 
2617 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2618 			tx_desc->cmd_type_offset_bsz =
2619 				build_ctob(td_cmd, td_offset,
2620 					   I40E_MAX_DATA_PER_TXD, td_tag);
2621 
2622 			tx_desc++;
2623 			i++;
2624 			if (i == tx_ring->count) {
2625 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2626 				i = 0;
2627 			}
2628 
2629 			dma += I40E_MAX_DATA_PER_TXD;
2630 			size -= I40E_MAX_DATA_PER_TXD;
2631 
2632 			tx_desc->buffer_addr = cpu_to_le64(dma);
2633 		}
2634 
2635 		if (likely(!data_len))
2636 			break;
2637 
2638 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2639 							  size, td_tag);
2640 
2641 		tx_desc++;
2642 		i++;
2643 		if (i == tx_ring->count) {
2644 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2645 			i = 0;
2646 		}
2647 
2648 		size = skb_frag_size(frag);
2649 		data_len -= size;
2650 
2651 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2652 				       DMA_TO_DEVICE);
2653 
2654 		tx_bi = &tx_ring->tx_bi[i];
2655 	}
2656 
2657 	/* Place RS bit on last descriptor of any packet that spans across the
2658 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2659 	 */
2660 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2661 	    (first <= &tx_ring->tx_bi[i]) &&
2662 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2663 		tx_desc->cmd_type_offset_bsz =
2664 			build_ctob(td_cmd, td_offset, size, td_tag) |
2665 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2666 					 I40E_TXD_QW1_CMD_SHIFT);
2667 	} else {
2668 		tx_desc->cmd_type_offset_bsz =
2669 			build_ctob(td_cmd, td_offset, size, td_tag) |
2670 			cpu_to_le64((u64)I40E_TXD_CMD <<
2671 					 I40E_TXD_QW1_CMD_SHIFT);
2672 	}
2673 
2674 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2675 						 tx_ring->queue_index),
2676 			     first->bytecount);
2677 
2678 	/* Force memory writes to complete before letting h/w
2679 	 * know there are new descriptors to fetch.  (Only
2680 	 * applicable for weak-ordered memory model archs,
2681 	 * such as IA-64).
2682 	 */
2683 	wmb();
2684 
2685 	/* set next_to_watch value indicating a packet is present */
2686 	first->next_to_watch = tx_desc;
2687 
2688 	i++;
2689 	if (i == tx_ring->count)
2690 		i = 0;
2691 
2692 	tx_ring->next_to_use = i;
2693 
2694 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2695 	/* notify HW of packet */
2696 	if (!skb->xmit_more ||
2697 	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2698 						   tx_ring->queue_index)))
2699 		writel(i, tx_ring->tail);
2700 	else
2701 		prefetchw(tx_desc + 1);
2702 
2703 	return;
2704 
2705 dma_error:
2706 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2707 
2708 	/* clear dma mappings for failed tx_bi map */
2709 	for (;;) {
2710 		tx_bi = &tx_ring->tx_bi[i];
2711 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2712 		if (tx_bi == first)
2713 			break;
2714 		if (i == 0)
2715 			i = tx_ring->count;
2716 		i--;
2717 	}
2718 
2719 	tx_ring->next_to_use = i;
2720 }
2721 
2722 /**
2723  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2724  * @skb:     send buffer
2725  * @tx_ring: ring to send buffer on
2726  *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring, since we
 * need at least one descriptor.
2730  **/
2731 #ifdef I40E_FCOE
2732 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2733 				      struct i40e_ring *tx_ring)
2734 #else
2735 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2736 					     struct i40e_ring *tx_ring)
2737 #endif
2738 {
2739 	unsigned int f;
2740 	int count = 0;
2741 
2742 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2743 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2744 	 *       + 4 desc gap to avoid the cache line where head is,
2745 	 *       + 1 desc for context descriptor,
2746 	 * otherwise try next time
2747 	 */
2748 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2749 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2750 
2751 	count += TXD_USE_COUNT(skb_headlen(skb));
2752 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2753 		tx_ring->tx_stats.tx_busy++;
2754 		return 0;
2755 	}
2756 	return count;
2757 }
2758 
2759 /**
2760  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2761  * @skb:     send buffer
2762  * @tx_ring: ring to send buffer on
2763  *
2764  * Returns NETDEV_TX_OK if sent, else an error code
2765  **/
2766 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2767 					struct i40e_ring *tx_ring)
2768 {
2769 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2770 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2771 	struct i40e_tx_buffer *first;
2772 	u32 td_offset = 0;
2773 	u32 tx_flags = 0;
2774 	__be16 protocol;
2775 	u32 td_cmd = 0;
2776 	u8 hdr_len = 0;
2777 	int tsyn;
	int tso;

	if (!i40e_xmit_descriptor_count(skb, tx_ring))
2780 		return NETDEV_TX_BUSY;
2781 
2782 	/* prepare the xmit flags */
2783 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2784 		goto out_drop;
2785 
2786 	/* obtain protocol of skb */
2787 	protocol = vlan_get_protocol(skb);
2788 
2789 	/* record the location of the first descriptor for this packet */
2790 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2791 
2792 	/* setup IPv4/IPv6 offloads */
2793 	if (protocol == htons(ETH_P_IP))
2794 		tx_flags |= I40E_TX_FLAGS_IPV4;
2795 	else if (protocol == htons(ETH_P_IPV6))
2796 		tx_flags |= I40E_TX_FLAGS_IPV6;
2797 
2798 	tso = i40e_tso(tx_ring, skb, &hdr_len,
2799 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2800 
2801 	if (tso < 0)
2802 		goto out_drop;
2803 	else if (tso)
2804 		tx_flags |= I40E_TX_FLAGS_TSO;
2805 
2806 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2807 
2808 	if (tsyn)
2809 		tx_flags |= I40E_TX_FLAGS_TSYN;
2810 
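	/* the hardware can't scatter-gather more than I40E_MAX_BUFFER_TXD
	 * fragments per segment, so fall back to a linear skb when needed
	 */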
2811 	if (i40e_chk_linearize(skb, tx_flags))
2812 		if (skb_linearize(skb))
2813 			goto out_drop;
2814 
2815 	skb_tx_timestamp(skb);
2816 
2817 	/* always enable CRC insertion offload */
2818 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2819 
2820 	/* Always offload the checksum, since it's in the data descriptor */
2821 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2822 		tx_flags |= I40E_TX_FLAGS_CSUM;
2823 
2824 		i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2825 				    tx_ring, &cd_tunneling);
2826 	}
2827 
2828 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2829 			   cd_tunneling, cd_l2tag2);
2830 
2831 	/* Add Flow Director ATR if it's enabled.
2832 	 *
2833 	 * NOTE: this must always be directly before the data descriptor.
2834 	 */
2835 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2836 
2837 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2838 		    td_cmd, td_offset);
2839 
2840 	return NETDEV_TX_OK;
2841 
2842 out_drop:
2843 	dev_kfree_skb_any(skb);
2844 	return NETDEV_TX_OK;
2845 }
2846 
2847 /**
2848  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2849  * @skb:    send buffer
2850  * @netdev: network interface device structure
2851  *
2852  * Returns NETDEV_TX_OK if sent, else an error code
2853  **/
2854 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2855 {
2856 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2857 	struct i40e_vsi *vsi = np->vsi;
2858 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2859 
	/* hardware can't handle really short frames, so pad to I40E_MIN_TX_LEN
	 * in software; hardware padding works beyond this point
	 */
2863 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2864 		return NETDEV_TX_OK;
2865 
2866 	return i40e_xmit_frame_ring(skb, tx_ring);
2867 }
2868