1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include "i40e.h"
29 #include "i40e_prototype.h"
30 
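/**
 * build_ctob - build the cmd/type/offset/size quad word for a Tx data descriptor
 * @td_cmd:    Tx descriptor command bits
 * @td_offset: Tx descriptor header offsets
 * @size:      size of the data buffer in bytes
 * @td_tag:    L2 tag 1 value to insert
 **/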
31 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
32 				u32 td_tag)
33 {
34 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
35 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
36 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
37 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
38 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
39 }
40 
41 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
42 #define I40E_FD_CLEAN_DELAY 10
43 /**
44  * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: the Flow Director filter parameters to program
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: the PF pointer
 * @add: true for add/update, false for remove
49  **/
50 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
51 			     struct i40e_pf *pf, bool add)
52 {
53 	struct i40e_filter_program_desc *fdir_desc;
54 	struct i40e_tx_buffer *tx_buf, *first;
55 	struct i40e_tx_desc *tx_desc;
56 	struct i40e_ring *tx_ring;
57 	unsigned int fpt, dcc;
58 	struct i40e_vsi *vsi;
59 	struct device *dev;
60 	dma_addr_t dma;
61 	u32 td_cmd = 0;
62 	u16 delay = 0;
63 	u16 i;
64 
65 	/* find existing FDIR VSI */
66 	vsi = NULL;
67 	for (i = 0; i < pf->num_alloc_vsi; i++)
68 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
69 			vsi = pf->vsi[i];
70 	if (!vsi)
71 		return -ENOENT;
72 
73 	tx_ring = vsi->tx_rings[0];
74 	dev = tx_ring->dev;
75 
76 	/* we need two descriptors to add/del a filter and we can wait */
77 	do {
78 		if (I40E_DESC_UNUSED(tx_ring) > 1)
79 			break;
80 		msleep_interruptible(1);
81 		delay++;
82 	} while (delay < I40E_FD_CLEAN_DELAY);
83 
84 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
85 		return -EAGAIN;
86 
87 	dma = dma_map_single(dev, raw_packet,
88 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
89 	if (dma_mapping_error(dev, dma))
90 		goto dma_fail;
91 
92 	/* grab the next descriptor */
93 	i = tx_ring->next_to_use;
94 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
95 	first = &tx_ring->tx_bi[i];
96 	memset(first, 0, sizeof(struct i40e_tx_buffer));
97 
98 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
99 
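	/* build QW0 of the filter programming descriptor: queue index,
	 * flex offset, PCTYPE and destination VSI
	 */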
100 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
101 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
102 
103 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
104 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
105 
106 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
107 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
108 
109 	/* Use LAN VSI Id if not programmed by user */
110 	if (fdir_data->dest_vsi == 0)
111 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
112 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
113 	else
114 		fpt |= ((u32)fdir_data->dest_vsi <<
115 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
116 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
117 
118 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
119 
120 	if (add)
121 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
122 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
123 	else
124 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
125 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
126 
127 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
128 	       I40E_TXD_FLTR_QW1_DEST_MASK;
129 
130 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
131 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
132 
133 	if (fdir_data->cnt_index != 0) {
134 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
135 		dcc |= ((u32)fdir_data->cnt_index <<
136 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
137 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
138 	}
139 
140 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
141 	fdir_desc->rsvd = cpu_to_le32(0);
142 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
143 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
144 
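	/* the programming descriptor must be paired with a data descriptor
	 * carrying the pre-built dummy packet whose headers describe the
	 * flow to match
	 */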
145 	/* Now program a dummy descriptor */
146 	i = tx_ring->next_to_use;
147 	tx_desc = I40E_TX_DESC(tx_ring, i);
148 	tx_buf = &tx_ring->tx_bi[i];
149 
150 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
151 
152 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
153 
154 	/* record length, and DMA address */
155 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
156 	dma_unmap_addr_set(tx_buf, dma, dma);
157 
158 	tx_desc->buffer_addr = cpu_to_le64(dma);
159 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
160 
161 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
162 	tx_buf->raw_buf = (void *)raw_packet;
163 
164 	tx_desc->cmd_type_offset_bsz =
165 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
166 
167 	/* set the timestamp */
168 	tx_buf->time_stamp = jiffies;
169 
170 	/* Force memory writes to complete before letting h/w
171 	 * know there are new descriptors to fetch.
172 	 */
173 	wmb();
174 
175 	/* Mark the data descriptor to be watched */
176 	first->next_to_watch = tx_desc;
177 
178 	writel(tx_ring->next_to_use, tx_ring->tail);
179 	return 0;
180 
181 dma_fail:
182 	return -1;
183 }
184 
185 #define IP_HEADER_OFFSET 14
186 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
187 /**
188  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
189  * @vsi: pointer to the targeted VSI
190  * @fd_data: the flow director data required for the FDir descriptor
191  * @add: true adds a filter, false removes it
192  *
193  * Returns 0 if the filters were successfully added or removed
194  **/
195 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
196 				   struct i40e_fdir_filter *fd_data,
197 				   bool add)
198 {
199 	struct i40e_pf *pf = vsi->back;
200 	struct udphdr *udp;
201 	struct iphdr *ip;
202 	bool err = false;
203 	u8 *raw_packet;
204 	int ret;
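	/* Dummy UDP/IPv4 packet: Ethernet header (EtherType 0x0800), minimal
	 * IPv4 header (IHL 5, protocol 0x11/UDP) and an empty UDP header;
	 * addresses and ports are filled in below from fd_data
	 */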
205 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
206 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
207 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
208 
209 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 	if (!raw_packet)
211 		return -ENOMEM;
212 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
213 
214 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
215 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
216 	      + sizeof(struct iphdr));
217 
218 	ip->daddr = fd_data->dst_ip[0];
219 	udp->dest = fd_data->dst_port;
220 	ip->saddr = fd_data->src_ip[0];
221 	udp->source = fd_data->src_port;
222 
223 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
224 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
225 	if (ret) {
226 		dev_info(&pf->pdev->dev,
227 			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
228 			 fd_data->pctype, ret);
229 		err = true;
230 	} else {
231 		dev_info(&pf->pdev->dev,
232 			 "Filter OK for PCTYPE %d (ret = %d)\n",
233 			 fd_data->pctype, ret);
234 	}
235 
236 	return err ? -EOPNOTSUPP : 0;
237 }
238 
239 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
240 /**
241  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
242  * @vsi: pointer to the targeted VSI
243  * @fd_data: the flow director data required for the FDir descriptor
244  * @add: true adds a filter, false removes it
245  *
246  * Returns 0 if the filters were successfully added or removed
247  **/
248 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
249 				   struct i40e_fdir_filter *fd_data,
250 				   bool add)
251 {
252 	struct i40e_pf *pf = vsi->back;
253 	struct tcphdr *tcp;
254 	struct iphdr *ip;
255 	bool err = false;
256 	u8 *raw_packet;
257 	int ret;
258 	/* Dummy packet */
259 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
260 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
261 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
262 		0x0, 0x72, 0, 0, 0, 0};
263 
264 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
265 	if (!raw_packet)
266 		return -ENOMEM;
267 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
268 
269 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
270 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
271 	      + sizeof(struct iphdr));
272 
273 	ip->daddr = fd_data->dst_ip[0];
274 	tcp->dest = fd_data->dst_port;
275 	ip->saddr = fd_data->src_ip[0];
276 	tcp->source = fd_data->src_port;
277 
278 	if (add) {
279 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
280 			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
281 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
282 		}
283 	}
284 
285 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
286 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
287 
288 	if (ret) {
289 		dev_info(&pf->pdev->dev,
290 			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
291 			 fd_data->pctype, ret);
292 		err = true;
293 	} else {
294 		dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
295 			 fd_data->pctype, ret);
296 	}
297 
298 	return err ? -EOPNOTSUPP : 0;
299 }
300 
301 /**
302  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
303  * a specific flow spec
304  * @vsi: pointer to the targeted VSI
305  * @fd_data: the flow director data required for the FDir descriptor
306  * @add: true adds a filter, false removes it
307  *
308  * Always returns -EOPNOTSUPP
309  **/
310 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
311 				    struct i40e_fdir_filter *fd_data,
312 				    bool add)
313 {
314 	return -EOPNOTSUPP;
315 }
316 
317 #define I40E_IP_DUMMY_PACKET_LEN 34
318 /**
319  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
320  * a specific flow spec
321  * @vsi: pointer to the targeted VSI
322  * @fd_data: the flow director data required for the FDir descriptor
323  * @add: true adds a filter, false removes it
324  *
325  * Returns 0 if the filters were successfully added or removed
326  **/
327 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
328 				  struct i40e_fdir_filter *fd_data,
329 				  bool add)
330 {
331 	struct i40e_pf *pf = vsi->back;
332 	struct iphdr *ip;
333 	bool err = false;
334 	u8 *raw_packet;
335 	int ret;
336 	int i;
337 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
338 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
339 		0, 0, 0, 0};
340 
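	/* program the same filter for every PCTYPE from NONF_IPV4_OTHER
	 * through FRAG_IPV4 so the rule matches IPv4 traffic whether or not
	 * it is fragmented
	 */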
341 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
342 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
343 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
344 		if (!raw_packet)
345 			return -ENOMEM;
346 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
347 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
348 
349 		ip->saddr = fd_data->src_ip[0];
350 		ip->daddr = fd_data->dst_ip[0];
351 		ip->protocol = 0;
352 
353 		fd_data->pctype = i;
354 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
355 
356 		if (ret) {
357 			dev_info(&pf->pdev->dev,
358 				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
359 				 fd_data->pctype, ret);
360 			err = true;
361 		} else {
362 			dev_info(&pf->pdev->dev,
363 				 "Filter OK for PCTYPE %d (ret = %d)\n",
364 				 fd_data->pctype, ret);
365 		}
366 	}
367 
368 	return err ? -EOPNOTSUPP : 0;
369 }
370 
371 /**
372  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
373  * @vsi: pointer to the targeted VSI
 * @input: the Flow Director filter parameters to add or delete
375  * @add: true adds a filter, false removes it
376  *
377  **/
378 int i40e_add_del_fdir(struct i40e_vsi *vsi,
379 		      struct i40e_fdir_filter *input, bool add)
380 {
381 	struct i40e_pf *pf = vsi->back;
382 	int ret;
383 
384 	switch (input->flow_type & ~FLOW_EXT) {
385 	case TCP_V4_FLOW:
386 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
387 		break;
388 	case UDP_V4_FLOW:
389 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
390 		break;
391 	case SCTP_V4_FLOW:
392 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
393 		break;
394 	case IPV4_FLOW:
395 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
396 		break;
397 	case IP_USER_FLOW:
398 		switch (input->ip4_proto) {
399 		case IPPROTO_TCP:
400 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
401 			break;
402 		case IPPROTO_UDP:
403 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
404 			break;
405 		case IPPROTO_SCTP:
406 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
407 			break;
408 		default:
409 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
410 			break;
411 		}
412 		break;
413 	default:
414 		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
415 			 input->flow_type);
416 		ret = -EINVAL;
417 	}
418 
419 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
420 	return ret;
421 }
422 
423 /**
424  * i40e_fd_handle_status - check the Programming Status for FD
425  * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for the programming status, not a packet descriptor
 * @prog_id: the id originally used for programming
 *
 * This is used to verify whether the FD programming or invalidation
 * requested by SW to the HW was successful, and to take action accordingly.
431  **/
432 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
433 				  union i40e_rx_desc *rx_desc, u8 prog_id)
434 {
435 	struct i40e_pf *pf = rx_ring->vsi->back;
436 	struct pci_dev *pdev = pf->pdev;
437 	u32 fcnt_prog, fcnt_avail;
438 	u32 error;
439 	u64 qw;
440 
441 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
442 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
443 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
444 
445 	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
446 		dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
447 			 rx_desc->wb.qword0.hi_dword.fd_id);
448 
449 		/* filter programming failed most likely due to table full */
450 		fcnt_prog = i40e_get_cur_guaranteed_fd_count(pf);
451 		fcnt_avail = pf->fdir_pf_filter_count;
452 		/* If ATR is running fcnt_prog can quickly change,
453 		 * if we are very close to full, it makes sense to disable
454 		 * FD ATR/SB and then re-enable it when there is room.
455 		 */
456 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
457 			/* Turn off ATR first */
458 			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
459 			    !(pf->auto_disable_flags &
460 			      I40E_FLAG_FD_ATR_ENABLED)) {
461 				dev_warn(&pdev->dev, "FD filter space full, ATR for further flows will be turned off\n");
462 				pf->auto_disable_flags |=
463 						       I40E_FLAG_FD_ATR_ENABLED;
464 				pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT;
465 			} else if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
466 				   !(pf->auto_disable_flags &
467 				     I40E_FLAG_FD_SB_ENABLED)) {
468 				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
469 				pf->auto_disable_flags |=
470 							I40E_FLAG_FD_SB_ENABLED;
471 				pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT;
472 			}
473 		} else {
474 			dev_info(&pdev->dev, "FD filter programming error\n");
475 		}
476 	} else if (error ==
477 			  (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
478 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
479 			dev_info(&pdev->dev, "ntuple filter loc = %d, could not be removed\n",
480 				 rx_desc->wb.qword0.hi_dword.fd_id);
481 	}
482 }
483 
484 /**
485  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
486  * @ring:      the ring that owns the buffer
487  * @tx_buffer: the buffer to free
488  **/
489 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
490 					    struct i40e_tx_buffer *tx_buffer)
491 {
492 	if (tx_buffer->skb) {
493 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
494 			kfree(tx_buffer->raw_buf);
495 		else
496 			dev_kfree_skb_any(tx_buffer->skb);
497 
498 		if (dma_unmap_len(tx_buffer, len))
499 			dma_unmap_single(ring->dev,
500 					 dma_unmap_addr(tx_buffer, dma),
501 					 dma_unmap_len(tx_buffer, len),
502 					 DMA_TO_DEVICE);
503 	} else if (dma_unmap_len(tx_buffer, len)) {
504 		dma_unmap_page(ring->dev,
505 			       dma_unmap_addr(tx_buffer, dma),
506 			       dma_unmap_len(tx_buffer, len),
507 			       DMA_TO_DEVICE);
508 	}
509 	tx_buffer->next_to_watch = NULL;
510 	tx_buffer->skb = NULL;
511 	dma_unmap_len_set(tx_buffer, len, 0);
512 	/* tx_buffer must be completely set up in the transmit path */
513 }
514 
515 /**
 * i40e_clean_tx_ring - Free all pending Tx buffers in a ring
517  * @tx_ring: ring to be cleaned
518  **/
519 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
520 {
521 	unsigned long bi_size;
522 	u16 i;
523 
524 	/* ring already cleared, nothing to do */
525 	if (!tx_ring->tx_bi)
526 		return;
527 
528 	/* Free all the Tx ring sk_buffs */
529 	for (i = 0; i < tx_ring->count; i++)
530 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
531 
532 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
533 	memset(tx_ring->tx_bi, 0, bi_size);
534 
535 	/* Zero out the descriptor ring */
536 	memset(tx_ring->desc, 0, tx_ring->size);
537 
538 	tx_ring->next_to_use = 0;
539 	tx_ring->next_to_clean = 0;
540 
541 	if (!tx_ring->netdev)
542 		return;
543 
544 	/* cleanup Tx queue statistics */
545 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
546 						  tx_ring->queue_index));
547 }
548 
549 /**
550  * i40e_free_tx_resources - Free Tx resources per queue
551  * @tx_ring: Tx descriptor ring for a specific queue
552  *
553  * Free all transmit software resources
554  **/
555 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
556 {
557 	i40e_clean_tx_ring(tx_ring);
558 	kfree(tx_ring->tx_bi);
559 	tx_ring->tx_bi = NULL;
560 
561 	if (tx_ring->desc) {
562 		dma_free_coherent(tx_ring->dev, tx_ring->size,
563 				  tx_ring->desc, tx_ring->dma);
564 		tx_ring->desc = NULL;
565 	}
566 }
567 
568 /**
569  * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
571  *
572  * Since there is no access to the ring head register
573  * in XL710, we need to use our local copies
574  **/
575 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
576 {
577 	u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
578 			? ring->next_to_use
579 			: ring->next_to_use + ring->count);
580 	return ntu - ring->next_to_clean;
581 }
582 
583 /**
584  * i40e_check_tx_hang - Is there a hang in the Tx queue
585  * @tx_ring: the ring of descriptors
586  **/
587 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
588 {
589 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
590 	bool ret = false;
591 
592 	clear_check_for_tx_hang(tx_ring);
593 
594 	/* Check for a hung queue, but be thorough. This verifies
595 	 * that a transmit has been completed since the previous
596 	 * check AND there is at least one packet pending. The
597 	 * ARMED bit is set to indicate a potential hang. The
598 	 * bit is cleared if a pause frame is received to remove
599 	 * false hang detection due to PFC or 802.3x frames. By
600 	 * requiring this to fail twice we avoid races with
601 	 * PFC clearing the ARMED bit and conditions where we
602 	 * run the check_tx_hang logic with a transmit completion
603 	 * pending but without time to complete it yet.
604 	 */
605 	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
606 	    tx_pending) {
607 		/* make sure it is true for two checks in a row */
608 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
609 				       &tx_ring->state);
610 	} else {
611 		/* update completed stats and disarm the hang check */
612 		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
613 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
614 	}
615 
616 	return ret;
617 }
618 
619 /**
620  * i40e_get_head - Retrieve head from head writeback
621  * @tx_ring:  tx ring to fetch head of
622  *
623  * Returns value of Tx ring head based on value stored
624  * in head write-back location
625  **/
626 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
627 {
628 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
629 
630 	return le32_to_cpu(*(volatile __le32 *)head);
631 }
632 
633 /**
634  * i40e_clean_tx_irq - Reclaim resources after transmit completes
635  * @tx_ring:  tx ring to clean
636  * @budget:   how many cleans we're allowed
637  *
 * Returns true if there's any budget left (i.e. the clean is finished)
639  **/
640 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
641 {
642 	u16 i = tx_ring->next_to_clean;
643 	struct i40e_tx_buffer *tx_buf;
644 	struct i40e_tx_desc *tx_head;
645 	struct i40e_tx_desc *tx_desc;
646 	unsigned int total_packets = 0;
647 	unsigned int total_bytes = 0;
648 
649 	tx_buf = &tx_ring->tx_bi[i];
650 	tx_desc = I40E_TX_DESC(tx_ring, i);
651 	i -= tx_ring->count;
652 
653 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
654 
655 	do {
656 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
657 
658 		/* if next_to_watch is not set then there is no work pending */
659 		if (!eop_desc)
660 			break;
661 
662 		/* prevent any other reads prior to eop_desc */
663 		read_barrier_depends();
664 
665 		/* we have caught up to head, no work left to do */
666 		if (tx_head == tx_desc)
667 			break;
668 
669 		/* clear next_to_watch to prevent false hangs */
670 		tx_buf->next_to_watch = NULL;
671 
672 		/* update the statistics for this packet */
673 		total_bytes += tx_buf->bytecount;
674 		total_packets += tx_buf->gso_segs;
675 
676 		/* free the skb */
677 		dev_kfree_skb_any(tx_buf->skb);
678 
679 		/* unmap skb header data */
680 		dma_unmap_single(tx_ring->dev,
681 				 dma_unmap_addr(tx_buf, dma),
682 				 dma_unmap_len(tx_buf, len),
683 				 DMA_TO_DEVICE);
684 
685 		/* clear tx_buffer data */
686 		tx_buf->skb = NULL;
687 		dma_unmap_len_set(tx_buf, len, 0);
688 
689 		/* unmap remaining buffers */
690 		while (tx_desc != eop_desc) {
691 
692 			tx_buf++;
693 			tx_desc++;
694 			i++;
695 			if (unlikely(!i)) {
696 				i -= tx_ring->count;
697 				tx_buf = tx_ring->tx_bi;
698 				tx_desc = I40E_TX_DESC(tx_ring, 0);
699 			}
700 
701 			/* unmap any remaining paged data */
702 			if (dma_unmap_len(tx_buf, len)) {
703 				dma_unmap_page(tx_ring->dev,
704 					       dma_unmap_addr(tx_buf, dma),
705 					       dma_unmap_len(tx_buf, len),
706 					       DMA_TO_DEVICE);
707 				dma_unmap_len_set(tx_buf, len, 0);
708 			}
709 		}
710 
711 		/* move us one more past the eop_desc for start of next pkt */
712 		tx_buf++;
713 		tx_desc++;
714 		i++;
715 		if (unlikely(!i)) {
716 			i -= tx_ring->count;
717 			tx_buf = tx_ring->tx_bi;
718 			tx_desc = I40E_TX_DESC(tx_ring, 0);
719 		}
720 
721 		/* update budget accounting */
722 		budget--;
723 	} while (likely(budget));
724 
725 	i += tx_ring->count;
726 	tx_ring->next_to_clean = i;
727 	u64_stats_update_begin(&tx_ring->syncp);
728 	tx_ring->stats.bytes += total_bytes;
729 	tx_ring->stats.packets += total_packets;
730 	u64_stats_update_end(&tx_ring->syncp);
731 	tx_ring->q_vector->tx.total_bytes += total_bytes;
732 	tx_ring->q_vector->tx.total_packets += total_packets;
733 
734 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
735 		/* schedule immediate reset if we believe we hung */
736 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
737 			 "  VSI                  <%d>\n"
738 			 "  Tx Queue             <%d>\n"
739 			 "  next_to_use          <%x>\n"
740 			 "  next_to_clean        <%x>\n",
741 			 tx_ring->vsi->seid,
742 			 tx_ring->queue_index,
743 			 tx_ring->next_to_use, i);
744 		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
745 			 "  time_stamp           <%lx>\n"
746 			 "  jiffies              <%lx>\n",
747 			 tx_ring->tx_bi[i].time_stamp, jiffies);
748 
749 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
750 
751 		dev_info(tx_ring->dev,
752 			 "tx hang detected on queue %d, resetting adapter\n",
753 			 tx_ring->queue_index);
754 
755 		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
756 
757 		/* the adapter is about to reset, no point in enabling stuff */
758 		return true;
759 	}
760 
761 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
762 						      tx_ring->queue_index),
763 				  total_packets, total_bytes);
764 
765 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
766 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
767 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
768 		/* Make sure that anybody stopping the queue after this
769 		 * sees the new next_to_clean.
770 		 */
771 		smp_mb();
772 		if (__netif_subqueue_stopped(tx_ring->netdev,
773 					     tx_ring->queue_index) &&
774 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
775 			netif_wake_subqueue(tx_ring->netdev,
776 					    tx_ring->queue_index);
777 			++tx_ring->tx_stats.restart_queue;
778 		}
779 	}
780 
781 	return budget > 0;
782 }
783 
784 /**
785  * i40e_set_new_dynamic_itr - Find new ITR level
786  * @rc: structure containing ring performance data
787  *
788  * Stores a new ITR value based on packets and byte counts during
789  * the last interrupt.  The advantage of per interrupt computation
790  * is faster updates and more accurate ITR for the current traffic
791  * pattern.  Constants in this function were computed based on
792  * theoretical maximum wire speed and thresholds were set based on
793  * testing data as well as attempting to minimize response time
794  * while increasing bulk throughput.
795  **/
796 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
797 {
798 	enum i40e_latency_range new_latency_range = rc->latency_range;
799 	u32 new_itr = rc->itr;
800 	int bytes_per_int;
801 
802 	if (rc->total_packets == 0 || !rc->itr)
803 		return;
804 
805 	/* simple throttlerate management
806 	 *   0-10MB/s   lowest (100000 ints/s)
807 	 *  10-20MB/s   low    (20000 ints/s)
808 	 *  20-1249MB/s bulk   (8000 ints/s)
809 	 */
810 	bytes_per_int = rc->total_bytes / rc->itr;
	/* classify based on the current latency range rather than the raw
	 * ITR register value, otherwise the cases below cannot match
	 */
	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}
	rc->latency_range = new_latency_range;
827 
828 	switch (new_latency_range) {
829 	case I40E_LOWEST_LATENCY:
830 		new_itr = I40E_ITR_100K;
831 		break;
832 	case I40E_LOW_LATENCY:
833 		new_itr = I40E_ITR_20K;
834 		break;
835 	case I40E_BULK_LATENCY:
836 		new_itr = I40E_ITR_8K;
837 		break;
838 	default:
839 		break;
840 	}
841 
842 	if (new_itr != rc->itr) {
843 		/* do an exponential smoothing */
844 		new_itr = (10 * new_itr * rc->itr) /
845 			  ((9 * new_itr) + rc->itr);
846 		rc->itr = new_itr & I40E_MAX_ITR;
847 	}
848 
849 	rc->total_bytes = 0;
850 	rc->total_packets = 0;
851 }
852 
853 /**
854  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
855  * @q_vector: the vector to adjust
856  **/
857 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
858 {
859 	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
860 	struct i40e_hw *hw = &q_vector->vsi->back->hw;
861 	u32 reg_addr;
862 	u16 old_itr;
863 
864 	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
865 	old_itr = q_vector->rx.itr;
866 	i40e_set_new_dynamic_itr(&q_vector->rx);
867 	if (old_itr != q_vector->rx.itr)
868 		wr32(hw, reg_addr, q_vector->rx.itr);
869 
870 	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
871 	old_itr = q_vector->tx.itr;
872 	i40e_set_new_dynamic_itr(&q_vector->tx);
873 	if (old_itr != q_vector->tx.itr)
874 		wr32(hw, reg_addr, q_vector->tx.itr);
875 }
876 
877 /**
878  * i40e_clean_programming_status - clean the programming status descriptor
879  * @rx_ring: the rx ring that has this descriptor
880  * @rx_desc: the rx descriptor written back by HW
881  *
 * Flow Director should handle FD_FILTER_STATUS to check whether its filter
 * programming was successful and take action accordingly. FCoE should
 * handle its context/filter programming/invalidation status and take actions.
885  *
886  **/
887 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
888 					  union i40e_rx_desc *rx_desc)
889 {
890 	u64 qw;
891 	u8 id;
892 
893 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
894 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
895 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
896 
897 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
898 		i40e_fd_handle_status(rx_ring, rx_desc, id);
899 #ifdef I40E_FCOE
900 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
901 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
902 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
903 #endif
904 }
905 
906 /**
907  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
908  * @tx_ring: the tx ring to set up
909  *
910  * Return 0 on success, negative on error
911  **/
912 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
913 {
914 	struct device *dev = tx_ring->dev;
915 	int bi_size;
916 
917 	if (!dev)
918 		return -ENOMEM;
919 
920 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
921 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
922 	if (!tx_ring->tx_bi)
923 		goto err;
924 
925 	/* round up to nearest 4K */
926 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
	/* add a u32 for the head writeback location; the alignment below
	 * then guarantees the extra space is at least one cache line in size
	 */
930 	tx_ring->size += sizeof(u32);
931 	tx_ring->size = ALIGN(tx_ring->size, 4096);
932 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
933 					   &tx_ring->dma, GFP_KERNEL);
934 	if (!tx_ring->desc) {
935 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
936 			 tx_ring->size);
937 		goto err;
938 	}
939 
940 	tx_ring->next_to_use = 0;
941 	tx_ring->next_to_clean = 0;
942 	return 0;
943 
944 err:
945 	kfree(tx_ring->tx_bi);
946 	tx_ring->tx_bi = NULL;
947 	return -ENOMEM;
948 }
949 
950 /**
951  * i40e_clean_rx_ring - Free Rx buffers
952  * @rx_ring: ring to be cleaned
953  **/
954 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
955 {
956 	struct device *dev = rx_ring->dev;
957 	struct i40e_rx_buffer *rx_bi;
958 	unsigned long bi_size;
959 	u16 i;
960 
961 	/* ring already cleared, nothing to do */
962 	if (!rx_ring->rx_bi)
963 		return;
964 
965 	/* Free all the Rx ring sk_buffs */
966 	for (i = 0; i < rx_ring->count; i++) {
967 		rx_bi = &rx_ring->rx_bi[i];
968 		if (rx_bi->dma) {
969 			dma_unmap_single(dev,
970 					 rx_bi->dma,
971 					 rx_ring->rx_buf_len,
972 					 DMA_FROM_DEVICE);
973 			rx_bi->dma = 0;
974 		}
975 		if (rx_bi->skb) {
976 			dev_kfree_skb(rx_bi->skb);
977 			rx_bi->skb = NULL;
978 		}
979 		if (rx_bi->page) {
980 			if (rx_bi->page_dma) {
981 				dma_unmap_page(dev,
982 					       rx_bi->page_dma,
983 					       PAGE_SIZE / 2,
984 					       DMA_FROM_DEVICE);
985 				rx_bi->page_dma = 0;
986 			}
987 			__free_page(rx_bi->page);
988 			rx_bi->page = NULL;
989 			rx_bi->page_offset = 0;
990 		}
991 	}
992 
993 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
994 	memset(rx_ring->rx_bi, 0, bi_size);
995 
996 	/* Zero out the descriptor ring */
997 	memset(rx_ring->desc, 0, rx_ring->size);
998 
999 	rx_ring->next_to_clean = 0;
1000 	rx_ring->next_to_use = 0;
1001 }
1002 
1003 /**
1004  * i40e_free_rx_resources - Free Rx resources
1005  * @rx_ring: ring to clean the resources from
1006  *
1007  * Free all receive software resources
1008  **/
1009 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1010 {
1011 	i40e_clean_rx_ring(rx_ring);
1012 	kfree(rx_ring->rx_bi);
1013 	rx_ring->rx_bi = NULL;
1014 
1015 	if (rx_ring->desc) {
1016 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1017 				  rx_ring->desc, rx_ring->dma);
1018 		rx_ring->desc = NULL;
1019 	}
1020 }
1021 
1022 /**
1023  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1024  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1025  *
1026  * Returns 0 on success, negative on failure
1027  **/
1028 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1029 {
1030 	struct device *dev = rx_ring->dev;
1031 	int bi_size;
1032 
1033 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1034 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1035 	if (!rx_ring->rx_bi)
1036 		goto err;
1037 
1038 	/* Round up to nearest 4K */
1039 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1040 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1041 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1042 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1043 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1044 					   &rx_ring->dma, GFP_KERNEL);
1045 
1046 	if (!rx_ring->desc) {
1047 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1048 			 rx_ring->size);
1049 		goto err;
1050 	}
1051 
1052 	rx_ring->next_to_clean = 0;
1053 	rx_ring->next_to_use = 0;
1054 
1055 	return 0;
1056 err:
1057 	kfree(rx_ring->rx_bi);
1058 	rx_ring->rx_bi = NULL;
1059 	return -ENOMEM;
1060 }
1061 
1062 /**
 * i40e_release_rx_desc - Record the new next_to_use value and bump the tail
 * @rx_ring: ring to bump
 * @val: new next_to_use value to write to the tail register
1066  **/
1067 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1068 {
1069 	rx_ring->next_to_use = val;
1070 	/* Force memory writes to complete before letting h/w
1071 	 * know there are new descriptors to fetch.  (Only
1072 	 * applicable for weak-ordered memory model archs,
1073 	 * such as IA-64).
1074 	 */
1075 	wmb();
1076 	writel(val, rx_ring->tail);
1077 }
1078 
1079 /**
1080  * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
1081  * @rx_ring: ring to place buffers on
1082  * @cleaned_count: number of buffers to replace
1083  **/
1084 void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
1085 {
1086 	u16 i = rx_ring->next_to_use;
1087 	union i40e_rx_desc *rx_desc;
1088 	struct i40e_rx_buffer *bi;
1089 	struct sk_buff *skb;
1090 
1091 	/* do nothing if no valid netdev defined */
1092 	if (!rx_ring->netdev || !cleaned_count)
1093 		return;
1094 
1095 	while (cleaned_count--) {
1096 		rx_desc = I40E_RX_DESC(rx_ring, i);
1097 		bi = &rx_ring->rx_bi[i];
1098 		skb = bi->skb;
1099 
1100 		if (!skb) {
1101 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1102 							rx_ring->rx_buf_len);
1103 			if (!skb) {
1104 				rx_ring->rx_stats.alloc_buff_failed++;
1105 				goto no_buffers;
1106 			}
1107 			/* initialize queue mapping */
1108 			skb_record_rx_queue(skb, rx_ring->queue_index);
1109 			bi->skb = skb;
1110 		}
1111 
1112 		if (!bi->dma) {
1113 			bi->dma = dma_map_single(rx_ring->dev,
1114 						 skb->data,
1115 						 rx_ring->rx_buf_len,
1116 						 DMA_FROM_DEVICE);
1117 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1118 				rx_ring->rx_stats.alloc_buff_failed++;
1119 				bi->dma = 0;
1120 				goto no_buffers;
1121 			}
1122 		}
1123 
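		/* in packet-split mode the header is DMA'd to the skb buffer
		 * (hdr_addr) and the payload to a half page (pkt_addr);
		 * otherwise the whole frame lands in the skb buffer
		 */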
1124 		if (ring_is_ps_enabled(rx_ring)) {
1125 			if (!bi->page) {
1126 				bi->page = alloc_page(GFP_ATOMIC);
1127 				if (!bi->page) {
1128 					rx_ring->rx_stats.alloc_page_failed++;
1129 					goto no_buffers;
1130 				}
1131 			}
1132 
1133 			if (!bi->page_dma) {
1134 				/* use a half page if we're re-using */
1135 				bi->page_offset ^= PAGE_SIZE / 2;
1136 				bi->page_dma = dma_map_page(rx_ring->dev,
1137 							    bi->page,
1138 							    bi->page_offset,
1139 							    PAGE_SIZE / 2,
1140 							    DMA_FROM_DEVICE);
1141 				if (dma_mapping_error(rx_ring->dev,
1142 						      bi->page_dma)) {
1143 					rx_ring->rx_stats.alloc_page_failed++;
1144 					bi->page_dma = 0;
1145 					goto no_buffers;
1146 				}
1147 			}
1148 
1149 			/* Refresh the desc even if buffer_addrs didn't change
1150 			 * because each write-back erases this info.
1151 			 */
1152 			rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1153 			rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1154 		} else {
1155 			rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1156 			rx_desc->read.hdr_addr = 0;
1157 		}
1158 		i++;
1159 		if (i == rx_ring->count)
1160 			i = 0;
1161 	}
1162 
1163 no_buffers:
1164 	if (rx_ring->next_to_use != i)
1165 		i40e_release_rx_desc(rx_ring, i);
1166 }
1167 
1168 /**
1169  * i40e_receive_skb - Send a completed packet up the stack
1170  * @rx_ring:  rx ring in play
1171  * @skb: packet to send up
1172  * @vlan_tag: vlan tag for packet
1173  **/
1174 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1175 			     struct sk_buff *skb, u16 vlan_tag)
1176 {
1177 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1178 	struct i40e_vsi *vsi = rx_ring->vsi;
1179 	u64 flags = vsi->back->flags;
1180 
1181 	if (vlan_tag & VLAN_VID_MASK)
1182 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1183 
1184 	if (flags & I40E_FLAG_IN_NETPOLL)
1185 		netif_rx(skb);
1186 	else
1187 		napi_gro_receive(&q_vector->napi, skb);
1188 }
1189 
1190 /**
1191  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1192  * @vsi: the VSI we care about
1193  * @skb: skb currently being received and modified
1194  * @rx_status: status value of last descriptor in packet
1195  * @rx_error: error value of last descriptor in packet
1196  * @rx_ptype: ptype value of last descriptor in packet
1197  **/
1198 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1199 				    struct sk_buff *skb,
1200 				    u32 rx_status,
1201 				    u32 rx_error,
1202 				    u16 rx_ptype)
1203 {
1204 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1205 	bool ipv4 = false, ipv6 = false;
1206 	bool ipv4_tunnel, ipv6_tunnel;
1207 	__wsum rx_udp_csum;
1208 	struct iphdr *iph;
1209 	__sum16 csum;
1210 
1211 	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1212 		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1213 	ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1214 		      (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1215 
1216 	skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
1217 	skb->ip_summed = CHECKSUM_NONE;
1218 
1219 	/* Rx csum enabled and ip headers found? */
1220 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1221 		return;
1222 
1223 	/* did the hardware decode the packet and checksum? */
1224 	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1225 		return;
1226 
1227 	/* both known and outer_ip must be set for the below code to work */
1228 	if (!(decoded.known && decoded.outer_ip))
1229 		return;
1230 
1231 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1232 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1233 		ipv4 = true;
1234 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1235 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1236 		ipv6 = true;
1237 
1238 	if (ipv4 &&
1239 	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1240 			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1241 		goto checksum_fail;
1242 
1243 	/* likely incorrect csum if alternate IP extension headers found */
1244 	if (ipv6 &&
1245 	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1246 		/* don't increment checksum err here, non-fatal err */
1247 		return;
1248 
1249 	/* there was some L4 error, count error and punt packet to the stack */
1250 	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
1251 		goto checksum_fail;
1252 
	/* handle packets that were not able to be checksummed due
	 * to arrival speed; in this case the stack can compute
	 * the csum.
	 */
1257 	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
1258 		return;
1259 
	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
	 * it in the driver, hardware does not do it for us.
	 * Since the L3L4P bit was set we assume a valid IHL value (>=5),
	 * so the total length of the IPv4 header is IHL*4 bytes.
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
	 */
1266 	if (ipv4_tunnel &&
1267 	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
1268 	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
1269 		skb->transport_header = skb->mac_header +
1270 					sizeof(struct ethhdr) +
1271 					(ip_hdr(skb)->ihl * 4);
1272 
1273 		/* Add 4 bytes for VLAN tagged packets */
1274 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1275 					  skb->protocol == htons(ETH_P_8021AD))
1276 					  ? VLAN_HLEN : 0;
1277 
1278 		rx_udp_csum = udp_csum(skb);
1279 		iph = ip_hdr(skb);
1280 		csum = csum_tcpudp_magic(
1281 				iph->saddr, iph->daddr,
1282 				(skb->len - skb_transport_offset(skb)),
1283 				IPPROTO_UDP, rx_udp_csum);
1284 
1285 		if (udp_hdr(skb)->check != csum)
1286 			goto checksum_fail;
1287 	}
1288 
1289 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1290 
1291 	return;
1292 
1293 checksum_fail:
1294 	vsi->back->hw_csum_rx_error++;
1295 }
1296 
1297 /**
1298  * i40e_rx_hash - returns the hash value from the Rx descriptor
1299  * @ring: descriptor ring
1300  * @rx_desc: specific descriptor
1301  **/
1302 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1303 			       union i40e_rx_desc *rx_desc)
1304 {
1305 	const __le64 rss_mask =
1306 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1307 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1308 
1309 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1310 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1311 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1312 	else
1313 		return 0;
1314 }
1315 
1316 /**
1317  * i40e_ptype_to_hash - get a hash type
1318  * @ptype: the ptype value from the descriptor
1319  *
1320  * Returns a hash type to be used by skb_set_hash
1321  **/
1322 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1323 {
1324 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1325 
1326 	if (!decoded.known)
1327 		return PKT_HASH_TYPE_NONE;
1328 
1329 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1330 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1331 		return PKT_HASH_TYPE_L4;
1332 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1333 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1334 		return PKT_HASH_TYPE_L3;
1335 	else
1336 		return PKT_HASH_TYPE_L2;
1337 }
1338 
1339 /**
1340  * i40e_clean_rx_irq - Reclaim resources after receive completes
1341  * @rx_ring:  rx ring to clean
1342  * @budget:   how many cleans we're allowed
1343  *
 * Returns true if there's any budget left (i.e. the clean is finished)
1345  **/
1346 static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1347 {
1348 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1349 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1350 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1351 	const int current_node = numa_node_id();
1352 	struct i40e_vsi *vsi = rx_ring->vsi;
1353 	u16 i = rx_ring->next_to_clean;
1354 	union i40e_rx_desc *rx_desc;
1355 	u32 rx_error, rx_status;
1356 	u8 rx_ptype;
1357 	u64 qword;
1358 
1359 	if (budget <= 0)
1360 		return 0;
1361 
1362 	rx_desc = I40E_RX_DESC(rx_ring, i);
1363 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1364 	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1365 		    I40E_RXD_QW1_STATUS_SHIFT;
1366 
1367 	while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
1368 		union i40e_rx_desc *next_rxd;
1369 		struct i40e_rx_buffer *rx_bi;
1370 		struct sk_buff *skb;
1371 		u16 vlan_tag;
1372 		if (i40e_rx_is_programming_status(qword)) {
1373 			i40e_clean_programming_status(rx_ring, rx_desc);
1374 			I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1375 			goto next_desc;
1376 		}
1377 		rx_bi = &rx_ring->rx_bi[i];
1378 		skb = rx_bi->skb;
1379 		prefetch(skb->data);
1380 
1381 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1382 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1383 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1384 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1385 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1386 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1387 
1388 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1389 			   I40E_RXD_QW1_ERROR_SHIFT;
1390 		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1391 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1392 
1393 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1394 			   I40E_RXD_QW1_PTYPE_SHIFT;
1395 		rx_bi->skb = NULL;
1396 
1397 		/* This memory barrier is needed to keep us from reading
1398 		 * any other fields out of the rx_desc until we know the
1399 		 * STATUS_DD bit is set
1400 		 */
1401 		rmb();
1402 
1403 		/* Get the header and possibly the whole packet
1404 		 * If this is an skb from previous receive dma will be 0
1405 		 */
1406 		if (rx_bi->dma) {
1407 			u16 len;
1408 
1409 			if (rx_hbo)
1410 				len = I40E_RX_HDR_SIZE;
1411 			else if (rx_sph)
1412 				len = rx_header_len;
1413 			else if (rx_packet_len)
1414 				len = rx_packet_len;   /* 1buf/no split found */
1415 			else
1416 				len = rx_header_len;   /* split always mode */
1417 
1418 			skb_put(skb, len);
1419 			dma_unmap_single(rx_ring->dev,
1420 					 rx_bi->dma,
1421 					 rx_ring->rx_buf_len,
1422 					 DMA_FROM_DEVICE);
1423 			rx_bi->dma = 0;
1424 		}
1425 
1426 		/* Get the rest of the data if this was a header split */
1427 		if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
1428 
1429 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1430 					   rx_bi->page,
1431 					   rx_bi->page_offset,
1432 					   rx_packet_len);
1433 
1434 			skb->len += rx_packet_len;
1435 			skb->data_len += rx_packet_len;
1436 			skb->truesize += rx_packet_len;
1437 
1438 			if ((page_count(rx_bi->page) == 1) &&
1439 			    (page_to_nid(rx_bi->page) == current_node))
1440 				get_page(rx_bi->page);
1441 			else
1442 				rx_bi->page = NULL;
1443 
1444 			dma_unmap_page(rx_ring->dev,
1445 				       rx_bi->page_dma,
1446 				       PAGE_SIZE / 2,
1447 				       DMA_FROM_DEVICE);
1448 			rx_bi->page_dma = 0;
1449 		}
1450 		I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1451 
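		/* if EOF is not set, this frame spans multiple descriptors;
		 * in packet-split mode carry the in-progress skb over to the
		 * next buffer and keep cleaning
		 */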
1452 		if (unlikely(
1453 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1454 			struct i40e_rx_buffer *next_buffer;
1455 
1456 			next_buffer = &rx_ring->rx_bi[i];
1457 
1458 			if (ring_is_ps_enabled(rx_ring)) {
1459 				rx_bi->skb = next_buffer->skb;
1460 				rx_bi->dma = next_buffer->dma;
1461 				next_buffer->skb = skb;
1462 				next_buffer->dma = 0;
1463 			}
1464 			rx_ring->rx_stats.non_eop_descs++;
1465 			goto next_desc;
1466 		}
1467 
1468 		/* ERR_MASK will only have valid bits if EOP set */
1469 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1470 			dev_kfree_skb_any(skb);
1471 			/* TODO: shouldn't we increment a counter indicating the
1472 			 * drop?
1473 			 */
1474 			goto next_desc;
1475 		}
1476 
1477 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1478 			     i40e_ptype_to_hash(rx_ptype));
1479 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1480 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1481 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1482 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1483 			rx_ring->last_rx_timestamp = jiffies;
1484 		}
1485 
1486 		/* probably a little skewed due to removing CRC */
1487 		total_rx_bytes += skb->len;
1488 		total_rx_packets++;
1489 
1490 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1491 
1492 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1493 
1494 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1495 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1496 			 : 0;
1497 #ifdef I40E_FCOE
1498 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1499 			dev_kfree_skb_any(skb);
1500 			goto next_desc;
1501 		}
1502 #endif
1503 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1504 
1505 		rx_ring->netdev->last_rx = jiffies;
1506 		budget--;
1507 next_desc:
1508 		rx_desc->wb.qword1.status_error_len = 0;
1509 		if (!budget)
1510 			break;
1511 
1512 		cleaned_count++;
1513 		/* return some buffers to hardware, one at a time is too slow */
1514 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1515 			i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1516 			cleaned_count = 0;
1517 		}
1518 
1519 		/* use prefetched values */
1520 		rx_desc = next_rxd;
1521 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1522 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1523 			    I40E_RXD_QW1_STATUS_SHIFT;
1524 	}
1525 
1526 	rx_ring->next_to_clean = i;
1527 	u64_stats_update_begin(&rx_ring->syncp);
1528 	rx_ring->stats.packets += total_rx_packets;
1529 	rx_ring->stats.bytes += total_rx_bytes;
1530 	u64_stats_update_end(&rx_ring->syncp);
1531 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1532 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1533 
1534 	if (cleaned_count)
1535 		i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1536 
1537 	return budget > 0;
1538 }
1539 
1540 /**
1541  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1542  * @napi: napi struct with our devices info in it
1543  * @budget: amount of work driver is allowed to do this pass, in packets
1544  *
1545  * This function will clean all queues associated with a q_vector.
1546  *
1547  * Returns the amount of work done
1548  **/
1549 int i40e_napi_poll(struct napi_struct *napi, int budget)
1550 {
1551 	struct i40e_q_vector *q_vector =
1552 			       container_of(napi, struct i40e_q_vector, napi);
1553 	struct i40e_vsi *vsi = q_vector->vsi;
1554 	struct i40e_ring *ring;
1555 	bool clean_complete = true;
1556 	int budget_per_ring;
1557 
1558 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1559 		napi_complete(napi);
1560 		return 0;
1561 	}
1562 
1563 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1564 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1565 	 */
1566 	i40e_for_each_ring(ring, q_vector->tx)
1567 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1568 
1569 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1570 	 * allow the budget to go below 1 because that would exit polling early.
1571 	 */
1572 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1573 
1574 	i40e_for_each_ring(ring, q_vector->rx)
1575 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
1576 
1577 	/* If work not completed, return budget and polling will return */
1578 	if (!clean_complete)
1579 		return budget;
1580 
1581 	/* Work is done so exit the polling mode and re-enable the interrupt */
1582 	napi_complete(napi);
1583 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1584 	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1585 		i40e_update_dynamic_itr(q_vector);
1586 
1587 	if (!test_bit(__I40E_DOWN, &vsi->state)) {
1588 		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1589 			i40e_irq_dynamic_enable(vsi,
1590 					q_vector->v_idx + vsi->base_vector);
1591 		} else {
1592 			struct i40e_hw *hw = &vsi->back->hw;
1593 			/* We re-enable the queue 0 cause, but
1594 			 * don't worry about dynamic_enable
1595 			 * because we left it on for the other
1596 			 * possible interrupts during napi
1597 			 */
1598 			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1599 			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1600 			wr32(hw, I40E_QINT_RQCTL(0), qval);
1601 
1602 			qval = rd32(hw, I40E_QINT_TQCTL(0));
1603 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1604 			wr32(hw, I40E_QINT_TQCTL(0), qval);
1605 
1606 			i40e_irq_dynamic_enable_icr0(vsi->back);
1607 		}
1608 	}
1609 
1610 	return 0;
1611 }
1612 
1613 /**
1614  * i40e_atr - Add a Flow Director ATR filter
1615  * @tx_ring:  ring to add programming descriptor to
1616  * @skb:      send buffer
1617  * @flags:    send flags
1618  * @protocol: wire protocol
1619  **/
1620 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1621 		     u32 flags, __be16 protocol)
1622 {
1623 	struct i40e_filter_program_desc *fdir_desc;
1624 	struct i40e_pf *pf = tx_ring->vsi->back;
1625 	union {
1626 		unsigned char *network;
1627 		struct iphdr *ipv4;
1628 		struct ipv6hdr *ipv6;
1629 	} hdr;
1630 	struct tcphdr *th;
1631 	unsigned int hlen;
1632 	u32 flex_ptype, dtype_cmd;
1633 	u16 i;
1634 
1635 	/* make sure ATR is enabled */
1636 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1637 		return;
1638 
1639 	/* if sampling is disabled do nothing */
1640 	if (!tx_ring->atr_sample_rate)
1641 		return;
1642 
1643 	/* snag network header to get L4 type and address */
1644 	hdr.network = skb_network_header(skb);
1645 
1646 	/* Currently only IPv4/IPv6 with TCP is supported */
1647 	if (protocol == htons(ETH_P_IP)) {
1648 		if (hdr.ipv4->protocol != IPPROTO_TCP)
1649 			return;
1650 
1651 		/* access ihl as a u8 to avoid unaligned access on ia64 */
1652 		hlen = (hdr.network[0] & 0x0F) << 2;
1653 	} else if (protocol == htons(ETH_P_IPV6)) {
1654 		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1655 			return;
1656 
1657 		hlen = sizeof(struct ipv6hdr);
1658 	} else {
1659 		return;
1660 	}
1661 
1662 	th = (struct tcphdr *)(hdr.network + hlen);
1663 
1664 	/* Due to lack of space, no more new filters can be programmed */
1665 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1666 		return;
1667 
1668 	tx_ring->atr_count++;
1669 
1670 	/* sample on all syn/fin/rst packets or once every atr sample rate */
1671 	if (!th->fin &&
1672 	    !th->syn &&
1673 	    !th->rst &&
1674 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
1675 		return;
1676 
1677 	tx_ring->atr_count = 0;
1678 
1679 	/* grab the next descriptor */
1680 	i = tx_ring->next_to_use;
1681 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1682 
1683 	i++;
1684 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1685 
1686 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1687 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
1688 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1689 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1690 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1691 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1692 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1693 
1694 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1695 
1696 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1697 
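	/* a FIN or RST means the flow is ending, so remove the ATR filter
	 * instead of adding/updating it
	 */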
1698 	dtype_cmd |= (th->fin || th->rst) ?
1699 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1700 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
1701 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
1702 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
1703 
1704 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
1705 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
1706 
1707 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
1708 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
1709 
1710 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
1711 	dtype_cmd |=
1712 		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
1713 		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
1714 
1715 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
1716 	fdir_desc->rsvd = cpu_to_le32(0);
1717 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
1718 	fdir_desc->fd_id = cpu_to_le32(0);
1719 }
1720 
1721 /**
1722  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
1723  * @skb:     send buffer
1724  * @tx_ring: ring to send buffer on
1725  * @flags:   the tx flags to be set
1726  *
 * Checks the skb and sets up accordingly the generic transmit flags related
 * to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped, otherwise
 * returns 0 to indicate the flags have been set properly.
1732  **/
1733 #ifdef I40E_FCOE
1734 int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1735 			       struct i40e_ring *tx_ring,
1736 			       u32 *flags)
1737 #else
1738 static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1739 				      struct i40e_ring *tx_ring,
1740 				      u32 *flags)
1741 #endif
1742 {
1743 	__be16 protocol = skb->protocol;
1744 	u32  tx_flags = 0;
1745 
1746 	/* if we have a HW VLAN tag being added, default to the HW one */
1747 	if (vlan_tx_tag_present(skb)) {
1748 		tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
1749 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1750 	/* else if it is a SW VLAN, check the next protocol and store the tag */
1751 	} else if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vhdr, _vhdr;

		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
1754 		if (!vhdr)
1755 			return -EINVAL;
1756 
1757 		protocol = vhdr->h_vlan_encapsulated_proto;
1758 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
1759 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
1760 	}
1761 
1762 	/* Insert 802.1p priority into VLAN header */
1763 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
1764 	    (skb->priority != TC_PRIO_CONTROL)) {
1765 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
1766 		tx_flags |= (skb->priority & 0x7) <<
1767 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
1768 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
1769 			struct vlan_ethhdr *vhdr;
1770 			int rc;
1771 
1772 			rc = skb_cow_head(skb, 0);
1773 			if (rc < 0)
1774 				return rc;
1775 			vhdr = (struct vlan_ethhdr *)skb->data;
1776 			vhdr->h_vlan_TCI = htons(tx_flags >>
1777 						 I40E_TX_FLAGS_VLAN_SHIFT);
1778 		} else {
1779 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1780 		}
1781 	}
1782 	*flags = tx_flags;
1783 	return 0;
1784 }
1785 
1786 /**
1787  * i40e_tso - set up the tso context descriptor
1788  * @tx_ring:  ptr to the ring to send
1789  * @skb:      ptr to the skb we're sending
1790  * @tx_flags: the collected send information
1791  * @protocol: the send protocol
 * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to Quad Word 1 of the context descriptor
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if TSO is set up, or a negative error code
1796  **/
1797 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
1798 		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
1799 		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
1800 {
1801 	u32 cd_cmd, cd_tso_len, cd_mss;
1802 	struct ipv6hdr *ipv6h;
1803 	struct tcphdr *tcph;
1804 	struct iphdr *iph;
1805 	u32 l4len;
1806 	int err;
1807 
1808 	if (!skb_is_gso(skb))
1809 		return 0;
1810 
1811 	err = skb_cow_head(skb, 0);
1812 	if (err < 0)
1813 		return err;
1814 
1815 	if (protocol == htons(ETH_P_IP)) {
1816 		iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
1817 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1818 		iph->tot_len = 0;
1819 		iph->check = 0;
1820 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1821 						 0, IPPROTO_TCP, 0);
	} else if (skb_is_gso_v6(skb)) {
1824 		ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
1825 					   : ipv6_hdr(skb);
1826 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1827 		ipv6h->payload_len = 0;
1828 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
1829 					       0, IPPROTO_TCP, 0);
1830 	}
1831 
1832 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
1833 	*hdr_len = (skb->encapsulation
1834 		    ? (skb_inner_transport_header(skb) - skb->data)
1835 		    : skb_transport_offset(skb)) + l4len;
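	/* hdr_len now covers everything up to and including the (inner) TCP
	 * header; only the payload beyond it is segmented by the hardware
	 */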
1836 
1837 	/* find the field values */
1838 	cd_cmd = I40E_TX_CTX_DESC_TSO;
1839 	cd_tso_len = skb->len - *hdr_len;
1840 	cd_mss = skb_shinfo(skb)->gso_size;
1841 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
1842 				((u64)cd_tso_len <<
1843 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1844 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
1845 	return 1;
1846 }
1847 
1848 /**
1849  * i40e_tsyn - set up the tsyn context descriptor
1850  * @tx_ring:  ptr to the ring to send
1851  * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to Quad Word 1 of the context descriptor
 *
 * Returns 0 if no Tx timestamp will be taken, 1 if a timestamp was requested
1855  **/
1856 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
1857 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
1858 {
1859 	struct i40e_pf *pf;
1860 
1861 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
1862 		return 0;
1863 
1864 	/* Tx timestamps cannot be sampled when doing TSO */
1865 	if (tx_flags & I40E_TX_FLAGS_TSO)
1866 		return 0;
1867 
1868 	/* only timestamp the outbound packet if the user has requested it and
1869 	 * we are not already transmitting a packet to be timestamped
1870 	 */
1871 	pf = i40e_netdev_to_pf(tx_ring->netdev);
1872 	if (pf->ptp_tx &&
1873 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
1874 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
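		/* take an extra reference on the skb; it is dropped once the
		 * hardware timestamp has been retrieved
		 */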
1875 		pf->ptp_tx_skb = skb_get(skb);
1876 	} else {
1877 		return 0;
1878 	}
1879 
1880 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
1881 				I40E_TXD_CTX_QW1_CMD_SHIFT;
1882 
1883 	return 1;
1884 }
1885 
1886 /**
1887  * i40e_tx_enable_csum - Enable Tx checksum offloads
1888  * @skb: send buffer
1889  * @tx_flags: Tx flags currently set
1890  * @td_cmd: Tx descriptor command bits to set
1891  * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
1893  **/
1894 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
1895 				u32 *td_cmd, u32 *td_offset,
1896 				struct i40e_ring *tx_ring,
1897 				u32 *cd_tunneling)
1898 {
1899 	struct ipv6hdr *this_ipv6_hdr;
1900 	unsigned int this_tcp_hdrlen;
1901 	struct iphdr *this_ip_hdr;
1902 	u32 network_hdr_len;
1903 	u8 l4_hdr = 0;
1904 
1905 	if (skb->encapsulation) {
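		/* use the inner headers for the per-packet checksum fields;
		 * the outer IP header and tunnel lengths are programmed via
		 * the tunneling context descriptor below
		 */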
1906 		network_hdr_len = skb_inner_network_header_len(skb);
1907 		this_ip_hdr = inner_ip_hdr(skb);
1908 		this_ipv6_hdr = inner_ipv6_hdr(skb);
1909 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
1910 
		if (tx_flags & I40E_TX_FLAGS_IPV4) {
1913 			if (tx_flags & I40E_TX_FLAGS_TSO) {
1914 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
1915 				ip_hdr(skb)->check = 0;
1916 			} else {
1917 				*cd_tunneling |=
1918 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1919 			}
		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
			/* the outer IPv6 header carries no checksum of its
			 * own, so only flag the extended IP type
			 */
			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
1928 		}
1929 
1930 		/* Now set the ctx descriptor fields */
1931 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
1932 					I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
1933 				   I40E_TXD_CTX_UDP_TUNNELING            |
1934 				   ((skb_inner_network_offset(skb) -
1935 					skb_transport_offset(skb)) >> 1) <<
1936 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
1937 
1938 	} else {
1939 		network_hdr_len = skb_network_header_len(skb);
1940 		this_ip_hdr = ip_hdr(skb);
1941 		this_ipv6_hdr = ipv6_hdr(skb);
1942 		this_tcp_hdrlen = tcp_hdrlen(skb);
1943 	}
1944 
1945 	/* Enable IP checksum offloads */
1946 	if (tx_flags & I40E_TX_FLAGS_IPV4) {
1947 		l4_hdr = this_ip_hdr->protocol;
1948 		/* the stack computes the IP header already, the only time we
1949 		 * need the hardware to recompute it is in the case of TSO.
1950 		 */
1951 		if (tx_flags & I40E_TX_FLAGS_TSO) {
1952 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
1953 			this_ip_hdr->check = 0;
1954 		} else {
1955 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
1956 		}
1957 		/* Now set the td_offset for IP header length */
1958 		*td_offset = (network_hdr_len >> 2) <<
1959 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1960 	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1961 		l4_hdr = this_ipv6_hdr->nexthdr;
1962 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
1963 		/* Now set the td_offset for IP header length */
1964 		*td_offset = (network_hdr_len >> 2) <<
1965 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1966 	}
1967 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
1968 	*td_offset |= (skb_network_offset(skb) >> 1) <<
1969 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
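	/* e.g. an untagged IPv4/TCP frame: 14-byte MAC header -> MACLEN of
	 * 7 words, 20-byte IP header -> IPLEN of 5 dwords, 20-byte TCP
	 * header -> L4LEN of 5 dwords
	 */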
1970 
1971 	/* Enable L4 checksum offloads */
1972 	switch (l4_hdr) {
1973 	case IPPROTO_TCP:
1974 		/* enable checksum offloads */
1975 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
1976 		*td_offset |= (this_tcp_hdrlen >> 2) <<
1977 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1978 		break;
1979 	case IPPROTO_SCTP:
1980 		/* enable SCTP checksum offload */
1981 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
1982 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
1983 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1984 		break;
1985 	case IPPROTO_UDP:
1986 		/* enable UDP checksum offload */
1987 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
1988 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
1989 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1990 		break;
1991 	default:
1992 		break;
1993 	}
1994 }
1995 
1996 /**
 * i40e_create_tx_ctx - Build the Tx context descriptor
1998  * @tx_ring:  ring to create the descriptor on
1999  * @cd_type_cmd_tso_mss: Quad Word 1
2000  * @cd_tunneling: Quad Word 0 - bits 0-31
2001  * @cd_l2tag2: Quad Word 0 - bits 32-63
2002  **/
2003 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2004 			       const u64 cd_type_cmd_tso_mss,
2005 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2006 {
2007 	struct i40e_tx_context_desc *context_desc;
2008 	int i = tx_ring->next_to_use;
2009 
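	/* if nothing beyond the bare CONTEXT dtype is set, there is nothing
	 * to program, so don't burn a descriptor
	 */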
2010 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2011 	    !cd_tunneling && !cd_l2tag2)
2012 		return;
2013 
2014 	/* grab the next descriptor */
2015 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2016 
2017 	i++;
2018 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2019 
2020 	/* cpu_to_le32 and assign to struct fields */
2021 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2022 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2023 	context_desc->rsvd = cpu_to_le16(0);
2024 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2025 }
2026 
2027 /**
2028  * i40e_tx_map - Build the Tx descriptor
2029  * @tx_ring:  ring to send buffer on
2030  * @skb:      send buffer
2031  * @first:    first buffer info buffer to use
2032  * @tx_flags: collected send information
2033  * @hdr_len:  size of the packet header
2034  * @td_cmd:   the command field in the descriptor
2035  * @td_offset: offset for checksum or crc
2036  **/
2037 #ifdef I40E_FCOE
2038 void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2039 		 struct i40e_tx_buffer *first, u32 tx_flags,
2040 		 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2041 #else
2042 static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2043 			struct i40e_tx_buffer *first, u32 tx_flags,
2044 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2045 #endif
2046 {
2047 	unsigned int data_len = skb->data_len;
2048 	unsigned int size = skb_headlen(skb);
2049 	struct skb_frag_struct *frag;
2050 	struct i40e_tx_buffer *tx_bi;
2051 	struct i40e_tx_desc *tx_desc;
2052 	u16 i = tx_ring->next_to_use;
2053 	u32 td_tag = 0;
2054 	dma_addr_t dma;
2055 	u16 gso_segs;
2056 
2057 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2058 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2059 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2060 			 I40E_TX_FLAGS_VLAN_SHIFT;
2061 	}
2062 
2063 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2064 		gso_segs = skb_shinfo(skb)->gso_segs;
2065 	else
2066 		gso_segs = 1;
2067 
	/* count the headers once per segment so bytecount reflects the bytes
	 * that will actually go out on the wire (used for BQL and stats)
	 */
2069 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2070 	first->gso_segs = gso_segs;
2071 	first->skb = skb;
2072 	first->tx_flags = tx_flags;
2073 
2074 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2075 
2076 	tx_desc = I40E_TX_DESC(tx_ring, i);
2077 	tx_bi = first;
2078 
2079 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2080 		if (dma_mapping_error(tx_ring->dev, dma))
2081 			goto dma_error;
2082 
2083 		/* record length, and DMA address */
2084 		dma_unmap_len_set(tx_bi, len, size);
2085 		dma_unmap_addr_set(tx_bi, dma, dma);
2086 
2087 		tx_desc->buffer_addr = cpu_to_le64(dma);
2088 
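		/* a buffer larger than the hardware limit is split across
		 * several data descriptors of at most I40E_MAX_DATA_PER_TXD
		 * bytes each
		 */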
2089 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2090 			tx_desc->cmd_type_offset_bsz =
2091 				build_ctob(td_cmd, td_offset,
2092 					   I40E_MAX_DATA_PER_TXD, td_tag);
2093 
2094 			tx_desc++;
2095 			i++;
2096 			if (i == tx_ring->count) {
2097 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2098 				i = 0;
2099 			}
2100 
2101 			dma += I40E_MAX_DATA_PER_TXD;
2102 			size -= I40E_MAX_DATA_PER_TXD;
2103 
2104 			tx_desc->buffer_addr = cpu_to_le64(dma);
2105 		}
2106 
2107 		if (likely(!data_len))
2108 			break;
2109 
2110 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2111 							  size, td_tag);
2112 
2113 		tx_desc++;
2114 		i++;
2115 		if (i == tx_ring->count) {
2116 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2117 			i = 0;
2118 		}
2119 
2120 		size = skb_frag_size(frag);
2121 		data_len -= size;
2122 
2123 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2124 				       DMA_TO_DEVICE);
2125 
2126 		tx_bi = &tx_ring->tx_bi[i];
2127 	}
2128 
	/* Only request a descriptor write-back (RS) when the packet's last
	 * descriptor lands on the write-back stride boundary (WB_STRIDE, i.e.
	 * every 4th descriptor) or the packet spans a stride group; packets
	 * contained within one group get EOP alone so write-backs are batched.
	 */
2132 #define WB_STRIDE 0x3
2133 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2134 	    (first <= &tx_ring->tx_bi[i]) &&
2135 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2136 		tx_desc->cmd_type_offset_bsz =
2137 			build_ctob(td_cmd, td_offset, size, td_tag) |
2138 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2139 					 I40E_TXD_QW1_CMD_SHIFT);
2140 	} else {
2141 		tx_desc->cmd_type_offset_bsz =
2142 			build_ctob(td_cmd, td_offset, size, td_tag) |
2143 			cpu_to_le64((u64)I40E_TXD_CMD <<
2144 					 I40E_TXD_QW1_CMD_SHIFT);
2145 	}
2146 
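	/* account the queued bytes with byte queue limits (BQL) */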
2147 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2148 						 tx_ring->queue_index),
2149 			     first->bytecount);
2150 
2151 	/* set the timestamp */
2152 	first->time_stamp = jiffies;
2153 
2154 	/* Force memory writes to complete before letting h/w
2155 	 * know there are new descriptors to fetch.  (Only
2156 	 * applicable for weak-ordered memory model archs,
2157 	 * such as IA-64).
2158 	 */
2159 	wmb();
2160 
2161 	/* set next_to_watch value indicating a packet is present */
2162 	first->next_to_watch = tx_desc;
2163 
2164 	i++;
2165 	if (i == tx_ring->count)
2166 		i = 0;
2167 
2168 	tx_ring->next_to_use = i;
2169 
2170 	/* notify HW of packet */
2171 	writel(i, tx_ring->tail);
2172 
2173 	return;
2174 
2175 dma_error:
2176 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2177 
2178 	/* clear dma mappings for failed tx_bi map */
2179 	for (;;) {
2180 		tx_bi = &tx_ring->tx_bi[i];
2181 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2182 		if (tx_bi == first)
2183 			break;
2184 		if (i == 0)
2185 			i = tx_ring->count;
2186 		i--;
2187 	}
2188 
2189 	tx_ring->next_to_use = i;
2190 }
2191 
2192 /**
2193  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2194  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to assure are available
2196  *
2197  * Returns -EBUSY if a stop is needed, else 0
2198  **/
2199 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2200 {
2201 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2202 	/* Memory barrier before checking head and tail */
2203 	smp_mb();
2204 
	/* Check again in case another CPU has just made room available. */
2206 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2207 		return -EBUSY;
2208 
2209 	/* A reprieve! - use start_queue because it doesn't call schedule */
2210 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2211 	++tx_ring->tx_stats.restart_queue;
2212 	return 0;
2213 }
2214 
2215 /**
2216  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2217  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to assure are available
2219  *
2220  * Returns 0 if stop is not needed
2221  **/
2222 #ifdef I40E_FCOE
2223 int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2224 #else
2225 static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2226 #endif
2227 {
2228 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2229 		return 0;
2230 	return __i40e_maybe_stop_tx(tx_ring, size);
2231 }
2232 
2233 /**
2234  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2235  * @skb:     send buffer
2236  * @tx_ring: ring to send buffer on
2237  *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring since we
 * need at least one descriptor.
2241  **/
2242 #ifdef I40E_FCOE
2243 int i40e_xmit_descriptor_count(struct sk_buff *skb,
2244 			       struct i40e_ring *tx_ring)
2245 #else
2246 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
2247 				      struct i40e_ring *tx_ring)
2248 #endif
2249 {
2250 	unsigned int f;
2251 	int count = 0;
2252 
2253 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2254 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2255 	 *       + 4 desc gap to avoid the cache line where head is,
2256 	 *       + 1 desc for context descriptor,
2257 	 * otherwise try next time
2258 	 */
2259 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2260 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2261 
2262 	count += TXD_USE_COUNT(skb_headlen(skb));
2263 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2264 		tx_ring->tx_stats.tx_busy++;
2265 		return 0;
2266 	}
2267 	return count;
2268 }
2269 
2270 /**
2271  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2272  * @skb:     send buffer
2273  * @tx_ring: ring to send buffer on
2274  *
2275  * Returns NETDEV_TX_OK if sent, else an error code
2276  **/
2277 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2278 					struct i40e_ring *tx_ring)
2279 {
2280 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2281 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2282 	struct i40e_tx_buffer *first;
2283 	u32 td_offset = 0;
2284 	u32 tx_flags = 0;
2285 	__be16 protocol;
2286 	u32 td_cmd = 0;
2287 	u8 hdr_len = 0;
2288 	int tsyn;
	int tso;

	if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
2291 		return NETDEV_TX_BUSY;
2292 
2293 	/* prepare the xmit flags */
2294 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2295 		goto out_drop;
2296 
2297 	/* obtain protocol of skb */
2298 	protocol = skb->protocol;
2299 
2300 	/* record the location of the first descriptor for this packet */
2301 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2302 
2303 	/* setup IPv4/IPv6 offloads */
2304 	if (protocol == htons(ETH_P_IP))
2305 		tx_flags |= I40E_TX_FLAGS_IPV4;
2306 	else if (protocol == htons(ETH_P_IPV6))
2307 		tx_flags |= I40E_TX_FLAGS_IPV6;
2308 
2309 	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
2310 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2311 
2312 	if (tso < 0)
2313 		goto out_drop;
2314 	else if (tso)
2315 		tx_flags |= I40E_TX_FLAGS_TSO;
2316 
2317 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2318 
2319 	if (tsyn)
2320 		tx_flags |= I40E_TX_FLAGS_TSYN;
2321 
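	/* software timestamps, if requested, are taken here; any hardware
	 * timestamp request was armed in i40e_tsyn() above
	 */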
2322 	skb_tx_timestamp(skb);
2323 
2324 	/* always enable CRC insertion offload */
2325 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2326 
2327 	/* Always offload the checksum, since it's in the data descriptor */
2328 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2329 		tx_flags |= I40E_TX_FLAGS_CSUM;
2330 
2331 		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
2332 				    tx_ring, &cd_tunneling);
2333 	}
2334 
2335 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2336 			   cd_tunneling, cd_l2tag2);
2337 
2338 	/* Add Flow Director ATR if it's enabled.
2339 	 *
2340 	 * NOTE: this must always be directly before the data descriptor.
2341 	 */
2342 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2343 
2344 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2345 		    td_cmd, td_offset);
2346 
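	/* stop the queue now if a worst-case frame would no longer fit,
	 * rather than failing the next transmit attempt
	 */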
2347 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2348 
2349 	return NETDEV_TX_OK;
2350 
2351 out_drop:
2352 	dev_kfree_skb_any(skb);
2353 	return NETDEV_TX_OK;
2354 }
2355 
2356 /**
2357  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2358  * @skb:    send buffer
2359  * @netdev: network interface device structure
2360  *
2361  * Returns NETDEV_TX_OK if sent, else an error code
2362  **/
2363 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2364 {
2365 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2366 	struct i40e_vsi *vsi = np->vsi;
2367 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2368 
	/* hardware can't handle frames shorter than I40E_MIN_TX_LEN, so pad
	 * them out here; hardware padding covers everything beyond that
	 */
2372 	if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
2373 		if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
2374 			return NETDEV_TX_OK;
2375 		skb->len = I40E_MIN_TX_LEN;
2376 		skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
2377 	}
2378 
2379 	return i40e_xmit_frame_ring(skb, tx_ring);
2380 }
2381