1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include "i40e.h"
29 #include "i40e_prototype.h"
30 
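/**
 * build_ctob - build the cmd_type_offset_bsz quadword of a Tx data descriptor
 * @td_cmd:    command bits (EOP, RS, offload enables, ...)
 * @td_offset: header offsets for the offload engines
 * @size:      buffer size in bytes
 * @td_tag:    L2 tag 1 (VLAN tag) value
 **/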
31 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
32 				u32 td_tag)
33 {
34 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
35 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
36 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
37 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
38 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
39 }
40 
41 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
42 #define I40E_FD_CLEAN_DELAY 10
43 /**
44  * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: the flow director filter spec used to build the descriptors
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: the PF pointer
 * @add: true for add/update, false for remove
49  **/
50 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
51 			     struct i40e_pf *pf, bool add)
52 {
53 	struct i40e_filter_program_desc *fdir_desc;
54 	struct i40e_tx_buffer *tx_buf, *first;
55 	struct i40e_tx_desc *tx_desc;
56 	struct i40e_ring *tx_ring;
57 	unsigned int fpt, dcc;
58 	struct i40e_vsi *vsi;
59 	struct device *dev;
60 	dma_addr_t dma;
61 	u32 td_cmd = 0;
62 	u16 delay = 0;
63 	u16 i;
64 
65 	/* find existing FDIR VSI */
66 	vsi = NULL;
67 	for (i = 0; i < pf->num_alloc_vsi; i++)
68 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
69 			vsi = pf->vsi[i];
70 	if (!vsi)
71 		return -ENOENT;
72 
73 	tx_ring = vsi->tx_rings[0];
74 	dev = tx_ring->dev;
75 
76 	/* we need two descriptors to add/del a filter and we can wait */
77 	do {
78 		if (I40E_DESC_UNUSED(tx_ring) > 1)
79 			break;
80 		msleep_interruptible(1);
81 		delay++;
82 	} while (delay < I40E_FD_CLEAN_DELAY);
83 
	if (I40E_DESC_UNUSED(tx_ring) <= 1)
85 		return -EAGAIN;
86 
87 	dma = dma_map_single(dev, raw_packet,
88 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
89 	if (dma_mapping_error(dev, dma))
90 		goto dma_fail;
91 
92 	/* grab the next descriptor */
93 	i = tx_ring->next_to_use;
94 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
95 	first = &tx_ring->tx_bi[i];
96 	memset(first, 0, sizeof(struct i40e_tx_buffer));
97 
98 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
99 
100 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
101 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
102 
103 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
104 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
105 
106 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
107 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
108 
109 	/* Use LAN VSI Id if not programmed by user */
110 	if (fdir_data->dest_vsi == 0)
111 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
112 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
113 	else
114 		fpt |= ((u32)fdir_data->dest_vsi <<
115 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
116 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
117 
118 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
119 
120 	if (add)
121 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
122 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
123 	else
124 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
125 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
126 
127 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
128 	       I40E_TXD_FLTR_QW1_DEST_MASK;
129 
130 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
131 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
132 
133 	if (fdir_data->cnt_index != 0) {
134 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
135 		dcc |= ((u32)fdir_data->cnt_index <<
136 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
137 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
138 	}
139 
140 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
141 	fdir_desc->rsvd = cpu_to_le32(0);
142 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
143 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
144 
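	/* The filter itself is described by the program descriptor filled in
	 * above; the dummy data descriptor below points at the raw packet,
	 * which the hardware parses for the fields the filter matches on.
	 */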
145 	/* Now program a dummy descriptor */
146 	i = tx_ring->next_to_use;
147 	tx_desc = I40E_TX_DESC(tx_ring, i);
148 	tx_buf = &tx_ring->tx_bi[i];
149 
150 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
151 
152 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
153 
154 	/* record length, and DMA address */
155 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
156 	dma_unmap_addr_set(tx_buf, dma, dma);
157 
158 	tx_desc->buffer_addr = cpu_to_le64(dma);
159 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
160 
161 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
162 	tx_buf->raw_buf = (void *)raw_packet;
163 
164 	tx_desc->cmd_type_offset_bsz =
165 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
166 
167 	/* set the timestamp */
168 	tx_buf->time_stamp = jiffies;
169 
170 	/* Force memory writes to complete before letting h/w
171 	 * know there are new descriptors to fetch.
172 	 */
173 	wmb();
174 
175 	/* Mark the data descriptor to be watched */
176 	first->next_to_watch = tx_desc;
177 
178 	writel(tx_ring->next_to_use, tx_ring->tail);
179 	return 0;
180 
181 dma_fail:
182 	return -1;
183 }
184 
185 #define IP_HEADER_OFFSET 14
186 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
187 /**
188  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
189  * @vsi: pointer to the targeted VSI
190  * @fd_data: the flow director data required for the FDir descriptor
191  * @add: true adds a filter, false removes it
192  *
193  * Returns 0 if the filters were successfully added or removed
194  **/
195 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
196 				   struct i40e_fdir_filter *fd_data,
197 				   bool add)
198 {
199 	struct i40e_pf *pf = vsi->back;
200 	struct udphdr *udp;
201 	struct iphdr *ip;
202 	bool err = false;
203 	u8 *raw_packet;
204 	int ret;
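	/* minimal 42-byte Ethernet/IPv4/UDP frame used as the FDir dummy
	 * packet: ethertype 0x0800, IHL 5, total length 28, DF set, TTL 64,
	 * protocol 17 (UDP); addresses and ports are patched in below
	 */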
205 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
206 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
207 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
208 
209 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 	if (!raw_packet)
211 		return -ENOMEM;
212 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
213 
214 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
215 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
216 	      + sizeof(struct iphdr));
217 
218 	ip->daddr = fd_data->dst_ip[0];
219 	udp->dest = fd_data->dst_port;
220 	ip->saddr = fd_data->src_ip[0];
221 	udp->source = fd_data->src_port;
222 
223 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
224 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
225 	if (ret) {
226 		dev_info(&pf->pdev->dev,
227 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
228 			 fd_data->pctype, fd_data->fd_id, ret);
229 		err = true;
230 	} else {
231 		if (add)
232 			dev_info(&pf->pdev->dev,
233 				 "Filter OK for PCTYPE %d loc = %d\n",
234 				 fd_data->pctype, fd_data->fd_id);
235 		else
236 			dev_info(&pf->pdev->dev,
237 				 "Filter deleted for PCTYPE %d loc = %d\n",
238 				 fd_data->pctype, fd_data->fd_id);
239 	}
240 	return err ? -EOPNOTSUPP : 0;
241 }
242 
243 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
244 /**
245  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
246  * @vsi: pointer to the targeted VSI
247  * @fd_data: the flow director data required for the FDir descriptor
248  * @add: true adds a filter, false removes it
249  *
250  * Returns 0 if the filters were successfully added or removed
251  **/
252 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
253 				   struct i40e_fdir_filter *fd_data,
254 				   bool add)
255 {
256 	struct i40e_pf *pf = vsi->back;
257 	struct tcphdr *tcp;
258 	struct iphdr *ip;
259 	bool err = false;
260 	u8 *raw_packet;
261 	int ret;
262 	/* Dummy packet */
263 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
264 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
265 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
266 		0x0, 0x72, 0, 0, 0, 0};
267 
268 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
269 	if (!raw_packet)
270 		return -ENOMEM;
271 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
272 
273 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
274 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
275 	      + sizeof(struct iphdr));
276 
277 	ip->daddr = fd_data->dst_ip[0];
278 	tcp->dest = fd_data->dst_port;
279 	ip->saddr = fd_data->src_ip[0];
280 	tcp->source = fd_data->src_port;
281 
282 	if (add) {
283 		pf->fd_tcp_rule++;
284 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
285 			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
286 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
287 		}
288 	} else {
289 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
290 				  (pf->fd_tcp_rule - 1) : 0;
291 		if (pf->fd_tcp_rule == 0) {
292 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
293 			dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
294 		}
295 	}
296 
297 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
299 
300 	if (ret) {
301 		dev_info(&pf->pdev->dev,
302 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 			 fd_data->pctype, fd_data->fd_id, ret);
304 		err = true;
305 	} else {
306 		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
308 				 fd_data->pctype, fd_data->fd_id);
309 		else
310 			dev_info(&pf->pdev->dev,
311 				 "Filter deleted for PCTYPE %d loc = %d\n",
312 				 fd_data->pctype, fd_data->fd_id);
313 	}
314 
315 	return err ? -EOPNOTSUPP : 0;
316 }
317 
318 /**
319  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320  * a specific flow spec
321  * @vsi: pointer to the targeted VSI
322  * @fd_data: the flow director data required for the FDir descriptor
323  * @add: true adds a filter, false removes it
324  *
325  * Always returns -EOPNOTSUPP
326  **/
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 				    struct i40e_fdir_filter *fd_data,
329 				    bool add)
330 {
331 	return -EOPNOTSUPP;
332 }
333 
334 #define I40E_IP_DUMMY_PACKET_LEN 34
335 /**
336  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337  * a specific flow spec
338  * @vsi: pointer to the targeted VSI
339  * @fd_data: the flow director data required for the FDir descriptor
340  * @add: true adds a filter, false removes it
341  *
342  * Returns 0 if the filters were successfully added or removed
343  **/
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 				  struct i40e_fdir_filter *fd_data,
346 				  bool add)
347 {
348 	struct i40e_pf *pf = vsi->back;
349 	struct iphdr *ip;
350 	bool err = false;
351 	u8 *raw_packet;
352 	int ret;
353 	int i;
354 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
356 		0, 0, 0, 0};
357 
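	/* a plain IPv4 rule is programmed once per pctype in this range, i.e.
	 * for both NONF_IPV4_OTHER and FRAG_IPV4, so unfragmented and
	 * fragmented traffic are matched alike
	 */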
358 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
360 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
361 		if (!raw_packet)
362 			return -ENOMEM;
363 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
365 
366 		ip->saddr = fd_data->src_ip[0];
367 		ip->daddr = fd_data->dst_ip[0];
368 		ip->protocol = 0;
369 
370 		fd_data->pctype = i;
371 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
372 
373 		if (ret) {
374 			dev_info(&pf->pdev->dev,
375 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 				 fd_data->pctype, fd_data->fd_id, ret);
377 			err = true;
378 		} else {
379 			if (add)
380 				dev_info(&pf->pdev->dev,
381 					 "Filter OK for PCTYPE %d loc = %d\n",
382 					 fd_data->pctype, fd_data->fd_id);
383 			else
384 				dev_info(&pf->pdev->dev,
385 					 "Filter deleted for PCTYPE %d loc = %d\n",
386 					 fd_data->pctype, fd_data->fd_id);
387 		}
388 	}
389 
390 	return err ? -EOPNOTSUPP : 0;
391 }
392 
393 /**
394  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395  * @vsi: pointer to the targeted VSI
 * @input: the flow director filter spec to add or remove
397  * @add: true adds a filter, false removes it
398  *
399  **/
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 		      struct i40e_fdir_filter *input, bool add)
402 {
403 	struct i40e_pf *pf = vsi->back;
404 	int ret;
405 
406 	switch (input->flow_type & ~FLOW_EXT) {
407 	case TCP_V4_FLOW:
408 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
409 		break;
410 	case UDP_V4_FLOW:
411 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
412 		break;
413 	case SCTP_V4_FLOW:
414 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
415 		break;
416 	case IPV4_FLOW:
417 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
418 		break;
419 	case IP_USER_FLOW:
420 		switch (input->ip4_proto) {
421 		case IPPROTO_TCP:
422 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
423 			break;
424 		case IPPROTO_UDP:
425 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
426 			break;
427 		case IPPROTO_SCTP:
428 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
429 			break;
430 		default:
431 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
432 			break;
433 		}
434 		break;
435 	default:
		dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
437 			 input->flow_type);
438 		ret = -EINVAL;
439 	}
440 
441 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
442 	return ret;
443 }
444 
445 /**
446  * i40e_fd_handle_status - check the Programming Status for FD
447  * @rx_ring: the Rx ring for this descriptor
448  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449  * @prog_id: the id originally used for programming
450  *
451  * This is used to verify if the FD programming or invalidation
452  * requested by SW to the HW is successful or not and take actions accordingly.
453  **/
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 				  union i40e_rx_desc *rx_desc, u8 prog_id)
456 {
457 	struct i40e_pf *pf = rx_ring->vsi->back;
458 	struct pci_dev *pdev = pf->pdev;
459 	u32 fcnt_prog, fcnt_avail;
460 	u32 error;
461 	u64 qw;
462 
463 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
466 
467 	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
470 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 				 rx_desc->wb.qword0.hi_dword.fd_id);
472 
473 		pf->fd_add_err++;
474 		/* store the current atr filter count */
475 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
476 
477 		/* filter programming failed most likely due to table full */
478 		fcnt_prog = i40e_get_cur_guaranteed_fd_count(pf);
479 		fcnt_avail = pf->fdir_pf_filter_count;
480 		/* If ATR is running fcnt_prog can quickly change,
481 		 * if we are very close to full, it makes sense to disable
482 		 * FD ATR/SB and then re-enable it when there is room.
483 		 */
484 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
485 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
486 			    !(pf->auto_disable_flags &
487 				     I40E_FLAG_FD_SB_ENABLED)) {
488 				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
489 				pf->auto_disable_flags |=
490 							I40E_FLAG_FD_SB_ENABLED;
491 			}
492 		} else {
493 			dev_info(&pdev->dev,
494 				"FD filter programming failed due to incorrect filter parameters\n");
495 		}
496 	} else if (error ==
497 			  (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
498 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
499 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
500 				 rx_desc->wb.qword0.hi_dword.fd_id);
501 	}
502 }
503 
504 /**
505  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
506  * @ring:      the ring that owns the buffer
507  * @tx_buffer: the buffer to free
508  **/
509 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
510 					    struct i40e_tx_buffer *tx_buffer)
511 {
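	/* for FDir sideband programming buffers, raw_buf overlays skb in the
	 * tx_buffer (they share a union), so the FD_SB flag is what tells us
	 * whether this entry holds a kmalloc()'ed raw packet or a real skb
	 */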
512 	if (tx_buffer->skb) {
513 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
514 			kfree(tx_buffer->raw_buf);
515 		else
516 			dev_kfree_skb_any(tx_buffer->skb);
517 
518 		if (dma_unmap_len(tx_buffer, len))
519 			dma_unmap_single(ring->dev,
520 					 dma_unmap_addr(tx_buffer, dma),
521 					 dma_unmap_len(tx_buffer, len),
522 					 DMA_TO_DEVICE);
523 	} else if (dma_unmap_len(tx_buffer, len)) {
524 		dma_unmap_page(ring->dev,
525 			       dma_unmap_addr(tx_buffer, dma),
526 			       dma_unmap_len(tx_buffer, len),
527 			       DMA_TO_DEVICE);
528 	}
529 	tx_buffer->next_to_watch = NULL;
530 	tx_buffer->skb = NULL;
531 	dma_unmap_len_set(tx_buffer, len, 0);
532 	/* tx_buffer must be completely set up in the transmit path */
533 }
534 
535 /**
 * i40e_clean_tx_ring - Free all Tx buffers in a ring
537  * @tx_ring: ring to be cleaned
538  **/
539 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
540 {
541 	unsigned long bi_size;
542 	u16 i;
543 
544 	/* ring already cleared, nothing to do */
545 	if (!tx_ring->tx_bi)
546 		return;
547 
548 	/* Free all the Tx ring sk_buffs */
549 	for (i = 0; i < tx_ring->count; i++)
550 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
551 
552 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
553 	memset(tx_ring->tx_bi, 0, bi_size);
554 
555 	/* Zero out the descriptor ring */
556 	memset(tx_ring->desc, 0, tx_ring->size);
557 
558 	tx_ring->next_to_use = 0;
559 	tx_ring->next_to_clean = 0;
560 
561 	if (!tx_ring->netdev)
562 		return;
563 
564 	/* cleanup Tx queue statistics */
565 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
566 						  tx_ring->queue_index));
567 }
568 
569 /**
570  * i40e_free_tx_resources - Free Tx resources per queue
571  * @tx_ring: Tx descriptor ring for a specific queue
572  *
573  * Free all transmit software resources
574  **/
575 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
576 {
577 	i40e_clean_tx_ring(tx_ring);
578 	kfree(tx_ring->tx_bi);
579 	tx_ring->tx_bi = NULL;
580 
581 	if (tx_ring->desc) {
582 		dma_free_coherent(tx_ring->dev, tx_ring->size,
583 				  tx_ring->desc, tx_ring->dma);
584 		tx_ring->desc = NULL;
585 	}
586 }
587 
588 /**
589  * i40e_get_head - Retrieve head from head writeback
590  * @tx_ring:  tx ring to fetch head of
591  *
592  * Returns value of Tx ring head based on value stored
593  * in head write-back location
594  **/
595 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
596 {
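	/* the head write-back location is a u32 placed immediately after the
	 * last descriptor; i40e_setup_tx_descriptors() reserves room for it
	 * when sizing the ring
	 */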
597 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
598 
599 	return le32_to_cpu(*(volatile __le32 *)head);
600 }
601 
602 /**
603  * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
605  *
606  * Since there is no access to the ring head register
607  * in XL710, we need to use our local copies
608  **/
609 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
610 {
611 	u32 head, tail;
612 
613 	head = i40e_get_head(ring);
614 	tail = readl(ring->tail);
615 
616 	if (head != tail)
617 		return (head < tail) ?
618 			tail - head : (tail + ring->count - head);
619 
620 	return 0;
621 }
622 
623 /**
624  * i40e_check_tx_hang - Is there a hang in the Tx queue
625  * @tx_ring: the ring of descriptors
626  **/
627 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
628 {
629 	u32 tx_done = tx_ring->stats.packets;
630 	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
631 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
632 	struct i40e_pf *pf = tx_ring->vsi->back;
633 	bool ret = false;
634 
635 	clear_check_for_tx_hang(tx_ring);
636 
637 	/* Check for a hung queue, but be thorough. This verifies
638 	 * that a transmit has been completed since the previous
639 	 * check AND there is at least one packet pending. The
640 	 * ARMED bit is set to indicate a potential hang. The
641 	 * bit is cleared if a pause frame is received to remove
642 	 * false hang detection due to PFC or 802.3x frames. By
643 	 * requiring this to fail twice we avoid races with
644 	 * PFC clearing the ARMED bit and conditions where we
645 	 * run the check_tx_hang logic with a transmit completion
646 	 * pending but without time to complete it yet.
647 	 */
648 	if ((tx_done_old == tx_done) && tx_pending) {
649 		/* make sure it is true for two checks in a row */
650 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
651 				       &tx_ring->state);
652 	} else if (tx_done_old == tx_done &&
653 		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
654 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d\n",
656 				 tx_pending, tx_ring->queue_index);
657 		pf->tx_sluggish_count++;
658 	} else {
659 		/* update completed stats and disarm the hang check */
660 		tx_ring->tx_stats.tx_done_old = tx_done;
661 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
662 	}
663 
664 	return ret;
665 }
666 
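/* Tx descriptors are 16 bytes, so four of them fill a 64-byte cache line;
 * WB_STRIDE is the mask used in i40e_clean_tx_irq() to spot a clean index
 * that is not cache-line aligned and may need a forced write-back.
 */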
667 #define WB_STRIDE 0x3
668 
669 /**
670  * i40e_clean_tx_irq - Reclaim resources after transmit completes
671  * @tx_ring:  tx ring to clean
672  * @budget:   how many cleans we're allowed
673  *
 * Returns true if there's any budget left (i.e. the clean is finished)
675  **/
676 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
677 {
678 	u16 i = tx_ring->next_to_clean;
679 	struct i40e_tx_buffer *tx_buf;
680 	struct i40e_tx_desc *tx_head;
681 	struct i40e_tx_desc *tx_desc;
682 	unsigned int total_packets = 0;
683 	unsigned int total_bytes = 0;
684 
685 	tx_buf = &tx_ring->tx_bi[i];
686 	tx_desc = I40E_TX_DESC(tx_ring, i);
687 	i -= tx_ring->count;
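	/* i is kept as a negative offset from the end of the ring so that the
	 * wrap checks below reduce to a simple test for zero
	 */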
688 
689 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
690 
691 	do {
692 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
693 
694 		/* if next_to_watch is not set then there is no work pending */
695 		if (!eop_desc)
696 			break;
697 
698 		/* prevent any other reads prior to eop_desc */
699 		read_barrier_depends();
700 
701 		/* we have caught up to head, no work left to do */
702 		if (tx_head == tx_desc)
703 			break;
704 
705 		/* clear next_to_watch to prevent false hangs */
706 		tx_buf->next_to_watch = NULL;
707 
708 		/* update the statistics for this packet */
709 		total_bytes += tx_buf->bytecount;
710 		total_packets += tx_buf->gso_segs;
711 
712 		/* free the skb */
713 		dev_consume_skb_any(tx_buf->skb);
714 
715 		/* unmap skb header data */
716 		dma_unmap_single(tx_ring->dev,
717 				 dma_unmap_addr(tx_buf, dma),
718 				 dma_unmap_len(tx_buf, len),
719 				 DMA_TO_DEVICE);
720 
721 		/* clear tx_buffer data */
722 		tx_buf->skb = NULL;
723 		dma_unmap_len_set(tx_buf, len, 0);
724 
725 		/* unmap remaining buffers */
726 		while (tx_desc != eop_desc) {
727 
728 			tx_buf++;
729 			tx_desc++;
730 			i++;
731 			if (unlikely(!i)) {
732 				i -= tx_ring->count;
733 				tx_buf = tx_ring->tx_bi;
734 				tx_desc = I40E_TX_DESC(tx_ring, 0);
735 			}
736 
737 			/* unmap any remaining paged data */
738 			if (dma_unmap_len(tx_buf, len)) {
739 				dma_unmap_page(tx_ring->dev,
740 					       dma_unmap_addr(tx_buf, dma),
741 					       dma_unmap_len(tx_buf, len),
742 					       DMA_TO_DEVICE);
743 				dma_unmap_len_set(tx_buf, len, 0);
744 			}
745 		}
746 
747 		/* move us one more past the eop_desc for start of next pkt */
748 		tx_buf++;
749 		tx_desc++;
750 		i++;
751 		if (unlikely(!i)) {
752 			i -= tx_ring->count;
753 			tx_buf = tx_ring->tx_bi;
754 			tx_desc = I40E_TX_DESC(tx_ring, 0);
755 		}
756 
757 		/* update budget accounting */
758 		budget--;
759 	} while (likely(budget));
760 
761 	i += tx_ring->count;
762 	tx_ring->next_to_clean = i;
763 	u64_stats_update_begin(&tx_ring->syncp);
764 	tx_ring->stats.bytes += total_bytes;
765 	tx_ring->stats.packets += total_packets;
766 	u64_stats_update_end(&tx_ring->syncp);
767 	tx_ring->q_vector->tx.total_bytes += total_bytes;
768 	tx_ring->q_vector->tx.total_packets += total_packets;
769 
770 	/* check to see if there are any non-cache aligned descriptors
771 	 * waiting to be written back, and kick the hardware to force
772 	 * them to be written back in case of napi polling
773 	 */
774 	if (budget &&
775 	    !((i & WB_STRIDE) == WB_STRIDE) &&
776 	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
777 	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
778 		tx_ring->arm_wb = true;
779 	else
780 		tx_ring->arm_wb = false;
781 
782 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
783 		/* schedule immediate reset if we believe we hung */
784 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
785 			 "  VSI                  <%d>\n"
786 			 "  Tx Queue             <%d>\n"
787 			 "  next_to_use          <%x>\n"
788 			 "  next_to_clean        <%x>\n",
789 			 tx_ring->vsi->seid,
790 			 tx_ring->queue_index,
791 			 tx_ring->next_to_use, i);
792 		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
793 			 "  time_stamp           <%lx>\n"
794 			 "  jiffies              <%lx>\n",
795 			 tx_ring->tx_bi[i].time_stamp, jiffies);
796 
797 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
798 
799 		dev_info(tx_ring->dev,
800 			 "tx hang detected on queue %d, reset requested\n",
801 			 tx_ring->queue_index);
802 
803 		/* do not fire the reset immediately, wait for the stack to
804 		 * decide we are truly stuck, also prevents every queue from
805 		 * simultaneously requesting a reset
806 		 */
807 
808 		/* the adapter is about to reset, no point in enabling polling */
809 		budget = 1;
810 	}
811 
812 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
813 						      tx_ring->queue_index),
814 				  total_packets, total_bytes);
815 
816 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
817 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
818 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
819 		/* Make sure that anybody stopping the queue after this
820 		 * sees the new next_to_clean.
821 		 */
822 		smp_mb();
823 		if (__netif_subqueue_stopped(tx_ring->netdev,
824 					     tx_ring->queue_index) &&
825 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
826 			netif_wake_subqueue(tx_ring->netdev,
827 					    tx_ring->queue_index);
828 			++tx_ring->tx_stats.restart_queue;
829 		}
830 	}
831 
832 	return !!budget;
833 }
834 
835 /**
 * i40e_force_wb - Arm hardware to do a write-back on non-cache-aligned descriptors
 * @vsi: the VSI we care about
 * @q_vector: the vector on which to force the write-back
839  *
840  **/
841 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
842 {
843 	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
844 		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
845 		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
846 		  /* allow 00 to be written to the index */
847 
848 	wr32(&vsi->back->hw,
849 	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
850 	     val);
851 }
852 
853 /**
854  * i40e_set_new_dynamic_itr - Find new ITR level
855  * @rc: structure containing ring performance data
856  *
857  * Stores a new ITR value based on packets and byte counts during
858  * the last interrupt.  The advantage of per interrupt computation
859  * is faster updates and more accurate ITR for the current traffic
860  * pattern.  Constants in this function were computed based on
861  * theoretical maximum wire speed and thresholds were set based on
862  * testing data as well as attempting to minimize response time
863  * while increasing bulk throughput.
864  **/
865 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
866 {
867 	enum i40e_latency_range new_latency_range = rc->latency_range;
868 	u32 new_itr = rc->itr;
869 	int bytes_per_int;
870 
871 	if (rc->total_packets == 0 || !rc->itr)
872 		return;
873 
	/* simple throttle rate management
875 	 *   0-10MB/s   lowest (100000 ints/s)
876 	 *  10-20MB/s   low    (20000 ints/s)
877 	 *  20-1249MB/s bulk   (8000 ints/s)
878 	 */
879 	bytes_per_int = rc->total_bytes / rc->itr;
	switch (new_latency_range) {
	case I40E_LOWEST_LATENCY:
		if (bytes_per_int > 10)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	case I40E_LOW_LATENCY:
		if (bytes_per_int > 20)
			new_latency_range = I40E_BULK_LATENCY;
		else if (bytes_per_int <= 10)
			new_latency_range = I40E_LOWEST_LATENCY;
		break;
	case I40E_BULK_LATENCY:
		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}
	rc->latency_range = new_latency_range;
896 
897 	switch (new_latency_range) {
898 	case I40E_LOWEST_LATENCY:
899 		new_itr = I40E_ITR_100K;
900 		break;
901 	case I40E_LOW_LATENCY:
902 		new_itr = I40E_ITR_20K;
903 		break;
904 	case I40E_BULK_LATENCY:
905 		new_itr = I40E_ITR_8K;
906 		break;
907 	default:
908 		break;
909 	}
910 
911 	if (new_itr != rc->itr) {
912 		/* do an exponential smoothing */
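		/* in terms of interrupt rate (1/ITR) this works out to roughly
		 * 90% of the previous value plus 10% of the new target, so the
		 * rate ramps toward the target instead of jumping to it
		 */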
913 		new_itr = (10 * new_itr * rc->itr) /
914 			  ((9 * new_itr) + rc->itr);
915 		rc->itr = new_itr & I40E_MAX_ITR;
916 	}
917 
918 	rc->total_bytes = 0;
919 	rc->total_packets = 0;
920 }
921 
922 /**
923  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
924  * @q_vector: the vector to adjust
925  **/
926 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
927 {
928 	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
929 	struct i40e_hw *hw = &q_vector->vsi->back->hw;
930 	u32 reg_addr;
931 	u16 old_itr;
932 
933 	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
934 	old_itr = q_vector->rx.itr;
935 	i40e_set_new_dynamic_itr(&q_vector->rx);
936 	if (old_itr != q_vector->rx.itr)
937 		wr32(hw, reg_addr, q_vector->rx.itr);
938 
939 	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
940 	old_itr = q_vector->tx.itr;
941 	i40e_set_new_dynamic_itr(&q_vector->tx);
942 	if (old_itr != q_vector->tx.itr)
943 		wr32(hw, reg_addr, q_vector->tx.itr);
944 }
945 
946 /**
947  * i40e_clean_programming_status - clean the programming status descriptor
948  * @rx_ring: the rx ring that has this descriptor
949  * @rx_desc: the rx descriptor written back by HW
950  *
951  * Flow director should handle FD_FILTER_STATUS to check its filter programming
952  * status being successful or not and take actions accordingly. FCoE should
953  * handle its context/filter programming/invalidation status and take actions.
954  *
955  **/
956 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
957 					  union i40e_rx_desc *rx_desc)
958 {
959 	u64 qw;
960 	u8 id;
961 
962 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
963 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
964 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
965 
966 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
967 		i40e_fd_handle_status(rx_ring, rx_desc, id);
968 #ifdef I40E_FCOE
969 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
970 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
971 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
972 #endif
973 }
974 
975 /**
976  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
977  * @tx_ring: the tx ring to set up
978  *
979  * Return 0 on success, negative on error
980  **/
981 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
982 {
983 	struct device *dev = tx_ring->dev;
984 	int bi_size;
985 
986 	if (!dev)
987 		return -ENOMEM;
988 
989 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
990 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
991 	if (!tx_ring->tx_bi)
992 		goto err;
993 
994 	/* round up to nearest 4K */
995 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
996 	/* add u32 for head writeback, align after this takes care of
997 	 * guaranteeing this is at least one cache line in size
998 	 */
999 	tx_ring->size += sizeof(u32);
1000 	tx_ring->size = ALIGN(tx_ring->size, 4096);
1001 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1002 					   &tx_ring->dma, GFP_KERNEL);
1003 	if (!tx_ring->desc) {
1004 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1005 			 tx_ring->size);
1006 		goto err;
1007 	}
1008 
1009 	tx_ring->next_to_use = 0;
1010 	tx_ring->next_to_clean = 0;
1011 	return 0;
1012 
1013 err:
1014 	kfree(tx_ring->tx_bi);
1015 	tx_ring->tx_bi = NULL;
1016 	return -ENOMEM;
1017 }
1018 
1019 /**
1020  * i40e_clean_rx_ring - Free Rx buffers
1021  * @rx_ring: ring to be cleaned
1022  **/
1023 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1024 {
1025 	struct device *dev = rx_ring->dev;
1026 	struct i40e_rx_buffer *rx_bi;
1027 	unsigned long bi_size;
1028 	u16 i;
1029 
1030 	/* ring already cleared, nothing to do */
1031 	if (!rx_ring->rx_bi)
1032 		return;
1033 
1034 	/* Free all the Rx ring sk_buffs */
1035 	for (i = 0; i < rx_ring->count; i++) {
1036 		rx_bi = &rx_ring->rx_bi[i];
1037 		if (rx_bi->dma) {
1038 			dma_unmap_single(dev,
1039 					 rx_bi->dma,
1040 					 rx_ring->rx_buf_len,
1041 					 DMA_FROM_DEVICE);
1042 			rx_bi->dma = 0;
1043 		}
1044 		if (rx_bi->skb) {
1045 			dev_kfree_skb(rx_bi->skb);
1046 			rx_bi->skb = NULL;
1047 		}
1048 		if (rx_bi->page) {
1049 			if (rx_bi->page_dma) {
1050 				dma_unmap_page(dev,
1051 					       rx_bi->page_dma,
1052 					       PAGE_SIZE / 2,
1053 					       DMA_FROM_DEVICE);
1054 				rx_bi->page_dma = 0;
1055 			}
1056 			__free_page(rx_bi->page);
1057 			rx_bi->page = NULL;
1058 			rx_bi->page_offset = 0;
1059 		}
1060 	}
1061 
1062 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1063 	memset(rx_ring->rx_bi, 0, bi_size);
1064 
1065 	/* Zero out the descriptor ring */
1066 	memset(rx_ring->desc, 0, rx_ring->size);
1067 
1068 	rx_ring->next_to_clean = 0;
1069 	rx_ring->next_to_use = 0;
1070 }
1071 
1072 /**
1073  * i40e_free_rx_resources - Free Rx resources
1074  * @rx_ring: ring to clean the resources from
1075  *
1076  * Free all receive software resources
1077  **/
1078 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1079 {
1080 	i40e_clean_rx_ring(rx_ring);
1081 	kfree(rx_ring->rx_bi);
1082 	rx_ring->rx_bi = NULL;
1083 
1084 	if (rx_ring->desc) {
1085 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1086 				  rx_ring->desc, rx_ring->dma);
1087 		rx_ring->desc = NULL;
1088 	}
1089 }
1090 
1091 /**
1092  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1093  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1094  *
1095  * Returns 0 on success, negative on failure
1096  **/
1097 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1098 {
1099 	struct device *dev = rx_ring->dev;
1100 	int bi_size;
1101 
1102 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1103 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1104 	if (!rx_ring->rx_bi)
1105 		goto err;
1106 
1107 	u64_stats_init(&rx_ring->syncp);
1108 
1109 	/* Round up to nearest 4K */
1110 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1111 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1112 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1113 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1114 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1115 					   &rx_ring->dma, GFP_KERNEL);
1116 
1117 	if (!rx_ring->desc) {
1118 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1119 			 rx_ring->size);
1120 		goto err;
1121 	}
1122 
1123 	rx_ring->next_to_clean = 0;
1124 	rx_ring->next_to_use = 0;
1125 
1126 	return 0;
1127 err:
1128 	kfree(rx_ring->rx_bi);
1129 	rx_ring->rx_bi = NULL;
1130 	return -ENOMEM;
1131 }
1132 
1133 /**
 * i40e_release_rx_desc - Store the new next_to_use value and bump the tail
 * @rx_ring: ring to bump
 * @val: new next_to_use index to write to the hardware tail register
1137  **/
1138 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1139 {
1140 	rx_ring->next_to_use = val;
1141 	/* Force memory writes to complete before letting h/w
1142 	 * know there are new descriptors to fetch.  (Only
1143 	 * applicable for weak-ordered memory model archs,
1144 	 * such as IA-64).
1145 	 */
1146 	wmb();
1147 	writel(val, rx_ring->tail);
1148 }
1149 
1150 /**
1151  * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
1152  * @rx_ring: ring to place buffers on
1153  * @cleaned_count: number of buffers to replace
1154  **/
1155 void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
1156 {
1157 	u16 i = rx_ring->next_to_use;
1158 	union i40e_rx_desc *rx_desc;
1159 	struct i40e_rx_buffer *bi;
1160 	struct sk_buff *skb;
1161 
1162 	/* do nothing if no valid netdev defined */
1163 	if (!rx_ring->netdev || !cleaned_count)
1164 		return;
1165 
1166 	while (cleaned_count--) {
1167 		rx_desc = I40E_RX_DESC(rx_ring, i);
1168 		bi = &rx_ring->rx_bi[i];
1169 		skb = bi->skb;
1170 
1171 		if (!skb) {
1172 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1173 							rx_ring->rx_buf_len);
1174 			if (!skb) {
1175 				rx_ring->rx_stats.alloc_buff_failed++;
1176 				goto no_buffers;
1177 			}
1178 			/* initialize queue mapping */
1179 			skb_record_rx_queue(skb, rx_ring->queue_index);
1180 			bi->skb = skb;
1181 		}
1182 
1183 		if (!bi->dma) {
1184 			bi->dma = dma_map_single(rx_ring->dev,
1185 						 skb->data,
1186 						 rx_ring->rx_buf_len,
1187 						 DMA_FROM_DEVICE);
1188 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1189 				rx_ring->rx_stats.alloc_buff_failed++;
1190 				bi->dma = 0;
1191 				goto no_buffers;
1192 			}
1193 		}
1194 
1195 		if (ring_is_ps_enabled(rx_ring)) {
1196 			if (!bi->page) {
1197 				bi->page = alloc_page(GFP_ATOMIC);
1198 				if (!bi->page) {
1199 					rx_ring->rx_stats.alloc_page_failed++;
1200 					goto no_buffers;
1201 				}
1202 			}
1203 
1204 			if (!bi->page_dma) {
1205 				/* use a half page if we're re-using */
1206 				bi->page_offset ^= PAGE_SIZE / 2;
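				/* each page is consumed one half at a time;
				 * flipping the offset hands the other half to
				 * the hardware while the stack may still hold
				 * a reference to the previous half
				 */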
1207 				bi->page_dma = dma_map_page(rx_ring->dev,
1208 							    bi->page,
1209 							    bi->page_offset,
1210 							    PAGE_SIZE / 2,
1211 							    DMA_FROM_DEVICE);
1212 				if (dma_mapping_error(rx_ring->dev,
1213 						      bi->page_dma)) {
1214 					rx_ring->rx_stats.alloc_page_failed++;
1215 					bi->page_dma = 0;
1216 					goto no_buffers;
1217 				}
1218 			}
1219 
1220 			/* Refresh the desc even if buffer_addrs didn't change
1221 			 * because each write-back erases this info.
1222 			 */
1223 			rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1224 			rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1225 		} else {
1226 			rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1227 			rx_desc->read.hdr_addr = 0;
1228 		}
1229 		i++;
1230 		if (i == rx_ring->count)
1231 			i = 0;
1232 	}
1233 
1234 no_buffers:
1235 	if (rx_ring->next_to_use != i)
1236 		i40e_release_rx_desc(rx_ring, i);
1237 }
1238 
1239 /**
1240  * i40e_receive_skb - Send a completed packet up the stack
1241  * @rx_ring:  rx ring in play
1242  * @skb: packet to send up
1243  * @vlan_tag: vlan tag for packet
1244  **/
1245 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1246 			     struct sk_buff *skb, u16 vlan_tag)
1247 {
1248 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1249 	struct i40e_vsi *vsi = rx_ring->vsi;
1250 	u64 flags = vsi->back->flags;
1251 
1252 	if (vlan_tag & VLAN_VID_MASK)
1253 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1254 
1255 	if (flags & I40E_FLAG_IN_NETPOLL)
1256 		netif_rx(skb);
1257 	else
1258 		napi_gro_receive(&q_vector->napi, skb);
1259 }
1260 
1261 /**
1262  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1263  * @vsi: the VSI we care about
1264  * @skb: skb currently being received and modified
1265  * @rx_status: status value of last descriptor in packet
1266  * @rx_error: error value of last descriptor in packet
1267  * @rx_ptype: ptype value of last descriptor in packet
1268  **/
1269 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1270 				    struct sk_buff *skb,
1271 				    u32 rx_status,
1272 				    u32 rx_error,
1273 				    u16 rx_ptype)
1274 {
1275 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1276 	bool ipv4 = false, ipv6 = false;
1277 	bool ipv4_tunnel, ipv6_tunnel;
1278 	__wsum rx_udp_csum;
1279 	struct iphdr *iph;
1280 	__sum16 csum;
1281 
1282 	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1283 		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1284 	ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1285 		      (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1286 
1287 	skb->ip_summed = CHECKSUM_NONE;
1288 
1289 	/* Rx csum enabled and ip headers found? */
1290 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1291 		return;
1292 
1293 	/* did the hardware decode the packet and checksum? */
1294 	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1295 		return;
1296 
1297 	/* both known and outer_ip must be set for the below code to work */
1298 	if (!(decoded.known && decoded.outer_ip))
1299 		return;
1300 
1301 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1302 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1303 		ipv4 = true;
1304 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1305 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1306 		ipv6 = true;
1307 
1308 	if (ipv4 &&
1309 	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1310 			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1311 		goto checksum_fail;
1312 
1313 	/* likely incorrect csum if alternate IP extension headers found */
1314 	if (ipv6 &&
1315 	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1316 		/* don't increment checksum err here, non-fatal err */
1317 		return;
1318 
1319 	/* there was some L4 error, count error and punt packet to the stack */
1320 	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
1321 		goto checksum_fail;
1322 
1323 	/* handle packets that were not able to be checksummed due
1324 	 * to arrival speed, in this case the stack can compute
1325 	 * the csum.
1326 	 */
1327 	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
1328 		return;
1329 
1330 	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
1331 	 * it in the driver, hardware does not do it for us.
1332 	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1333 	 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1335 	 */
1336 	if (ipv4_tunnel) {
1337 		skb->transport_header = skb->mac_header +
1338 					sizeof(struct ethhdr) +
1339 					(ip_hdr(skb)->ihl * 4);
1340 
1341 		/* Add 4 bytes for VLAN tagged packets */
1342 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1343 					  skb->protocol == htons(ETH_P_8021AD))
1344 					  ? VLAN_HLEN : 0;
1345 
1346 		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1347 		    (udp_hdr(skb)->check != 0)) {
1348 			rx_udp_csum = udp_csum(skb);
1349 			iph = ip_hdr(skb);
1350 			csum = csum_tcpudp_magic(
1351 					iph->saddr, iph->daddr,
1352 					(skb->len - skb_transport_offset(skb)),
1353 					IPPROTO_UDP, rx_udp_csum);
1354 
1355 			if (udp_hdr(skb)->check != csum)
1356 				goto checksum_fail;
1357 
		} /* else it's GRE and so no outer UDP header */
1359 	}
1360 
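	/* report the checksum as verified; csum_level tells the stack how many
	 * levels of encapsulated checksums are covered, so it is bumped for
	 * tunneled frames where the inner checksum is the one validated
	 */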
1361 	skb->ip_summed = CHECKSUM_UNNECESSARY;
1362 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1363 
1364 	return;
1365 
1366 checksum_fail:
1367 	vsi->back->hw_csum_rx_error++;
1368 }
1369 
1370 /**
1371  * i40e_rx_hash - returns the hash value from the Rx descriptor
1372  * @ring: descriptor ring
1373  * @rx_desc: specific descriptor
1374  **/
1375 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1376 			       union i40e_rx_desc *rx_desc)
1377 {
1378 	const __le64 rss_mask =
1379 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1380 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1381 
1382 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1383 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1384 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1385 	else
1386 		return 0;
1387 }
1388 
1389 /**
1390  * i40e_ptype_to_hash - get a hash type
1391  * @ptype: the ptype value from the descriptor
1392  *
1393  * Returns a hash type to be used by skb_set_hash
1394  **/
1395 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1396 {
1397 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1398 
1399 	if (!decoded.known)
1400 		return PKT_HASH_TYPE_NONE;
1401 
1402 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1403 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1404 		return PKT_HASH_TYPE_L4;
1405 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1406 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1407 		return PKT_HASH_TYPE_L3;
1408 	else
1409 		return PKT_HASH_TYPE_L2;
1410 }
1411 
1412 /**
1413  * i40e_clean_rx_irq - Reclaim resources after receive completes
1414  * @rx_ring:  rx ring to clean
1415  * @budget:   how many cleans we're allowed
1416  *
 * Returns true if there's any budget left (i.e. the clean is finished)
1418  **/
1419 static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1420 {
1421 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1422 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1423 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1424 	const int current_node = numa_node_id();
1425 	struct i40e_vsi *vsi = rx_ring->vsi;
1426 	u16 i = rx_ring->next_to_clean;
1427 	union i40e_rx_desc *rx_desc;
1428 	u32 rx_error, rx_status;
1429 	u8 rx_ptype;
1430 	u64 qword;
1431 
1432 	if (budget <= 0)
1433 		return 0;
1434 
1435 	rx_desc = I40E_RX_DESC(rx_ring, i);
1436 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1437 	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1438 		    I40E_RXD_QW1_STATUS_SHIFT;
1439 
1440 	while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
1441 		union i40e_rx_desc *next_rxd;
1442 		struct i40e_rx_buffer *rx_bi;
1443 		struct sk_buff *skb;
1444 		u16 vlan_tag;
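
		/* programming status descriptors (FDir/FCoE completion
		 * reports) occupy ring entries but carry no packet data, so
		 * consume them here and move on to the next descriptor
		 */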
1445 		if (i40e_rx_is_programming_status(qword)) {
1446 			i40e_clean_programming_status(rx_ring, rx_desc);
1447 			I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1448 			goto next_desc;
1449 		}
1450 		rx_bi = &rx_ring->rx_bi[i];
1451 		skb = rx_bi->skb;
1452 		prefetch(skb->data);
1453 
1454 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1455 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1456 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1457 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1458 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1459 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1460 
1461 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1462 			   I40E_RXD_QW1_ERROR_SHIFT;
1463 		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1464 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1465 
1466 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1467 			   I40E_RXD_QW1_PTYPE_SHIFT;
1468 		rx_bi->skb = NULL;
1469 
1470 		/* This memory barrier is needed to keep us from reading
1471 		 * any other fields out of the rx_desc until we know the
1472 		 * STATUS_DD bit is set
1473 		 */
1474 		rmb();
1475 
1476 		/* Get the header and possibly the whole packet
1477 		 * If this is an skb from previous receive dma will be 0
1478 		 */
1479 		if (rx_bi->dma) {
1480 			u16 len;
1481 
1482 			if (rx_hbo)
1483 				len = I40E_RX_HDR_SIZE;
1484 			else if (rx_sph)
1485 				len = rx_header_len;
1486 			else if (rx_packet_len)
1487 				len = rx_packet_len;   /* 1buf/no split found */
1488 			else
1489 				len = rx_header_len;   /* split always mode */
1490 
1491 			skb_put(skb, len);
1492 			dma_unmap_single(rx_ring->dev,
1493 					 rx_bi->dma,
1494 					 rx_ring->rx_buf_len,
1495 					 DMA_FROM_DEVICE);
1496 			rx_bi->dma = 0;
1497 		}
1498 
1499 		/* Get the rest of the data if this was a header split */
1500 		if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
1501 
1502 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1503 					   rx_bi->page,
1504 					   rx_bi->page_offset,
1505 					   rx_packet_len);
1506 
1507 			skb->len += rx_packet_len;
1508 			skb->data_len += rx_packet_len;
1509 			skb->truesize += rx_packet_len;
1510 
1511 			if ((page_count(rx_bi->page) == 1) &&
1512 			    (page_to_nid(rx_bi->page) == current_node))
1513 				get_page(rx_bi->page);
1514 			else
1515 				rx_bi->page = NULL;
1516 
1517 			dma_unmap_page(rx_ring->dev,
1518 				       rx_bi->page_dma,
1519 				       PAGE_SIZE / 2,
1520 				       DMA_FROM_DEVICE);
1521 			rx_bi->page_dma = 0;
1522 		}
1523 		I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1524 
1525 		if (unlikely(
1526 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1527 			struct i40e_rx_buffer *next_buffer;
1528 
1529 			next_buffer = &rx_ring->rx_bi[i];
1530 
1531 			if (ring_is_ps_enabled(rx_ring)) {
1532 				rx_bi->skb = next_buffer->skb;
1533 				rx_bi->dma = next_buffer->dma;
1534 				next_buffer->skb = skb;
1535 				next_buffer->dma = 0;
1536 			}
1537 			rx_ring->rx_stats.non_eop_descs++;
1538 			goto next_desc;
1539 		}
1540 
1541 		/* ERR_MASK will only have valid bits if EOP set */
1542 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1543 			dev_kfree_skb_any(skb);
1544 			/* TODO: shouldn't we increment a counter indicating the
1545 			 * drop?
1546 			 */
1547 			goto next_desc;
1548 		}
1549 
1550 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1551 			     i40e_ptype_to_hash(rx_ptype));
1552 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1553 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1554 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1555 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1556 			rx_ring->last_rx_timestamp = jiffies;
1557 		}
1558 
1559 		/* probably a little skewed due to removing CRC */
1560 		total_rx_bytes += skb->len;
1561 		total_rx_packets++;
1562 
1563 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1564 
1565 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1566 
1567 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1568 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1569 			 : 0;
1570 #ifdef I40E_FCOE
1571 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1572 			dev_kfree_skb_any(skb);
1573 			goto next_desc;
1574 		}
1575 #endif
1576 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1577 
1578 		rx_ring->netdev->last_rx = jiffies;
1579 		budget--;
1580 next_desc:
1581 		rx_desc->wb.qword1.status_error_len = 0;
1582 		if (!budget)
1583 			break;
1584 
1585 		cleaned_count++;
1586 		/* return some buffers to hardware, one at a time is too slow */
1587 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1588 			i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1589 			cleaned_count = 0;
1590 		}
1591 
1592 		/* use prefetched values */
1593 		rx_desc = next_rxd;
1594 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1595 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1596 			    I40E_RXD_QW1_STATUS_SHIFT;
1597 	}
1598 
1599 	rx_ring->next_to_clean = i;
1600 	u64_stats_update_begin(&rx_ring->syncp);
1601 	rx_ring->stats.packets += total_rx_packets;
1602 	rx_ring->stats.bytes += total_rx_bytes;
1603 	u64_stats_update_end(&rx_ring->syncp);
1604 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1605 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1606 
1607 	if (cleaned_count)
1608 		i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1609 
1610 	return budget > 0;
1611 }
1612 
1613 /**
1614  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1615  * @napi: napi struct with our devices info in it
1616  * @budget: amount of work driver is allowed to do this pass, in packets
1617  *
1618  * This function will clean all queues associated with a q_vector.
1619  *
 * Returns 0 when the clean is complete, otherwise returns the budget to
 * indicate there is more work to do
1621  **/
1622 int i40e_napi_poll(struct napi_struct *napi, int budget)
1623 {
1624 	struct i40e_q_vector *q_vector =
1625 			       container_of(napi, struct i40e_q_vector, napi);
1626 	struct i40e_vsi *vsi = q_vector->vsi;
1627 	struct i40e_ring *ring;
1628 	bool clean_complete = true;
1629 	bool arm_wb = false;
1630 	int budget_per_ring;
1631 
1632 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1633 		napi_complete(napi);
1634 		return 0;
1635 	}
1636 
1637 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1638 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1639 	 */
1640 	i40e_for_each_ring(ring, q_vector->tx) {
1641 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1642 		arm_wb |= ring->arm_wb;
1643 	}
1644 
1645 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1646 	 * allow the budget to go below 1 because that would exit polling early.
1647 	 */
1648 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1649 
1650 	i40e_for_each_ring(ring, q_vector->rx)
1651 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
1652 
1653 	/* If work not completed, return budget and polling will return */
1654 	if (!clean_complete) {
1655 		if (arm_wb)
1656 			i40e_force_wb(vsi, q_vector);
1657 		return budget;
1658 	}
1659 
1660 	/* Work is done so exit the polling mode and re-enable the interrupt */
1661 	napi_complete(napi);
1662 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1663 	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1664 		i40e_update_dynamic_itr(q_vector);
1665 
1666 	if (!test_bit(__I40E_DOWN, &vsi->state)) {
1667 		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1668 			i40e_irq_dynamic_enable(vsi,
1669 					q_vector->v_idx + vsi->base_vector);
1670 		} else {
1671 			struct i40e_hw *hw = &vsi->back->hw;
1672 			/* We re-enable the queue 0 cause, but
1673 			 * don't worry about dynamic_enable
1674 			 * because we left it on for the other
1675 			 * possible interrupts during napi
1676 			 */
1677 			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1678 			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1679 			wr32(hw, I40E_QINT_RQCTL(0), qval);
1680 
1681 			qval = rd32(hw, I40E_QINT_TQCTL(0));
1682 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1683 			wr32(hw, I40E_QINT_TQCTL(0), qval);
1684 
1685 			i40e_irq_dynamic_enable_icr0(vsi->back);
1686 		}
1687 	}
1688 
1689 	return 0;
1690 }
1691 
1692 /**
1693  * i40e_atr - Add a Flow Director ATR filter
1694  * @tx_ring:  ring to add programming descriptor to
1695  * @skb:      send buffer
1696  * @flags:    send flags
1697  * @protocol: wire protocol
1698  **/
1699 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1700 		     u32 flags, __be16 protocol)
1701 {
1702 	struct i40e_filter_program_desc *fdir_desc;
1703 	struct i40e_pf *pf = tx_ring->vsi->back;
1704 	union {
1705 		unsigned char *network;
1706 		struct iphdr *ipv4;
1707 		struct ipv6hdr *ipv6;
1708 	} hdr;
1709 	struct tcphdr *th;
1710 	unsigned int hlen;
1711 	u32 flex_ptype, dtype_cmd;
1712 	u16 i;
1713 
1714 	/* make sure ATR is enabled */
1715 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1716 		return;
1717 
1718 	/* if sampling is disabled do nothing */
1719 	if (!tx_ring->atr_sample_rate)
1720 		return;
1721 
1722 	/* snag network header to get L4 type and address */
1723 	hdr.network = skb_network_header(skb);
1724 
1725 	/* Currently only IPv4/IPv6 with TCP is supported */
1726 	if (protocol == htons(ETH_P_IP)) {
1727 		if (hdr.ipv4->protocol != IPPROTO_TCP)
1728 			return;
1729 
1730 		/* access ihl as a u8 to avoid unaligned access on ia64 */
1731 		hlen = (hdr.network[0] & 0x0F) << 2;
1732 	} else if (protocol == htons(ETH_P_IPV6)) {
1733 		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1734 			return;
1735 
1736 		hlen = sizeof(struct ipv6hdr);
1737 	} else {
1738 		return;
1739 	}
1740 
1741 	th = (struct tcphdr *)(hdr.network + hlen);
1742 
	/* Due to lack of space, no new filters can be programmed */
1744 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1745 		return;
1746 
1747 	tx_ring->atr_count++;
1748 
1749 	/* sample on all syn/fin/rst packets or once every atr sample rate */
1750 	if (!th->fin &&
1751 	    !th->syn &&
1752 	    !th->rst &&
1753 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
1754 		return;
1755 
1756 	tx_ring->atr_count = 0;
1757 
1758 	/* grab the next descriptor */
1759 	i = tx_ring->next_to_use;
1760 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1761 
1762 	i++;
1763 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1764 
1765 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1766 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
1767 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1768 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1769 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1770 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1771 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1772 
1773 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1774 
1775 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1776 
1777 	dtype_cmd |= (th->fin || th->rst) ?
1778 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1779 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
1780 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
1781 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
1782 
1783 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
1784 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
1785 
1786 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
1787 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
1788 
1789 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
1790 	dtype_cmd |=
1791 		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
1792 		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
1793 
1794 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
1795 	fdir_desc->rsvd = cpu_to_le32(0);
1796 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
1797 	fdir_desc->fd_id = cpu_to_le32(0);
1798 }
1799 
1800 /**
1801  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
1802  * @skb:     send buffer
1803  * @tx_ring: ring to send buffer on
1804  * @flags:   the tx flags to be set
1805  *
 * Checks the skb and sets up the corresponding generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped upon error,
 * otherwise returns 0 to indicate the flags have been set properly.
1811  **/
1812 #ifdef I40E_FCOE
1813 int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1814 			       struct i40e_ring *tx_ring,
1815 			       u32 *flags)
1816 #else
1817 static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1818 				      struct i40e_ring *tx_ring,
1819 				      u32 *flags)
1820 #endif
1821 {
1822 	__be16 protocol = skb->protocol;
1823 	u32  tx_flags = 0;
1824 
1825 	/* if we have a HW VLAN tag being added, default to the HW one */
1826 	if (skb_vlan_tag_present(skb)) {
1827 		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
1828 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1829 	/* else if it is a SW VLAN, check the next protocol and store the tag */
1830 	} else if (protocol == htons(ETH_P_8021Q)) {
1831 		struct vlan_hdr *vhdr, _vhdr;
1832 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
1833 		if (!vhdr)
1834 			return -EINVAL;
1835 
1836 		protocol = vhdr->h_vlan_encapsulated_proto;
1837 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
1838 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
1839 	}
1840 
1841 	/* Insert 802.1p priority into VLAN header */
1842 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
1843 	    (skb->priority != TC_PRIO_CONTROL)) {
1844 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
1845 		tx_flags |= (skb->priority & 0x7) <<
1846 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
1847 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
1848 			struct vlan_ethhdr *vhdr;
1849 			int rc;
1850 
1851 			rc = skb_cow_head(skb, 0);
1852 			if (rc < 0)
1853 				return rc;
1854 			vhdr = (struct vlan_ethhdr *)skb->data;
1855 			vhdr->h_vlan_TCI = htons(tx_flags >>
1856 						 I40E_TX_FLAGS_VLAN_SHIFT);
1857 		} else {
1858 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1859 		}
1860 	}
1861 	*flags = tx_flags;
1862 	return 0;
1863 }
1864 
1865 /**
1866  * i40e_tso - set up the tso context descriptor
1867  * @tx_ring:  ptr to the ring to send
1868  * @skb:      ptr to the skb we're sending
1869  * @tx_flags: the collected send information
1870  * @protocol: the send protocol
1871  * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: Quad Word 1 of the context descriptor
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO can happen, 1 if TSO is in progress, or a negative error
1875  **/
1876 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
1877 		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
1878 		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
1879 {
1880 	u32 cd_cmd, cd_tso_len, cd_mss;
1881 	struct ipv6hdr *ipv6h;
1882 	struct tcphdr *tcph;
1883 	struct iphdr *iph;
1884 	u32 l4len;
1885 	int err;
1886 
1887 	if (!skb_is_gso(skb))
1888 		return 0;
1889 
1890 	err = skb_cow_head(skb, 0);
1891 	if (err < 0)
1892 		return err;
1893 
1894 	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
1895 	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
1896 
1897 	if (iph->version == 4) {
1898 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1899 		iph->tot_len = 0;
1900 		iph->check = 0;
1901 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1902 						 0, IPPROTO_TCP, 0);
1903 	} else if (ipv6h->version == 6) {
1904 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1905 		ipv6h->payload_len = 0;
1906 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
1907 					       0, IPPROTO_TCP, 0);
1908 	}
1909 
1910 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
1911 	*hdr_len = (skb->encapsulation
1912 		    ? (skb_inner_transport_header(skb) - skb->data)
1913 		    : skb_transport_offset(skb)) + l4len;
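	/* hdr_len covers the headers replicated in every segment, e.g.
	 * Ethernet + IPv4 + TCP with no options is 14 + 20 + 20 = 54 bytes
	 */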
1914 
1915 	/* find the field values */
1916 	cd_cmd = I40E_TX_CTX_DESC_TSO;
1917 	cd_tso_len = skb->len - *hdr_len;
1918 	cd_mss = skb_shinfo(skb)->gso_size;
1919 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
1920 				((u64)cd_tso_len <<
1921 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1922 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
1923 	return 1;
1924 }
1925 
1926 /**
1927  * i40e_tsyn - set up the tsyn context descriptor
1928  * @tx_ring:  ptr to the ring to send
1929  * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: Quad Word 1 of the context descriptor
1931  *
1932  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
1933  **/
1934 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
1935 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
1936 {
1937 	struct i40e_pf *pf;
1938 
1939 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
1940 		return 0;
1941 
1942 	/* Tx timestamps cannot be sampled when doing TSO */
1943 	if (tx_flags & I40E_TX_FLAGS_TSO)
1944 		return 0;
1945 
1946 	/* only timestamp the outbound packet if the user has requested it and
1947 	 * we are not already transmitting a packet to be timestamped
1948 	 */
1949 	pf = i40e_netdev_to_pf(tx_ring->netdev);
1950 	if (!(pf->flags & I40E_FLAG_PTP))
1951 		return 0;
1952 
1953 	if (pf->ptp_tx &&
1954 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
1955 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1956 		pf->ptp_tx_skb = skb_get(skb);
1957 	} else {
1958 		return 0;
1959 	}
1960 
1961 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
1962 				I40E_TXD_CTX_QW1_CMD_SHIFT;
1963 
1964 	return 1;
1965 }
1966 
1967 /**
1968  * i40e_tx_enable_csum - Enable Tx checksum offloads
1969  * @skb: send buffer
1970  * @tx_flags: Tx flags currently set
1971  * @td_cmd: Tx descriptor command bits to set
1972  * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: ring to send buffer on
 * @cd_tunneling: ptr to context desc bits
1974  **/
1975 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
1976 				u32 *td_cmd, u32 *td_offset,
1977 				struct i40e_ring *tx_ring,
1978 				u32 *cd_tunneling)
1979 {
1980 	struct ipv6hdr *this_ipv6_hdr;
1981 	unsigned int this_tcp_hdrlen;
1982 	struct iphdr *this_ip_hdr;
1983 	u32 network_hdr_len;
1984 	u8 l4_hdr = 0;
1985 
1986 	if (skb->encapsulation) {
1987 		network_hdr_len = skb_inner_network_header_len(skb);
1988 		this_ip_hdr = inner_ip_hdr(skb);
1989 		this_ipv6_hdr = inner_ipv6_hdr(skb);
1990 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
1991 
1992 		if (tx_flags & I40E_TX_FLAGS_IPV4) {
1993 
1994 			if (tx_flags & I40E_TX_FLAGS_TSO) {
1995 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
1996 				ip_hdr(skb)->check = 0;
1997 			} else {
1998 				*cd_tunneling |=
1999 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2000 			}
2001 		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
2002 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2003 			if (tx_flags & I40E_TX_FLAGS_TSO)
2004 				ip_hdr(skb)->check = 0;
2005 		}
2006 
2007 		/* Now set the ctx descriptor fields */
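		/* EXT_IPLEN is the outer IP header length in 4-byte words,
		 * and NATLEN is the tunnel header length (outer transport
		 * header to inner network header, e.g. a VXLAN/UDP header)
		 * in 2-byte words
		 */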
2008 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2009 					I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
2010 				   I40E_TXD_CTX_UDP_TUNNELING            |
2011 				   ((skb_inner_network_offset(skb) -
2012 					skb_transport_offset(skb)) >> 1) <<
2013 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2014 		if (this_ip_hdr->version == 6) {
2015 			tx_flags &= ~I40E_TX_FLAGS_IPV4;
2016 			tx_flags |= I40E_TX_FLAGS_IPV6;
2017 		}
2018 	} else {
2019 		network_hdr_len = skb_network_header_len(skb);
2020 		this_ip_hdr = ip_hdr(skb);
2021 		this_ipv6_hdr = ipv6_hdr(skb);
2022 		this_tcp_hdrlen = tcp_hdrlen(skb);
2023 	}
2024 
2025 	/* Enable IP checksum offloads */
2026 	if (tx_flags & I40E_TX_FLAGS_IPV4) {
2027 		l4_hdr = this_ip_hdr->protocol;
		/* the stack has already computed the IP header checksum; the
		 * only time we need the hardware to recompute it is for TSO.
2030 		 */
2031 		if (tx_flags & I40E_TX_FLAGS_TSO) {
2032 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2033 			this_ip_hdr->check = 0;
2034 		} else {
2035 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2036 		}
2037 		/* Now set the td_offset for IP header length */
2038 		*td_offset = (network_hdr_len >> 2) <<
2039 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2040 	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
2041 		l4_hdr = this_ipv6_hdr->nexthdr;
2042 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2043 		/* Now set the td_offset for IP header length */
2044 		*td_offset = (network_hdr_len >> 2) <<
2045 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2046 	}
2047 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
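	/* e.g. an untagged Ethernet frame has a 14-byte MAC header, so
	 * MACLEN = 14 >> 1 = 7 two-byte words
	 */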
2048 	*td_offset |= (skb_network_offset(skb) >> 1) <<
2049 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2050 
2051 	/* Enable L4 checksum offloads */
2052 	switch (l4_hdr) {
2053 	case IPPROTO_TCP:
2054 		/* enable checksum offloads */
2055 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2056 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2057 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2058 		break;
2059 	case IPPROTO_SCTP:
2060 		/* enable SCTP checksum offload */
2061 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2062 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2063 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2064 		break;
2065 	case IPPROTO_UDP:
2066 		/* enable UDP checksum offload */
2067 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2068 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2069 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2070 		break;
2071 	default:
2072 		break;
2073 	}
2074 }
2075 
2076 /**
 * i40e_create_tx_ctx - Build the Tx context descriptor
2078  * @tx_ring:  ring to create the descriptor on
2079  * @cd_type_cmd_tso_mss: Quad Word 1
2080  * @cd_tunneling: Quad Word 0 - bits 0-31
2081  * @cd_l2tag2: Quad Word 0 - bits 32-63
2082  **/
2083 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2084 			       const u64 cd_type_cmd_tso_mss,
2085 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2086 {
2087 	struct i40e_tx_context_desc *context_desc;
2088 	int i = tx_ring->next_to_use;
2089 
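	/* a context descriptor that carries only the default DTYPE and no
	 * tunneling or l2tag2 data tells the hardware nothing, so skip
	 * writing one
	 */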
2090 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2091 	    !cd_tunneling && !cd_l2tag2)
2092 		return;
2093 
2094 	/* grab the next descriptor */
2095 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2096 
2097 	i++;
2098 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2099 
	/* convert to little-endian and assign to the struct fields */
2101 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2102 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2103 	context_desc->rsvd = cpu_to_le16(0);
2104 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2105 }
2106 
2107 /**
2108  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2109  * @tx_ring: the ring to be checked
 * @size:    the number of free descriptors we want to ensure are available
2111  *
2112  * Returns -EBUSY if a stop is needed, else 0
2113  **/
2114 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2115 {
2116 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2117 	/* Memory barrier before checking head and tail */
2118 	smp_mb();
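	/* the barrier makes our queue stop visible before we re-read the
	 * ring state, pairing with the check in the Tx clean path before
	 * it wakes the queue
	 */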
2119 
2120 	/* Check again in a case another CPU has just made room available. */
2121 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2122 		return -EBUSY;
2123 
2124 	/* A reprieve! - use start_queue because it doesn't call schedule */
2125 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2126 	++tx_ring->tx_stats.restart_queue;
2127 	return 0;
2128 }
2129 
2130 /**
2131  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2132  * @tx_ring: the ring to be checked
 * @size:    the number of free descriptors we want to ensure are available
2134  *
2135  * Returns 0 if stop is not needed
2136  **/
2137 #ifdef I40E_FCOE
2138 int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2139 #else
2140 static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2141 #endif
2142 {
2143 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2144 		return 0;
2145 	return __i40e_maybe_stop_tx(tx_ring, size);
2146 }
2147 
2148 /**
2149  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2150  * @skb:      send buffer
2151  * @tx_flags: collected send information
2152  * @hdr_len:  size of the packet header
2153  *
2154  * Note: Our HW can't scatter-gather more than 8 fragments to build
2155  * a packet on the wire and so we need to figure out the cases where we
2156  * need to linearize the skb.
2157  **/
2158 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
2159 			       const u8 hdr_len)
2160 {
2161 	struct skb_frag_struct *frag;
2162 	bool linearize = false;
2163 	unsigned int size = 0;
2164 	u16 num_frags;
2165 	u16 gso_segs;
2166 
2167 	num_frags = skb_shinfo(skb)->nr_frags;
2168 	gso_segs = skb_shinfo(skb)->gso_segs;
2169 
2170 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2171 		u16 j = 1;
2172 
2173 		if (num_frags < (I40E_MAX_BUFFER_TXD))
2174 			goto linearize_chk_done;
2175 		/* try the simple math, if we have too many frags per segment */
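		/* e.g. 24 frags across 2 segments averages 13 frags per
		 * segment (rounded up), which exceeds the 8-descriptor
		 * limit and forces linearization
		 */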
2176 		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2177 		    I40E_MAX_BUFFER_TXD) {
2178 			linearize = true;
2179 			goto linearize_chk_done;
2180 		}
2181 		frag = &skb_shinfo(skb)->frags[0];
2182 		size = hdr_len;
2183 		/* we might still have more fragments per segment */
2184 		do {
2185 			size += skb_frag_size(frag);
2186 			frag++; j++;
2187 			if (j == I40E_MAX_BUFFER_TXD) {
2188 				if (size < skb_shinfo(skb)->gso_size) {
2189 					linearize = true;
2190 					break;
2191 				}
2192 				j = 1;
2193 				size -= skb_shinfo(skb)->gso_size;
2194 				if (size)
2195 					j++;
2196 				size += hdr_len;
2197 			}
2198 			num_frags--;
2199 		} while (num_frags);
2200 	} else {
2201 		if (num_frags >= I40E_MAX_BUFFER_TXD)
2202 			linearize = true;
2203 	}
2204 
2205 linearize_chk_done:
2206 	return linearize;
2207 }
2208 
2209 /**
2210  * i40e_tx_map - Build the Tx descriptor
2211  * @tx_ring:  ring to send buffer on
2212  * @skb:      send buffer
2213  * @first:    first buffer info buffer to use
2214  * @tx_flags: collected send information
2215  * @hdr_len:  size of the packet header
2216  * @td_cmd:   the command field in the descriptor
2217  * @td_offset: offset for checksum or crc
2218  **/
2219 #ifdef I40E_FCOE
2220 void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2221 		 struct i40e_tx_buffer *first, u32 tx_flags,
2222 		 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2223 #else
2224 static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2225 			struct i40e_tx_buffer *first, u32 tx_flags,
2226 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2227 #endif
2228 {
2229 	unsigned int data_len = skb->data_len;
2230 	unsigned int size = skb_headlen(skb);
2231 	struct skb_frag_struct *frag;
2232 	struct i40e_tx_buffer *tx_bi;
2233 	struct i40e_tx_desc *tx_desc;
2234 	u16 i = tx_ring->next_to_use;
2235 	u32 td_tag = 0;
2236 	dma_addr_t dma;
2237 	u16 gso_segs;
2238 
2239 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2240 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2241 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2242 			 I40E_TX_FLAGS_VLAN_SHIFT;
2243 	}
2244 
2245 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2246 		gso_segs = skb_shinfo(skb)->gso_segs;
2247 	else
2248 		gso_segs = 1;
2249 
2250 	/* multiply data chunks by size of headers */
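	/* i.e. bytecount = skb->len + (gso_segs - 1) * hdr_len, since every
	 * additional segment repeats the headers on the wire
	 */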
2251 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2252 	first->gso_segs = gso_segs;
2253 	first->skb = skb;
2254 	first->tx_flags = tx_flags;
2255 
2256 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2257 
2258 	tx_desc = I40E_TX_DESC(tx_ring, i);
2259 	tx_bi = first;
2260 
2261 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2262 		if (dma_mapping_error(tx_ring->dev, dma))
2263 			goto dma_error;
2264 
2265 		/* record length, and DMA address */
2266 		dma_unmap_len_set(tx_bi, len, size);
2267 		dma_unmap_addr_set(tx_bi, dma, dma);
2268 
2269 		tx_desc->buffer_addr = cpu_to_le64(dma);
2270 
2271 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2272 			tx_desc->cmd_type_offset_bsz =
2273 				build_ctob(td_cmd, td_offset,
2274 					   I40E_MAX_DATA_PER_TXD, td_tag);
2275 
2276 			tx_desc++;
2277 			i++;
2278 			if (i == tx_ring->count) {
2279 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2280 				i = 0;
2281 			}
2282 
2283 			dma += I40E_MAX_DATA_PER_TXD;
2284 			size -= I40E_MAX_DATA_PER_TXD;
2285 
2286 			tx_desc->buffer_addr = cpu_to_le64(dma);
2287 		}
2288 
2289 		if (likely(!data_len))
2290 			break;
2291 
2292 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2293 							  size, td_tag);
2294 
2295 		tx_desc++;
2296 		i++;
2297 		if (i == tx_ring->count) {
2298 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2299 			i = 0;
2300 		}
2301 
2302 		size = skb_frag_size(frag);
2303 		data_len -= size;
2304 
2305 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2306 				       DMA_TO_DEVICE);
2307 
2308 		tx_bi = &tx_ring->tx_bi[i];
2309 	}
2310 
2311 	/* Place RS bit on last descriptor of any packet that spans across the
2312 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2313 	 */
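	/* i.e. when the packet starts and ends within the current stride
	 * group and does not land on its last slot, set only EOP and let a
	 * later descriptor carry the RS bit; otherwise set EOP + RS here
	 */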
2314 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2315 	    (first <= &tx_ring->tx_bi[i]) &&
2316 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2317 		tx_desc->cmd_type_offset_bsz =
2318 			build_ctob(td_cmd, td_offset, size, td_tag) |
2319 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2320 					 I40E_TXD_QW1_CMD_SHIFT);
2321 	} else {
2322 		tx_desc->cmd_type_offset_bsz =
2323 			build_ctob(td_cmd, td_offset, size, td_tag) |
2324 			cpu_to_le64((u64)I40E_TXD_CMD <<
2325 					 I40E_TXD_QW1_CMD_SHIFT);
2326 	}
2327 
2328 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2329 						 tx_ring->queue_index),
2330 			     first->bytecount);
2331 
2332 	/* set the timestamp */
2333 	first->time_stamp = jiffies;
2334 
2335 	/* Force memory writes to complete before letting h/w
2336 	 * know there are new descriptors to fetch.  (Only
2337 	 * applicable for weak-ordered memory model archs,
2338 	 * such as IA-64).
2339 	 */
2340 	wmb();
2341 
2342 	/* set next_to_watch value indicating a packet is present */
2343 	first->next_to_watch = tx_desc;
2344 
2345 	i++;
2346 	if (i == tx_ring->count)
2347 		i = 0;
2348 
2349 	tx_ring->next_to_use = i;
2350 
2351 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2352 	/* notify HW of packet */
2353 	if (!skb->xmit_more ||
2354 	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2355 						   tx_ring->queue_index)))
2356 		writel(i, tx_ring->tail);
2357 
2358 	return;
2359 
2360 dma_error:
2361 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2362 
2363 	/* clear dma mappings for failed tx_bi map */
2364 	for (;;) {
2365 		tx_bi = &tx_ring->tx_bi[i];
2366 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2367 		if (tx_bi == first)
2368 			break;
2369 		if (i == 0)
2370 			i = tx_ring->count;
2371 		i--;
2372 	}
2373 
2374 	tx_ring->next_to_use = i;
2375 }
2376 
2377 /**
2378  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2379  * @skb:     send buffer
2380  * @tx_ring: ring to send buffer on
2381  *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring since we
 * need at least one descriptor.
2385  **/
2386 #ifdef I40E_FCOE
2387 int i40e_xmit_descriptor_count(struct sk_buff *skb,
2388 			       struct i40e_ring *tx_ring)
2389 #else
2390 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
2391 				      struct i40e_ring *tx_ring)
2392 #endif
2393 {
2394 	unsigned int f;
2395 	int count = 0;
2396 
2397 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2398 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2399 	 *       + 4 desc gap to avoid the cache line where head is,
2400 	 *       + 1 desc for context descriptor,
2401 	 * otherwise try next time
2402 	 */
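	/* e.g. a linear (unfragmented) MTU-sized skb needs one data
	 * descriptor, so roughly 1 + 4 + 1 = 6 free descriptors must be
	 * available before it is queued
	 */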
2403 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2404 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2405 
2406 	count += TXD_USE_COUNT(skb_headlen(skb));
2407 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2408 		tx_ring->tx_stats.tx_busy++;
2409 		return 0;
2410 	}
2411 	return count;
2412 }
2413 
2414 /**
2415  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2416  * @skb:     send buffer
2417  * @tx_ring: ring to send buffer on
2418  *
2419  * Returns NETDEV_TX_OK if sent, else an error code
2420  **/
2421 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2422 					struct i40e_ring *tx_ring)
2423 {
2424 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2425 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2426 	struct i40e_tx_buffer *first;
2427 	u32 td_offset = 0;
2428 	u32 tx_flags = 0;
2429 	__be16 protocol;
2430 	u32 td_cmd = 0;
2431 	u8 hdr_len = 0;
2432 	int tsyn;
2433 	int tso;
	if (i40e_xmit_descriptor_count(skb, tx_ring) == 0)
2435 		return NETDEV_TX_BUSY;
2436 
2437 	/* prepare the xmit flags */
2438 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2439 		goto out_drop;
2440 
2441 	/* obtain protocol of skb */
2442 	protocol = vlan_get_protocol(skb);
2443 
2444 	/* record the location of the first descriptor for this packet */
2445 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2446 
2447 	/* setup IPv4/IPv6 offloads */
2448 	if (protocol == htons(ETH_P_IP))
2449 		tx_flags |= I40E_TX_FLAGS_IPV4;
2450 	else if (protocol == htons(ETH_P_IPV6))
2451 		tx_flags |= I40E_TX_FLAGS_IPV6;
2452 
2453 	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
2454 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2455 
2456 	if (tso < 0)
2457 		goto out_drop;
2458 	else if (tso)
2459 		tx_flags |= I40E_TX_FLAGS_TSO;
2460 
2461 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2462 
2463 	if (tsyn)
2464 		tx_flags |= I40E_TX_FLAGS_TSYN;
2465 
2466 	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
2467 		if (skb_linearize(skb))
2468 			goto out_drop;
2469 
2470 	skb_tx_timestamp(skb);
2471 
2472 	/* always enable CRC insertion offload */
2473 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2474 
2475 	/* Always offload the checksum, since it's in the data descriptor */
2476 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2477 		tx_flags |= I40E_TX_FLAGS_CSUM;
2478 
2479 		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
2480 				    tx_ring, &cd_tunneling);
2481 	}
2482 
2483 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2484 			   cd_tunneling, cd_l2tag2);
2485 
2486 	/* Add Flow Director ATR if it's enabled.
2487 	 *
2488 	 * NOTE: this must always be directly before the data descriptor.
2489 	 */
2490 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2491 
2492 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2493 		    td_cmd, td_offset);
2494 
2495 	return NETDEV_TX_OK;
2496 
2497 out_drop:
2498 	dev_kfree_skb_any(skb);
2499 	return NETDEV_TX_OK;
2500 }
2501 
2502 /**
2503  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2504  * @skb:    send buffer
2505  * @netdev: network interface device structure
2506  *
2507  * Returns NETDEV_TX_OK if sent, else an error code
2508  **/
2509 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2510 {
2511 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2512 	struct i40e_vsi *vsi = np->vsi;
2513 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2514 
	/* the hardware can't handle really short frames, so pad the skb up
	 * to I40E_MIN_TX_LEN in software; hardware padding works beyond
	 * that point
	 */
2518 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2519 		return NETDEV_TX_OK;
2520 
2521 	return i40e_xmit_frame_ring(skb, tx_ring);
2522 }
2523