xref: /openbmc/linux/drivers/net/ethernet/intel/i40e/i40e_txrx.c (revision a03a8dbe20eff6d57aae3147577bf84b52aba4e6)
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31 
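/**
 * build_ctob - build the cmd_type_offset_bsz quadword for a Tx data descriptor
 * @td_cmd: descriptor command bits
 * @td_offset: descriptor offset field
 * @size: data buffer size in bytes
 * @td_tag: L2TAG1 (VLAN tag) value
 *
 * Shifts each field into place in the descriptor's second quadword, sets
 * DTYPE to "data" and returns the result in little-endian byte order.
 **/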
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33 				u32 td_tag)
34 {
35 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41 
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
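/* number of 1 ms waits i40e_program_fdir_filter() will make for two free
 * descriptors before giving up and returning -EAGAIN
 */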
43 #define I40E_FD_CLEAN_DELAY 10
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: the filter parameters that describe the flow to add or remove
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52 			     struct i40e_pf *pf, bool add)
53 {
54 	struct i40e_filter_program_desc *fdir_desc;
55 	struct i40e_tx_buffer *tx_buf, *first;
56 	struct i40e_tx_desc *tx_desc;
57 	struct i40e_ring *tx_ring;
58 	unsigned int fpt, dcc;
59 	struct i40e_vsi *vsi;
60 	struct device *dev;
61 	dma_addr_t dma;
62 	u32 td_cmd = 0;
63 	u16 delay = 0;
64 	u16 i;
65 
66 	/* find existing FDIR VSI */
67 	vsi = NULL;
68 	for (i = 0; i < pf->num_alloc_vsi; i++)
69 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70 			vsi = pf->vsi[i];
71 	if (!vsi)
72 		return -ENOENT;
73 
74 	tx_ring = vsi->tx_rings[0];
75 	dev = tx_ring->dev;
76 
77 	/* we need two descriptors to add/del a filter and we can wait */
78 	do {
79 		if (I40E_DESC_UNUSED(tx_ring) > 1)
80 			break;
81 		msleep_interruptible(1);
82 		delay++;
83 	} while (delay < I40E_FD_CLEAN_DELAY);
84 
85 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86 		return -EAGAIN;
87 
88 	dma = dma_map_single(dev, raw_packet,
89 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90 	if (dma_mapping_error(dev, dma))
91 		goto dma_fail;
92 
93 	/* grab the next descriptor */
94 	i = tx_ring->next_to_use;
95 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 	first = &tx_ring->tx_bi[i];
97 	memset(first, 0, sizeof(struct i40e_tx_buffer));
98 
99 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100 
101 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
103 
104 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106 
107 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109 
110 	/* Use LAN VSI Id if not programmed by user */
111 	if (fdir_data->dest_vsi == 0)
112 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114 	else
115 		fpt |= ((u32)fdir_data->dest_vsi <<
116 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118 
119 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120 
121 	if (add)
122 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124 	else
125 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127 
128 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 	       I40E_TXD_FLTR_QW1_DEST_MASK;
130 
131 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133 
134 	if (fdir_data->cnt_index != 0) {
135 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 		dcc |= ((u32)fdir_data->cnt_index <<
137 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139 	}
140 
141 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 	fdir_desc->rsvd = cpu_to_le32(0);
143 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145 
146 	/* Now program a dummy descriptor */
147 	i = tx_ring->next_to_use;
148 	tx_desc = I40E_TX_DESC(tx_ring, i);
149 	tx_buf = &tx_ring->tx_bi[i];
150 
151 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152 
153 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154 
155 	/* record length, and DMA address */
156 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 	dma_unmap_addr_set(tx_buf, dma, dma);
158 
159 	tx_desc->buffer_addr = cpu_to_le64(dma);
160 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161 
162 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 	tx_buf->raw_buf = (void *)raw_packet;
164 
165 	tx_desc->cmd_type_offset_bsz =
166 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167 
168 	/* set the timestamp */
169 	tx_buf->time_stamp = jiffies;
170 
171 	/* Force memory writes to complete before letting h/w
172 	 * know there are new descriptors to fetch.
173 	 */
174 	wmb();
175 
176 	/* Mark the data descriptor to be watched */
177 	first->next_to_watch = tx_desc;
178 
179 	writel(tx_ring->next_to_use, tx_ring->tail);
180 	return 0;
181 
182 dma_fail:
183 	return -1;
184 }
185 
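/* The dummy packets below start with a 14 byte Ethernet header, so the IP
 * header sits at offset 14; the UDP/IPv4 dummy frame is 14 + 20 + 8 = 42
 * bytes long.
 */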
186 #define IP_HEADER_OFFSET 14
187 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
188 /**
189  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
190  * @vsi: pointer to the targeted VSI
191  * @fd_data: the flow director data required for the FDir descriptor
192  * @add: true adds a filter, false removes it
193  *
194  * Returns 0 if the filters were successfully added or removed
195  **/
196 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
197 				   struct i40e_fdir_filter *fd_data,
198 				   bool add)
199 {
200 	struct i40e_pf *pf = vsi->back;
201 	struct udphdr *udp;
202 	struct iphdr *ip;
203 	bool err = false;
204 	u8 *raw_packet;
205 	int ret;
206 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
207 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
208 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
209 
210 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
211 	if (!raw_packet)
212 		return -ENOMEM;
213 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
214 
215 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
216 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
217 	      + sizeof(struct iphdr));
218 
219 	ip->daddr = fd_data->dst_ip[0];
220 	udp->dest = fd_data->dst_port;
221 	ip->saddr = fd_data->src_ip[0];
222 	udp->source = fd_data->src_port;
223 
224 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
225 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
226 	if (ret) {
227 		dev_info(&pf->pdev->dev,
228 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
229 			 fd_data->pctype, fd_data->fd_id, ret);
230 		err = true;
231 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
232 		if (add)
233 			dev_info(&pf->pdev->dev,
234 				 "Filter OK for PCTYPE %d loc = %d\n",
235 				 fd_data->pctype, fd_data->fd_id);
236 		else
237 			dev_info(&pf->pdev->dev,
238 				 "Filter deleted for PCTYPE %d loc = %d\n",
239 				 fd_data->pctype, fd_data->fd_id);
240 	}
241 	return err ? -EOPNOTSUPP : 0;
242 }
243 
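/* Ethernet (14) + IPv4 (20) + TCP (20) header bytes */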
244 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
245 /**
246  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247  * @vsi: pointer to the targeted VSI
248  * @fd_data: the flow director data required for the FDir descriptor
249  * @add: true adds a filter, false removes it
250  *
251  * Returns 0 if the filters were successfully added or removed
252  **/
253 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254 				   struct i40e_fdir_filter *fd_data,
255 				   bool add)
256 {
257 	struct i40e_pf *pf = vsi->back;
258 	struct tcphdr *tcp;
259 	struct iphdr *ip;
260 	bool err = false;
261 	u8 *raw_packet;
262 	int ret;
263 	/* Dummy packet */
264 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267 		0x0, 0x72, 0, 0, 0, 0};
268 
269 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270 	if (!raw_packet)
271 		return -ENOMEM;
272 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273 
274 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276 	      + sizeof(struct iphdr));
277 
278 	ip->daddr = fd_data->dst_ip[0];
279 	tcp->dest = fd_data->dst_port;
280 	ip->saddr = fd_data->src_ip[0];
281 	tcp->source = fd_data->src_port;
282 
283 	if (add) {
284 		pf->fd_tcp_rule++;
285 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
286 			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
287 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
288 		}
289 	} else {
290 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
291 				  (pf->fd_tcp_rule - 1) : 0;
292 		if (pf->fd_tcp_rule == 0) {
293 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
294 			dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
295 		}
296 	}
297 
298 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
299 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
300 
301 	if (ret) {
302 		dev_info(&pf->pdev->dev,
303 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
304 			 fd_data->pctype, fd_data->fd_id, ret);
305 		err = true;
306 	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
307 		if (add)
308 			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
309 				 fd_data->pctype, fd_data->fd_id);
310 		else
311 			dev_info(&pf->pdev->dev,
312 				 "Filter deleted for PCTYPE %d loc = %d\n",
313 				 fd_data->pctype, fd_data->fd_id);
314 	}
315 
316 	return err ? -EOPNOTSUPP : 0;
317 }
318 
319 /**
320  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
321  * a specific flow spec
322  * @vsi: pointer to the targeted VSI
323  * @fd_data: the flow director data required for the FDir descriptor
324  * @add: true adds a filter, false removes it
325  *
326  * Always returns -EOPNOTSUPP
327  **/
328 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
329 				    struct i40e_fdir_filter *fd_data,
330 				    bool add)
331 {
332 	return -EOPNOTSUPP;
333 }
334 
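/* Ethernet (14) + IPv4 (20) header bytes, no L4 header in the dummy frame */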
335 #define I40E_IP_DUMMY_PACKET_LEN 34
336 /**
337  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
338  * a specific flow spec
339  * @vsi: pointer to the targeted VSI
340  * @fd_data: the flow director data required for the FDir descriptor
341  * @add: true adds a filter, false removes it
342  *
343  * Returns 0 if the filters were successfully added or removed
344  **/
345 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
346 				  struct i40e_fdir_filter *fd_data,
347 				  bool add)
348 {
349 	struct i40e_pf *pf = vsi->back;
350 	struct iphdr *ip;
351 	bool err = false;
352 	u8 *raw_packet;
353 	int ret;
354 	int i;
355 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
356 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
357 		0, 0, 0, 0};
358 
359 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
360 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
361 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
362 		if (!raw_packet)
363 			return -ENOMEM;
364 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
365 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
366 
367 		ip->saddr = fd_data->src_ip[0];
368 		ip->daddr = fd_data->dst_ip[0];
369 		ip->protocol = 0;
370 
371 		fd_data->pctype = i;
372 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
373 
374 		if (ret) {
375 			dev_info(&pf->pdev->dev,
376 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
377 				 fd_data->pctype, fd_data->fd_id, ret);
378 			err = true;
379 		} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
380 			if (add)
381 				dev_info(&pf->pdev->dev,
382 					 "Filter OK for PCTYPE %d loc = %d\n",
383 					 fd_data->pctype, fd_data->fd_id);
384 			else
385 				dev_info(&pf->pdev->dev,
386 					 "Filter deleted for PCTYPE %d loc = %d\n",
387 					 fd_data->pctype, fd_data->fd_id);
388 		}
389 	}
390 
391 	return err ? -EOPNOTSUPP : 0;
392 }
393 
394 /**
395  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
396  * @vsi: pointer to the targeted VSI
397  * @input: the flow director filter spec to add or delete
398  * @add: true adds a filter, false removes it
399  *
400  **/
401 int i40e_add_del_fdir(struct i40e_vsi *vsi,
402 		      struct i40e_fdir_filter *input, bool add)
403 {
404 	struct i40e_pf *pf = vsi->back;
405 	int ret;
406 
407 	switch (input->flow_type & ~FLOW_EXT) {
408 	case TCP_V4_FLOW:
409 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
410 		break;
411 	case UDP_V4_FLOW:
412 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
413 		break;
414 	case SCTP_V4_FLOW:
415 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
416 		break;
417 	case IPV4_FLOW:
418 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
419 		break;
420 	case IP_USER_FLOW:
421 		switch (input->ip4_proto) {
422 		case IPPROTO_TCP:
423 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
424 			break;
425 		case IPPROTO_UDP:
426 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
427 			break;
428 		case IPPROTO_SCTP:
429 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
430 			break;
431 		default:
432 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
433 			break;
434 		}
435 		break;
436 	default:
437 		dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
438 			 input->flow_type);
439 		ret = -EINVAL;
440 	}
441 
442 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
443 	return ret;
444 }
445 
446 /**
447  * i40e_fd_handle_status - check the Programming Status for FD
448  * @rx_ring: the Rx ring for this descriptor
449  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
450  * @prog_id: the id originally used for programming
451  *
452  * This is used to verify whether the FD programming or invalidation
453  * requested by SW succeeded in HW, and to take action accordingly.
454  **/
455 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
456 				  union i40e_rx_desc *rx_desc, u8 prog_id)
457 {
458 	struct i40e_pf *pf = rx_ring->vsi->back;
459 	struct pci_dev *pdev = pf->pdev;
460 	u32 fcnt_prog, fcnt_avail;
461 	u32 error;
462 	u64 qw;
463 
464 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
465 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
466 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
467 
468 	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
469 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
470 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
471 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
472 				 rx_desc->wb.qword0.hi_dword.fd_id);
473 
474 		/* Check if the programming error is for ATR.
475 		 * If so, auto disable ATR and set a state for
476 		 * flush in progress. On later passes do nothing while the flush
477 		 * is still in progress; once it completes the state bit will
478 		 * be cleared.
479 		 */
480 		if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
481 			return;
482 
483 		pf->fd_add_err++;
484 		/* store the current atr filter count */
485 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
486 
487 		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
488 		    (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
489 			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
490 			set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
491 		}
492 
493 		/* filter programming failed most likely due to table full */
494 		fcnt_prog = i40e_get_global_fd_count(pf);
495 		fcnt_avail = pf->fdir_pf_filter_count;
496 		/* If ATR is running, fcnt_prog can change quickly; when we are
497 		 * very close to full it makes sense to disable FD ATR/SB and
498 		 * then re-enable it when there is room.
499 		 */
500 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
501 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
502 			    !(pf->auto_disable_flags &
503 				     I40E_FLAG_FD_SB_ENABLED)) {
504 				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
505 				pf->auto_disable_flags |=
506 							I40E_FLAG_FD_SB_ENABLED;
507 			}
508 		} else {
509 			dev_info(&pdev->dev,
510 				"FD filter programming failed due to incorrect filter parameters\n");
511 		}
512 	} else if (error ==
513 			  (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
514 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
515 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
516 				 rx_desc->wb.qword0.hi_dword.fd_id);
517 	}
518 }
519 
520 /**
521  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
522  * @ring:      the ring that owns the buffer
523  * @tx_buffer: the buffer to free
524  **/
525 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
526 					    struct i40e_tx_buffer *tx_buffer)
527 {
528 	if (tx_buffer->skb) {
529 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
530 			kfree(tx_buffer->raw_buf);
531 		else
532 			dev_kfree_skb_any(tx_buffer->skb);
533 
534 		if (dma_unmap_len(tx_buffer, len))
535 			dma_unmap_single(ring->dev,
536 					 dma_unmap_addr(tx_buffer, dma),
537 					 dma_unmap_len(tx_buffer, len),
538 					 DMA_TO_DEVICE);
539 	} else if (dma_unmap_len(tx_buffer, len)) {
540 		dma_unmap_page(ring->dev,
541 			       dma_unmap_addr(tx_buffer, dma),
542 			       dma_unmap_len(tx_buffer, len),
543 			       DMA_TO_DEVICE);
544 	}
545 	tx_buffer->next_to_watch = NULL;
546 	tx_buffer->skb = NULL;
547 	dma_unmap_len_set(tx_buffer, len, 0);
548 	/* tx_buffer must be completely set up in the transmit path */
549 }
550 
551 /**
552  * i40e_clean_tx_ring - Free any empty Tx buffers
553  * @tx_ring: ring to be cleaned
554  **/
555 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
556 {
557 	unsigned long bi_size;
558 	u16 i;
559 
560 	/* ring already cleared, nothing to do */
561 	if (!tx_ring->tx_bi)
562 		return;
563 
564 	/* Free all the Tx ring sk_buffs */
565 	for (i = 0; i < tx_ring->count; i++)
566 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
567 
568 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
569 	memset(tx_ring->tx_bi, 0, bi_size);
570 
571 	/* Zero out the descriptor ring */
572 	memset(tx_ring->desc, 0, tx_ring->size);
573 
574 	tx_ring->next_to_use = 0;
575 	tx_ring->next_to_clean = 0;
576 
577 	if (!tx_ring->netdev)
578 		return;
579 
580 	/* cleanup Tx queue statistics */
581 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
582 						  tx_ring->queue_index));
583 }
584 
585 /**
586  * i40e_free_tx_resources - Free Tx resources per queue
587  * @tx_ring: Tx descriptor ring for a specific queue
588  *
589  * Free all transmit software resources
590  **/
591 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
592 {
593 	i40e_clean_tx_ring(tx_ring);
594 	kfree(tx_ring->tx_bi);
595 	tx_ring->tx_bi = NULL;
596 
597 	if (tx_ring->desc) {
598 		dma_free_coherent(tx_ring->dev, tx_ring->size,
599 				  tx_ring->desc, tx_ring->dma);
600 		tx_ring->desc = NULL;
601 	}
602 }
603 
604 /**
605  * i40e_get_head - Retrieve head from head writeback
606  * @tx_ring:  tx ring to fetch head of
607  *
608  * Returns value of Tx ring head based on value stored
609  * in head write-back location
610  **/
611 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
612 {
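	/* the head write-back location is the extra u32 allocated just past
	 * the last descriptor in i40e_setup_tx_descriptors()
	 */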
613 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
614 
615 	return le32_to_cpu(*(volatile __le32 *)head);
616 }
617 
618 /**
619  * i40e_get_tx_pending - how many tx descriptors not processed
620  * @ring: the ring of descriptors
621  *
622  * Since there is no access to the ring head register
623  * in XL710, we need to use our local copies
624  **/
625 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
626 {
627 	u32 head, tail;
628 
629 	head = i40e_get_head(ring);
630 	tail = readl(ring->tail);
631 
632 	if (head != tail)
633 		return (head < tail) ?
634 			tail - head : (tail + ring->count - head);
635 
636 	return 0;
637 }
638 
639 /**
640  * i40e_check_tx_hang - Is there a hang in the Tx queue
641  * @tx_ring: the ring of descriptors
642  **/
643 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
644 {
645 	u32 tx_done = tx_ring->stats.packets;
646 	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
647 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
648 	struct i40e_pf *pf = tx_ring->vsi->back;
649 	bool ret = false;
650 
651 	clear_check_for_tx_hang(tx_ring);
652 
653 	/* Check for a hung queue, but be thorough. This verifies
654 	 * that a transmit has been completed since the previous
655 	 * check AND there is at least one packet pending. The
656 	 * ARMED bit is set to indicate a potential hang. The
657 	 * bit is cleared if a pause frame is received to remove
658 	 * false hang detection due to PFC or 802.3x frames. By
659 	 * requiring this to fail twice we avoid races with
660 	 * PFC clearing the ARMED bit and conditions where we
661 	 * run the check_tx_hang logic with a transmit completion
662 	 * pending but without time to complete it yet.
663 	 */
664 	if ((tx_done_old == tx_done) && tx_pending) {
665 		/* make sure it is true for two checks in a row */
666 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
667 				       &tx_ring->state);
668 	} else if (tx_done_old == tx_done &&
669 		   (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) {
670 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
671 			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d\n",
672 				 tx_pending, tx_ring->queue_index);
673 		pf->tx_sluggish_count++;
674 	} else {
675 		/* update completed stats and disarm the hang check */
676 		tx_ring->tx_stats.tx_done_old = tx_done;
677 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
678 	}
679 
680 	return ret;
681 }
682 
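/* mask used in i40e_clean_tx_irq() to decide when to force a descriptor
 * write-back: the write-back is armed whenever cleaning stops with
 * (next_to_clean & WB_STRIDE) != WB_STRIDE, i.e. off a 4-descriptor boundary
 */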
683 #define WB_STRIDE 0x3
684 
685 /**
686  * i40e_clean_tx_irq - Reclaim resources after transmit completes
687  * @tx_ring:  tx ring to clean
688  * @budget:   how many cleans we're allowed
689  *
690  * Returns true if there's any budget left (i.e. the clean is finished)
691  **/
692 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
693 {
694 	u16 i = tx_ring->next_to_clean;
695 	struct i40e_tx_buffer *tx_buf;
696 	struct i40e_tx_desc *tx_head;
697 	struct i40e_tx_desc *tx_desc;
698 	unsigned int total_packets = 0;
699 	unsigned int total_bytes = 0;
700 
701 	tx_buf = &tx_ring->tx_bi[i];
702 	tx_desc = I40E_TX_DESC(tx_ring, i);
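	/* bias the index by -count so the wrap checks below reduce to a
	 * simple test for zero instead of a compare against ring->count
	 */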
703 	i -= tx_ring->count;
704 
705 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
706 
707 	do {
708 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
709 
710 		/* if next_to_watch is not set then there is no work pending */
711 		if (!eop_desc)
712 			break;
713 
714 		/* prevent any other reads prior to eop_desc */
715 		read_barrier_depends();
716 
717 		/* we have caught up to head, no work left to do */
718 		if (tx_head == tx_desc)
719 			break;
720 
721 		/* clear next_to_watch to prevent false hangs */
722 		tx_buf->next_to_watch = NULL;
723 
724 		/* update the statistics for this packet */
725 		total_bytes += tx_buf->bytecount;
726 		total_packets += tx_buf->gso_segs;
727 
728 		/* free the skb */
729 		dev_consume_skb_any(tx_buf->skb);
730 
731 		/* unmap skb header data */
732 		dma_unmap_single(tx_ring->dev,
733 				 dma_unmap_addr(tx_buf, dma),
734 				 dma_unmap_len(tx_buf, len),
735 				 DMA_TO_DEVICE);
736 
737 		/* clear tx_buffer data */
738 		tx_buf->skb = NULL;
739 		dma_unmap_len_set(tx_buf, len, 0);
740 
741 		/* unmap remaining buffers */
742 		while (tx_desc != eop_desc) {
743 
744 			tx_buf++;
745 			tx_desc++;
746 			i++;
747 			if (unlikely(!i)) {
748 				i -= tx_ring->count;
749 				tx_buf = tx_ring->tx_bi;
750 				tx_desc = I40E_TX_DESC(tx_ring, 0);
751 			}
752 
753 			/* unmap any remaining paged data */
754 			if (dma_unmap_len(tx_buf, len)) {
755 				dma_unmap_page(tx_ring->dev,
756 					       dma_unmap_addr(tx_buf, dma),
757 					       dma_unmap_len(tx_buf, len),
758 					       DMA_TO_DEVICE);
759 				dma_unmap_len_set(tx_buf, len, 0);
760 			}
761 		}
762 
763 		/* move us one more past the eop_desc for start of next pkt */
764 		tx_buf++;
765 		tx_desc++;
766 		i++;
767 		if (unlikely(!i)) {
768 			i -= tx_ring->count;
769 			tx_buf = tx_ring->tx_bi;
770 			tx_desc = I40E_TX_DESC(tx_ring, 0);
771 		}
772 
773 		prefetch(tx_desc);
774 
775 		/* update budget accounting */
776 		budget--;
777 	} while (likely(budget));
778 
779 	i += tx_ring->count;
780 	tx_ring->next_to_clean = i;
781 	u64_stats_update_begin(&tx_ring->syncp);
782 	tx_ring->stats.bytes += total_bytes;
783 	tx_ring->stats.packets += total_packets;
784 	u64_stats_update_end(&tx_ring->syncp);
785 	tx_ring->q_vector->tx.total_bytes += total_bytes;
786 	tx_ring->q_vector->tx.total_packets += total_packets;
787 
788 	/* check to see if there are any non-cache aligned descriptors
789 	 * waiting to be written back, and kick the hardware to force
790 	 * them to be written back in case of napi polling
791 	 */
792 	if (budget &&
793 	    !((i & WB_STRIDE) == WB_STRIDE) &&
794 	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
795 	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
796 		tx_ring->arm_wb = true;
797 	else
798 		tx_ring->arm_wb = false;
799 
800 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
801 		/* schedule immediate reset if we believe we hung */
802 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
803 			 "  VSI                  <%d>\n"
804 			 "  Tx Queue             <%d>\n"
805 			 "  next_to_use          <%x>\n"
806 			 "  next_to_clean        <%x>\n",
807 			 tx_ring->vsi->seid,
808 			 tx_ring->queue_index,
809 			 tx_ring->next_to_use, i);
810 		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
811 			 "  time_stamp           <%lx>\n"
812 			 "  jiffies              <%lx>\n",
813 			 tx_ring->tx_bi[i].time_stamp, jiffies);
814 
815 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
816 
817 		dev_info(tx_ring->dev,
818 			 "tx hang detected on queue %d, reset requested\n",
819 			 tx_ring->queue_index);
820 
821 		/* do not fire the reset immediately, wait for the stack to
822 		 * decide we are truly stuck, also prevents every queue from
823 		 * simultaneously requesting a reset
824 		 */
825 
826 		/* the adapter is about to reset, no point in enabling polling */
827 		budget = 1;
828 	}
829 
830 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
831 						      tx_ring->queue_index),
832 				  total_packets, total_bytes);
833 
834 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
835 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
836 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
837 		/* Make sure that anybody stopping the queue after this
838 		 * sees the new next_to_clean.
839 		 */
840 		smp_mb();
841 		if (__netif_subqueue_stopped(tx_ring->netdev,
842 					     tx_ring->queue_index) &&
843 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
844 			netif_wake_subqueue(tx_ring->netdev,
845 					    tx_ring->queue_index);
846 			++tx_ring->tx_stats.restart_queue;
847 		}
848 	}
849 
850 	return !!budget;
851 }
852 
853 /**
854  * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
855  * @vsi: the VSI we care about
856  * @q_vector: the vector on which to force writeback
857  *
858  **/
859 static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
860 {
861 	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
862 		  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
863 		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
864 		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
865 		  /* allow 00 to be written to the index */
866 
867 	wr32(&vsi->back->hw,
868 	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
869 	     val);
870 }
871 
872 /**
873  * i40e_set_new_dynamic_itr - Find new ITR level
874  * @rc: structure containing ring performance data
875  *
876  * Stores a new ITR value based on packets and byte counts during
877  * the last interrupt.  The advantage of per interrupt computation
878  * is faster updates and more accurate ITR for the current traffic
879  * pattern.  Constants in this function were computed based on
880  * theoretical maximum wire speed and thresholds were set based on
881  * testing data as well as attempting to minimize response time
882  * while increasing bulk throughput.
883  **/
884 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
885 {
886 	enum i40e_latency_range new_latency_range = rc->latency_range;
887 	u32 new_itr = rc->itr;
888 	int bytes_per_int;
889 
890 	if (rc->total_packets == 0 || !rc->itr)
891 		return;
892 
893 	/* simple throttle rate management
894 	 *   0-10MB/s   lowest (100000 ints/s)
895 	 *  10-20MB/s   low    (20000 ints/s)
896 	 *  20-1249MB/s bulk   (8000 ints/s)
897 	 */
898 	bytes_per_int = rc->total_bytes / rc->itr;
899 	switch (rc->itr) {
900 	case I40E_LOWEST_LATENCY:
901 		if (bytes_per_int > 10)
902 			new_latency_range = I40E_LOW_LATENCY;
903 		break;
904 	case I40E_LOW_LATENCY:
905 		if (bytes_per_int > 20)
906 			new_latency_range = I40E_BULK_LATENCY;
907 		else if (bytes_per_int <= 10)
908 			new_latency_range = I40E_LOWEST_LATENCY;
909 		break;
910 	case I40E_BULK_LATENCY:
911 		if (bytes_per_int <= 20)
912 			new_latency_range = I40E_LOW_LATENCY;
913 		break;
914 	}
915 
	/* remember the range we settled on so the next evaluation starts from it */
	rc->latency_range = new_latency_range;

916 	switch (new_latency_range) {
917 	case I40E_LOWEST_LATENCY:
918 		new_itr = I40E_ITR_100K;
919 		break;
920 	case I40E_LOW_LATENCY:
921 		new_itr = I40E_ITR_20K;
922 		break;
923 	case I40E_BULK_LATENCY:
924 		new_itr = I40E_ITR_8K;
925 		break;
926 	default:
927 		break;
928 	}
929 
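	/* The smoothing below is a weighted harmonic mean: in interrupt-rate
	 * terms (rate = 1/itr) the result keeps 90% of the old rate and moves
	 * only 10% of the way toward the newly selected rate each pass.
	 */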
930 	if (new_itr != rc->itr) {
931 		/* do an exponential smoothing */
932 		new_itr = (10 * new_itr * rc->itr) /
933 			  ((9 * new_itr) + rc->itr);
934 		rc->itr = new_itr & I40E_MAX_ITR;
935 	}
936 
937 	rc->total_bytes = 0;
938 	rc->total_packets = 0;
939 }
940 
941 /**
942  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
943  * @q_vector: the vector to adjust
944  **/
945 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
946 {
947 	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
948 	struct i40e_hw *hw = &q_vector->vsi->back->hw;
949 	u32 reg_addr;
950 	u16 old_itr;
951 
952 	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
953 	old_itr = q_vector->rx.itr;
954 	i40e_set_new_dynamic_itr(&q_vector->rx);
955 	if (old_itr != q_vector->rx.itr)
956 		wr32(hw, reg_addr, q_vector->rx.itr);
957 
958 	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
959 	old_itr = q_vector->tx.itr;
960 	i40e_set_new_dynamic_itr(&q_vector->tx);
961 	if (old_itr != q_vector->tx.itr)
962 		wr32(hw, reg_addr, q_vector->tx.itr);
963 }
964 
965 /**
966  * i40e_clean_programming_status - clean the programming status descriptor
967  * @rx_ring: the rx ring that has this descriptor
968  * @rx_desc: the rx descriptor written back by HW
969  *
970  * Flow director should handle FD_FILTER_STATUS to check its filter programming
971  * status being successful or not and take actions accordingly. FCoE should
972  * handle its context/filter programming/invalidation status and take actions.
973  *
974  **/
975 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
976 					  union i40e_rx_desc *rx_desc)
977 {
978 	u64 qw;
979 	u8 id;
980 
981 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
982 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
983 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
984 
985 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
986 		i40e_fd_handle_status(rx_ring, rx_desc, id);
987 #ifdef I40E_FCOE
988 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
989 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
990 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
991 #endif
992 }
993 
994 /**
995  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
996  * @tx_ring: the tx ring to set up
997  *
998  * Return 0 on success, negative on error
999  **/
1000 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
1001 {
1002 	struct device *dev = tx_ring->dev;
1003 	int bi_size;
1004 
1005 	if (!dev)
1006 		return -ENOMEM;
1007 
1008 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
1009 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
1010 	if (!tx_ring->tx_bi)
1011 		goto err;
1012 
1013 	/* round up to nearest 4K */
1014 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
1015 	/* add a u32 for head writeback; the ALIGN below also guarantees
1016 	 * the ring is at least one cache line in size
1017 	 */
1018 	tx_ring->size += sizeof(u32);
1019 	tx_ring->size = ALIGN(tx_ring->size, 4096);
1020 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1021 					   &tx_ring->dma, GFP_KERNEL);
1022 	if (!tx_ring->desc) {
1023 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1024 			 tx_ring->size);
1025 		goto err;
1026 	}
1027 
1028 	tx_ring->next_to_use = 0;
1029 	tx_ring->next_to_clean = 0;
1030 	return 0;
1031 
1032 err:
1033 	kfree(tx_ring->tx_bi);
1034 	tx_ring->tx_bi = NULL;
1035 	return -ENOMEM;
1036 }
1037 
1038 /**
1039  * i40e_clean_rx_ring - Free Rx buffers
1040  * @rx_ring: ring to be cleaned
1041  **/
1042 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1043 {
1044 	struct device *dev = rx_ring->dev;
1045 	struct i40e_rx_buffer *rx_bi;
1046 	unsigned long bi_size;
1047 	u16 i;
1048 
1049 	/* ring already cleared, nothing to do */
1050 	if (!rx_ring->rx_bi)
1051 		return;
1052 
1053 	if (ring_is_ps_enabled(rx_ring)) {
1054 		int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1055 
1056 		rx_bi = &rx_ring->rx_bi[0];
1057 		if (rx_bi->hdr_buf) {
1058 			dma_free_coherent(dev,
1059 					  bufsz,
1060 					  rx_bi->hdr_buf,
1061 					  rx_bi->dma);
1062 			for (i = 0; i < rx_ring->count; i++) {
1063 				rx_bi = &rx_ring->rx_bi[i];
1064 				rx_bi->dma = 0;
1065 				rx_bi->hdr_buf = NULL;
1066 			}
1067 		}
1068 	}
1069 	/* Free all the Rx ring sk_buffs */
1070 	for (i = 0; i < rx_ring->count; i++) {
1071 		rx_bi = &rx_ring->rx_bi[i];
1072 		if (rx_bi->dma) {
1073 			dma_unmap_single(dev,
1074 					 rx_bi->dma,
1075 					 rx_ring->rx_buf_len,
1076 					 DMA_FROM_DEVICE);
1077 			rx_bi->dma = 0;
1078 		}
1079 		if (rx_bi->skb) {
1080 			dev_kfree_skb(rx_bi->skb);
1081 			rx_bi->skb = NULL;
1082 		}
1083 		if (rx_bi->page) {
1084 			if (rx_bi->page_dma) {
1085 				dma_unmap_page(dev,
1086 					       rx_bi->page_dma,
1087 					       PAGE_SIZE / 2,
1088 					       DMA_FROM_DEVICE);
1089 				rx_bi->page_dma = 0;
1090 			}
1091 			__free_page(rx_bi->page);
1092 			rx_bi->page = NULL;
1093 			rx_bi->page_offset = 0;
1094 		}
1095 	}
1096 
1097 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1098 	memset(rx_ring->rx_bi, 0, bi_size);
1099 
1100 	/* Zero out the descriptor ring */
1101 	memset(rx_ring->desc, 0, rx_ring->size);
1102 
1103 	rx_ring->next_to_clean = 0;
1104 	rx_ring->next_to_use = 0;
1105 }
1106 
1107 /**
1108  * i40e_free_rx_resources - Free Rx resources
1109  * @rx_ring: ring to clean the resources from
1110  *
1111  * Free all receive software resources
1112  **/
1113 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1114 {
1115 	i40e_clean_rx_ring(rx_ring);
1116 	kfree(rx_ring->rx_bi);
1117 	rx_ring->rx_bi = NULL;
1118 
1119 	if (rx_ring->desc) {
1120 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1121 				  rx_ring->desc, rx_ring->dma);
1122 		rx_ring->desc = NULL;
1123 	}
1124 }
1125 
1126 /**
1127  * i40e_alloc_rx_headers - allocate rx header buffers
1128  * @rx_ring: ring to alloc buffers
1129  *
1130  * Allocate rx header buffers for the entire ring. As these are static,
1131  * this is only called when setting up a new ring.
1132  **/
1133 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1134 {
1135 	struct device *dev = rx_ring->dev;
1136 	struct i40e_rx_buffer *rx_bi;
1137 	dma_addr_t dma;
1138 	void *buffer;
1139 	int buf_size;
1140 	int i;
1141 
1142 	if (rx_ring->rx_bi[0].hdr_buf)
1143 		return;
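	/* all header buffers come from a single coherent allocation, which is
	 * why only rx_bi[0].hdr_buf is checked here and used for the free in
	 * i40e_clean_rx_ring()
	 */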
1144 	/* Make sure the buffers don't cross cache line boundaries. */
1145 	buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1146 	buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1147 				    &dma, GFP_KERNEL);
1148 	if (!buffer)
1149 		return;
1150 	for (i = 0; i < rx_ring->count; i++) {
1151 		rx_bi = &rx_ring->rx_bi[i];
1152 		rx_bi->dma = dma + (i * buf_size);
1153 		rx_bi->hdr_buf = buffer + (i * buf_size);
1154 	}
1155 }
1156 
1157 /**
1158  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1159  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1160  *
1161  * Returns 0 on success, negative on failure
1162  **/
1163 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1164 {
1165 	struct device *dev = rx_ring->dev;
1166 	int bi_size;
1167 
1168 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1169 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1170 	if (!rx_ring->rx_bi)
1171 		goto err;
1172 
1173 	u64_stats_init(&rx_ring->syncp);
1174 
1175 	/* Round up to nearest 4K */
1176 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1177 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1178 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1179 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1180 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1181 					   &rx_ring->dma, GFP_KERNEL);
1182 
1183 	if (!rx_ring->desc) {
1184 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1185 			 rx_ring->size);
1186 		goto err;
1187 	}
1188 
1189 	rx_ring->next_to_clean = 0;
1190 	rx_ring->next_to_use = 0;
1191 
1192 	return 0;
1193 err:
1194 	kfree(rx_ring->rx_bi);
1195 	rx_ring->rx_bi = NULL;
1196 	return -ENOMEM;
1197 }
1198 
1199 /**
1200  * i40e_release_rx_desc - Store the new tail and head values
1201  * @rx_ring: ring to bump
1202  * @val: new head index
1203  **/
1204 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1205 {
1206 	rx_ring->next_to_use = val;
1207 	/* Force memory writes to complete before letting h/w
1208 	 * know there are new descriptors to fetch.  (Only
1209 	 * applicable for weak-ordered memory model archs,
1210 	 * such as IA-64).
1211 	 */
1212 	wmb();
1213 	writel(val, rx_ring->tail);
1214 }
1215 
1216 /**
1217  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1218  * @rx_ring: ring to place buffers on
1219  * @cleaned_count: number of buffers to replace
1220  **/
1221 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1222 {
1223 	u16 i = rx_ring->next_to_use;
1224 	union i40e_rx_desc *rx_desc;
1225 	struct i40e_rx_buffer *bi;
1226 
1227 	/* do nothing if no valid netdev defined */
1228 	if (!rx_ring->netdev || !cleaned_count)
1229 		return;
1230 
1231 	while (cleaned_count--) {
1232 		rx_desc = I40E_RX_DESC(rx_ring, i);
1233 		bi = &rx_ring->rx_bi[i];
1234 
1235 		if (bi->skb) /* desc is in use */
1236 			goto no_buffers;
1237 		if (!bi->page) {
1238 			bi->page = alloc_page(GFP_ATOMIC);
1239 			if (!bi->page) {
1240 				rx_ring->rx_stats.alloc_page_failed++;
1241 				goto no_buffers;
1242 			}
1243 		}
1244 
1245 		if (!bi->page_dma) {
1246 			/* use a half page if we're re-using */
1247 			bi->page_offset ^= PAGE_SIZE / 2;
1248 			bi->page_dma = dma_map_page(rx_ring->dev,
1249 						    bi->page,
1250 						    bi->page_offset,
1251 						    PAGE_SIZE / 2,
1252 						    DMA_FROM_DEVICE);
1253 			if (dma_mapping_error(rx_ring->dev,
1254 					      bi->page_dma)) {
1255 				rx_ring->rx_stats.alloc_page_failed++;
1256 				bi->page_dma = 0;
1257 				goto no_buffers;
1258 			}
1259 		}
1260 
1261 		dma_sync_single_range_for_device(rx_ring->dev,
1262 						 bi->dma,
1263 						 0,
1264 						 rx_ring->rx_hdr_len,
1265 						 DMA_FROM_DEVICE);
1266 		/* Refresh the desc even if buffer_addrs didn't change
1267 		 * because each write-back erases this info.
1268 		 */
1269 		rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1270 		rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1271 		i++;
1272 		if (i == rx_ring->count)
1273 			i = 0;
1274 	}
1275 
1276 no_buffers:
1277 	if (rx_ring->next_to_use != i)
1278 		i40e_release_rx_desc(rx_ring, i);
1279 }
1280 
1281 /**
1282  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1283  * @rx_ring: ring to place buffers on
1284  * @cleaned_count: number of buffers to replace
1285  **/
1286 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1287 {
1288 	u16 i = rx_ring->next_to_use;
1289 	union i40e_rx_desc *rx_desc;
1290 	struct i40e_rx_buffer *bi;
1291 	struct sk_buff *skb;
1292 
1293 	/* do nothing if no valid netdev defined */
1294 	if (!rx_ring->netdev || !cleaned_count)
1295 		return;
1296 
1297 	while (cleaned_count--) {
1298 		rx_desc = I40E_RX_DESC(rx_ring, i);
1299 		bi = &rx_ring->rx_bi[i];
1300 		skb = bi->skb;
1301 
1302 		if (!skb) {
1303 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1304 							rx_ring->rx_buf_len);
1305 			if (!skb) {
1306 				rx_ring->rx_stats.alloc_buff_failed++;
1307 				goto no_buffers;
1308 			}
1309 			/* initialize queue mapping */
1310 			skb_record_rx_queue(skb, rx_ring->queue_index);
1311 			bi->skb = skb;
1312 		}
1313 
1314 		if (!bi->dma) {
1315 			bi->dma = dma_map_single(rx_ring->dev,
1316 						 skb->data,
1317 						 rx_ring->rx_buf_len,
1318 						 DMA_FROM_DEVICE);
1319 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1320 				rx_ring->rx_stats.alloc_buff_failed++;
1321 				bi->dma = 0;
1322 				goto no_buffers;
1323 			}
1324 		}
1325 
1326 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1327 		rx_desc->read.hdr_addr = 0;
1328 		i++;
1329 		if (i == rx_ring->count)
1330 			i = 0;
1331 	}
1332 
1333 no_buffers:
1334 	if (rx_ring->next_to_use != i)
1335 		i40e_release_rx_desc(rx_ring, i);
1336 }
1337 
1338 /**
1339  * i40e_receive_skb - Send a completed packet up the stack
1340  * @rx_ring:  rx ring in play
1341  * @skb: packet to send up
1342  * @vlan_tag: vlan tag for packet
1343  **/
1344 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1345 			     struct sk_buff *skb, u16 vlan_tag)
1346 {
1347 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1348 	struct i40e_vsi *vsi = rx_ring->vsi;
1349 	u64 flags = vsi->back->flags;
1350 
1351 	if (vlan_tag & VLAN_VID_MASK)
1352 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1353 
1354 	if (flags & I40E_FLAG_IN_NETPOLL)
1355 		netif_rx(skb);
1356 	else
1357 		napi_gro_receive(&q_vector->napi, skb);
1358 }
1359 
1360 /**
1361  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1362  * @vsi: the VSI we care about
1363  * @skb: skb currently being received and modified
1364  * @rx_status: status value of last descriptor in packet
1365  * @rx_error: error value of last descriptor in packet
1366  * @rx_ptype: ptype value of last descriptor in packet
1367  **/
1368 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1369 				    struct sk_buff *skb,
1370 				    u32 rx_status,
1371 				    u32 rx_error,
1372 				    u16 rx_ptype)
1373 {
1374 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1375 	bool ipv4 = false, ipv6 = false;
1376 	bool ipv4_tunnel, ipv6_tunnel;
1377 	__wsum rx_udp_csum;
1378 	struct iphdr *iph;
1379 	__sum16 csum;
1380 
1381 	ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1382 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1383 	ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1384 		     (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1385 
1386 	skb->ip_summed = CHECKSUM_NONE;
1387 
1388 	/* Rx csum enabled and ip headers found? */
1389 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1390 		return;
1391 
1392 	/* did the hardware decode the packet and checksum? */
1393 	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1394 		return;
1395 
1396 	/* both known and outer_ip must be set for the below code to work */
1397 	if (!(decoded.known && decoded.outer_ip))
1398 		return;
1399 
1400 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1401 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1402 		ipv4 = true;
1403 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1404 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1405 		ipv6 = true;
1406 
1407 	if (ipv4 &&
1408 	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1409 			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1410 		goto checksum_fail;
1411 
1412 	/* likely incorrect csum if alternate IP extension headers found */
1413 	if (ipv6 &&
1414 	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1415 		/* don't increment checksum err here, non-fatal err */
1416 		return;
1417 
1418 	/* there was some L4 error, count error and punt packet to the stack */
1419 	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
1420 		goto checksum_fail;
1421 
1422 	/* handle packets that were not able to be checksummed due
1423 	 * to arrival speed, in this case the stack can compute
1424 	 * the csum.
1425 	 */
1426 	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
1427 		return;
1428 
1429 	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
1430 	 * it in the driver, hardware does not do it for us.
1431 	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1432 	 * so the total length of IPv4 header is IHL*4 bytes
1433 	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1434 	 */
1435 	if (ipv4_tunnel) {
1436 		skb->transport_header = skb->mac_header +
1437 					sizeof(struct ethhdr) +
1438 					(ip_hdr(skb)->ihl * 4);
1439 
1440 		/* Add 4 bytes for VLAN tagged packets */
1441 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1442 					  skb->protocol == htons(ETH_P_8021AD))
1443 					  ? VLAN_HLEN : 0;
1444 
1445 		if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1446 		    (udp_hdr(skb)->check != 0)) {
1447 			rx_udp_csum = udp_csum(skb);
1448 			iph = ip_hdr(skb);
1449 			csum = csum_tcpudp_magic(
1450 					iph->saddr, iph->daddr,
1451 					(skb->len - skb_transport_offset(skb)),
1452 					IPPROTO_UDP, rx_udp_csum);
1453 
1454 			if (udp_hdr(skb)->check != csum)
1455 				goto checksum_fail;
1456 
1457 		} /* else its GRE and so no outer UDP header */
1458 	}
1459 
1460 	skb->ip_summed = CHECKSUM_UNNECESSARY;
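	/* csum_level tells the stack how many checksum levels beyond the
	 * first were validated; tunneled packets report one extra level
	 */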
1461 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1462 
1463 	return;
1464 
1465 checksum_fail:
1466 	vsi->back->hw_csum_rx_error++;
1467 }
1468 
1469 /**
1470  * i40e_rx_hash - returns the hash value from the Rx descriptor
1471  * @ring: descriptor ring
1472  * @rx_desc: specific descriptor
1473  **/
1474 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1475 			       union i40e_rx_desc *rx_desc)
1476 {
1477 	const __le64 rss_mask =
1478 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1479 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1480 
1481 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1482 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1483 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1484 	else
1485 		return 0;
1486 }
1487 
1488 /**
1489  * i40e_ptype_to_hash - get a hash type
1490  * @ptype: the ptype value from the descriptor
1491  *
1492  * Returns a hash type to be used by skb_set_hash
1493  **/
1494 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1495 {
1496 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1497 
1498 	if (!decoded.known)
1499 		return PKT_HASH_TYPE_NONE;
1500 
1501 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1502 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1503 		return PKT_HASH_TYPE_L4;
1504 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1505 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1506 		return PKT_HASH_TYPE_L3;
1507 	else
1508 		return PKT_HASH_TYPE_L2;
1509 }
1510 
1511 /**
1512  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1513  * @rx_ring:  rx ring to clean
1514  * @budget:   how many cleans we're allowed
1515  *
1516  * Returns number of packets cleaned
1517  **/
1518 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
1519 {
1520 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1521 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1522 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1523 	const int current_node = numa_node_id();
1524 	struct i40e_vsi *vsi = rx_ring->vsi;
1525 	u16 i = rx_ring->next_to_clean;
1526 	union i40e_rx_desc *rx_desc;
1527 	u32 rx_error, rx_status;
1528 	u8 rx_ptype;
1529 	u64 qword;
1530 
1531 	if (budget <= 0)
1532 		return 0;
1533 
1534 	do {
1535 		struct i40e_rx_buffer *rx_bi;
1536 		struct sk_buff *skb;
1537 		u16 vlan_tag;
1538 		/* return some buffers to hardware, one at a time is too slow */
1539 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1540 			i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1541 			cleaned_count = 0;
1542 		}
1543 
1544 		i = rx_ring->next_to_clean;
1545 		rx_desc = I40E_RX_DESC(rx_ring, i);
1546 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1547 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1548 			I40E_RXD_QW1_STATUS_SHIFT;
1549 
1550 		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
1551 			break;
1552 
1553 		/* This memory barrier is needed to keep us from reading
1554 		 * any other fields out of the rx_desc until we know the
1555 		 * DD bit is set.
1556 		 */
1557 		rmb();
1558 		if (i40e_rx_is_programming_status(qword)) {
1559 			i40e_clean_programming_status(rx_ring, rx_desc);
1560 			I40E_RX_INCREMENT(rx_ring, i);
1561 			continue;
1562 		}
1563 		rx_bi = &rx_ring->rx_bi[i];
1564 		skb = rx_bi->skb;
1565 		if (likely(!skb)) {
1566 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1567 							rx_ring->rx_hdr_len);
1568 			if (!skb) {
1569 				rx_ring->rx_stats.alloc_buff_failed++;
				/* can't use a NULL skb below, stop cleaning */
				break;
			}
1570 			/* initialize queue mapping */
1571 			skb_record_rx_queue(skb, rx_ring->queue_index);
1572 			/* we are reusing so sync this buffer for CPU use */
1573 			dma_sync_single_range_for_cpu(rx_ring->dev,
1574 						      rx_bi->dma,
1575 						      0,
1576 						      rx_ring->rx_hdr_len,
1577 						      DMA_FROM_DEVICE);
1578 		}
1579 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1580 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1581 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1582 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1583 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1584 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1585 
1586 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1587 			   I40E_RXD_QW1_ERROR_SHIFT;
1588 		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1589 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1590 
1591 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1592 			   I40E_RXD_QW1_PTYPE_SHIFT;
1593 		prefetch(rx_bi->page);
1594 		rx_bi->skb = NULL;
1595 		cleaned_count++;
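		/* SPH (split header) or HBO (header buffer overflow) means the
		 * packet headers landed in the separate header buffer, so copy
		 * them out of hdr_buf; otherwise pull the start of the packet
		 * out of the data page instead
		 */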
1596 		if (rx_hbo || rx_sph) {
1597 			int len;
1598 			if (rx_hbo)
1599 				len = I40E_RX_HDR_SIZE;
1600 			else
1601 				len = rx_header_len;
1602 			memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1603 		} else if (skb->len == 0) {
1604 			int len;
1605 
1606 			len = (rx_packet_len > skb_headlen(skb) ?
1607 				skb_headlen(skb) : rx_packet_len);
1608 			memcpy(__skb_put(skb, len),
1609 			       rx_bi->page + rx_bi->page_offset,
1610 			       len);
1611 			rx_bi->page_offset += len;
1612 			rx_packet_len -= len;
1613 		}
1614 
1615 		/* Get the rest of the data if this was a header split */
1616 		if (rx_packet_len) {
1617 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1618 					   rx_bi->page,
1619 					   rx_bi->page_offset,
1620 					   rx_packet_len);
1621 
1622 			skb->len += rx_packet_len;
1623 			skb->data_len += rx_packet_len;
1624 			skb->truesize += rx_packet_len;
1625 
1626 			if ((page_count(rx_bi->page) == 1) &&
1627 			    (page_to_nid(rx_bi->page) == current_node))
1628 				get_page(rx_bi->page);
1629 			else
1630 				rx_bi->page = NULL;
1631 
1632 			dma_unmap_page(rx_ring->dev,
1633 				       rx_bi->page_dma,
1634 				       PAGE_SIZE / 2,
1635 				       DMA_FROM_DEVICE);
1636 			rx_bi->page_dma = 0;
1637 		}
1638 		I40E_RX_INCREMENT(rx_ring, i);
1639 
1640 		if (unlikely(
1641 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1642 			struct i40e_rx_buffer *next_buffer;
1643 
1644 			next_buffer = &rx_ring->rx_bi[i];
1645 			next_buffer->skb = skb;
1646 			rx_ring->rx_stats.non_eop_descs++;
1647 			continue;
1648 		}
1649 
1650 		/* ERR_MASK will only have valid bits if EOP set */
1651 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1652 			dev_kfree_skb_any(skb);
1653 			/* TODO: shouldn't we increment a counter indicating the
1654 			 * drop?
1655 			 */
1656 			continue;
1657 		}
1658 
1659 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1660 			     i40e_ptype_to_hash(rx_ptype));
1661 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1662 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1663 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1664 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1665 			rx_ring->last_rx_timestamp = jiffies;
1666 		}
1667 
1668 		/* probably a little skewed due to removing CRC */
1669 		total_rx_bytes += skb->len;
1670 		total_rx_packets++;
1671 
1672 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1673 
1674 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1675 
1676 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1677 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1678 			 : 0;
1679 #ifdef I40E_FCOE
1680 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1681 			dev_kfree_skb_any(skb);
1682 			continue;
1683 		}
1684 #endif
1685 		skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1686 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1687 
1688 		rx_ring->netdev->last_rx = jiffies;
1689 		rx_desc->wb.qword1.status_error_len = 0;
1690 
1691 	} while (likely(total_rx_packets < budget));
1692 
1693 	u64_stats_update_begin(&rx_ring->syncp);
1694 	rx_ring->stats.packets += total_rx_packets;
1695 	rx_ring->stats.bytes += total_rx_bytes;
1696 	u64_stats_update_end(&rx_ring->syncp);
1697 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1698 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1699 
1700 	return total_rx_packets;
1701 }
1702 
1703 /**
1704  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1705  * @rx_ring:  rx ring to clean
1706  * @budget:   how many cleans we're allowed
1707  *
1708  * Returns number of packets cleaned
1709  **/
1710 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1711 {
1712 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1713 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1714 	struct i40e_vsi *vsi = rx_ring->vsi;
1715 	union i40e_rx_desc *rx_desc;
1716 	u32 rx_error, rx_status;
1717 	u16 rx_packet_len;
1718 	u8 rx_ptype;
1719 	u64 qword;
1720 	u16 i;
1721 
1722 	do {
1723 		struct i40e_rx_buffer *rx_bi;
1724 		struct sk_buff *skb;
1725 		u16 vlan_tag;
1726 		/* return some buffers to hardware, one at a time is too slow */
1727 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1728 			i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1729 			cleaned_count = 0;
1730 		}
1731 
1732 		i = rx_ring->next_to_clean;
1733 		rx_desc = I40E_RX_DESC(rx_ring, i);
1734 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1735 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1736 			I40E_RXD_QW1_STATUS_SHIFT;
1737 
1738 		if (!(rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
1739 			break;
1740 
1741 		/* This memory barrier is needed to keep us from reading
1742 		 * any other fields out of the rx_desc until we know the
1743 		 * DD bit is set.
1744 		 */
1745 		rmb();
1746 
1747 		if (i40e_rx_is_programming_status(qword)) {
1748 			i40e_clean_programming_status(rx_ring, rx_desc);
1749 			I40E_RX_INCREMENT(rx_ring, i);
1750 			continue;
1751 		}
1752 		rx_bi = &rx_ring->rx_bi[i];
1753 		skb = rx_bi->skb;
1754 		prefetch(skb->data);
1755 
1756 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1757 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1758 
1759 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1760 			   I40E_RXD_QW1_ERROR_SHIFT;
1761 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1762 
1763 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1764 			   I40E_RXD_QW1_PTYPE_SHIFT;
1765 		rx_bi->skb = NULL;
1766 		cleaned_count++;
1767 
1768 		/* Get the header and possibly the whole packet.
1769 		 * If this is an skb from a previous receive, dma will be 0
1770 		 */
1771 		skb_put(skb, rx_packet_len);
1772 		dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1773 				 DMA_FROM_DEVICE);
1774 		rx_bi->dma = 0;
1775 
1776 		I40E_RX_INCREMENT(rx_ring, i);
1777 
1778 		if (unlikely(
1779 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1780 			rx_ring->rx_stats.non_eop_descs++;
1781 			continue;
1782 		}
1783 
1784 		/* ERR_MASK will only have valid bits if EOP set */
1785 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1786 			dev_kfree_skb_any(skb);
1787 			/* TODO: shouldn't we increment a counter indicating the
1788 			 * drop?
1789 			 */
1790 			continue;
1791 		}
1792 
1793 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1794 			     i40e_ptype_to_hash(rx_ptype));
1795 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1796 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1797 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1798 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1799 			rx_ring->last_rx_timestamp = jiffies;
1800 		}
1801 
1802 		/* probably a little skewed due to removing CRC */
1803 		total_rx_bytes += skb->len;
1804 		total_rx_packets++;
1805 
1806 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1807 
1808 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1809 
1810 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1811 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1812 			 : 0;
1813 #ifdef I40E_FCOE
1814 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1815 			dev_kfree_skb_any(skb);
1816 			continue;
1817 		}
1818 #endif
1819 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1820 
1821 		rx_ring->netdev->last_rx = jiffies;
1822 		rx_desc->wb.qword1.status_error_len = 0;
1823 	} while (likely(total_rx_packets < budget));
1824 
1825 	u64_stats_update_begin(&rx_ring->syncp);
1826 	rx_ring->stats.packets += total_rx_packets;
1827 	rx_ring->stats.bytes += total_rx_bytes;
1828 	u64_stats_update_end(&rx_ring->syncp);
1829 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1830 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1831 
1832 	return total_rx_packets;
1833 }
1834 
1835 /**
1836  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1837  * @napi: napi struct with our devices info in it
1838  * @budget: amount of work driver is allowed to do this pass, in packets
1839  *
1840  * This function will clean all queues associated with a q_vector.
1841  *
1842  * Returns the amount of work done
1843  **/
1844 int i40e_napi_poll(struct napi_struct *napi, int budget)
1845 {
1846 	struct i40e_q_vector *q_vector =
1847 			       container_of(napi, struct i40e_q_vector, napi);
1848 	struct i40e_vsi *vsi = q_vector->vsi;
1849 	struct i40e_ring *ring;
1850 	bool clean_complete = true;
1851 	bool arm_wb = false;
1852 	int budget_per_ring;
1853 	int cleaned;
1854 
1855 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1856 		napi_complete(napi);
1857 		return 0;
1858 	}
1859 
1860 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1861 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1862 	 */
1863 	i40e_for_each_ring(ring, q_vector->tx) {
1864 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1865 		arm_wb |= ring->arm_wb;
1866 	}
1867 
1868 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1869 	 * allow the budget to go below 1 because that would exit polling early.
1870 	 */
1871 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1872 
1873 	i40e_for_each_ring(ring, q_vector->rx) {
1874 		if (ring_is_ps_enabled(ring))
1875 			cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1876 		else
1877 			cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1878 		/* if we didn't clean as many as budgeted, we must be done */
1879 		clean_complete &= (budget_per_ring != cleaned);
1880 	}
1881 
1882 	/* If work not completed, return budget and polling will return */
1883 	if (!clean_complete) {
1884 		if (arm_wb)
1885 			i40e_force_wb(vsi, q_vector);
1886 		return budget;
1887 	}
1888 
1889 	/* Work is done so exit the polling mode and re-enable the interrupt */
1890 	napi_complete(napi);
1891 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1892 	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1893 		i40e_update_dynamic_itr(q_vector);
1894 
1895 	if (!test_bit(__I40E_DOWN, &vsi->state)) {
1896 		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1897 			i40e_irq_dynamic_enable(vsi,
1898 					q_vector->v_idx + vsi->base_vector);
1899 		} else {
1900 			struct i40e_hw *hw = &vsi->back->hw;
1901 			/* We re-enable the queue 0 interrupt cause, but
1902 			 * don't worry about dynamic_enable
1903 			 * because we left it on for the other
1904 			 * possible interrupts during napi
1905 			 */
1906 			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1907 			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1908 			wr32(hw, I40E_QINT_RQCTL(0), qval);
1909 
1910 			qval = rd32(hw, I40E_QINT_TQCTL(0));
1911 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1912 			wr32(hw, I40E_QINT_TQCTL(0), qval);
1913 
1914 			i40e_irq_dynamic_enable_icr0(vsi->back);
1915 		}
1916 	}
1917 
1918 	return 0;
1919 }
1920 
1921 /**
1922  * i40e_atr - Add a Flow Director ATR filter
1923  * @tx_ring:  ring to add programming descriptor to
1924  * @skb:      send buffer
1925  * @flags:    send flags
1926  * @protocol: wire protocol
1927  **/
1928 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1929 		     u32 flags, __be16 protocol)
1930 {
1931 	struct i40e_filter_program_desc *fdir_desc;
1932 	struct i40e_pf *pf = tx_ring->vsi->back;
1933 	union {
1934 		unsigned char *network;
1935 		struct iphdr *ipv4;
1936 		struct ipv6hdr *ipv6;
1937 	} hdr;
1938 	struct tcphdr *th;
1939 	unsigned int hlen;
1940 	u32 flex_ptype, dtype_cmd;
1941 	u16 i;
1942 
1943 	/* make sure ATR is enabled */
1944 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1945 		return;
1946 
1947 	if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1948 		return;
1949 
1950 	/* if sampling is disabled do nothing */
1951 	if (!tx_ring->atr_sample_rate)
1952 		return;
1953 
1954 	/* snag network header to get L4 type and address */
1955 	hdr.network = skb_network_header(skb);
1956 
1957 	/* Currently only IPv4/IPv6 with TCP is supported */
1958 	if (protocol == htons(ETH_P_IP)) {
1959 		if (hdr.ipv4->protocol != IPPROTO_TCP)
1960 			return;
1961 
1962 		/* access ihl as a u8 to avoid unaligned access on ia64 */
1963 		hlen = (hdr.network[0] & 0x0F) << 2;
1964 	} else if (protocol == htons(ETH_P_IPV6)) {
1965 		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1966 			return;
1967 
1968 		hlen = sizeof(struct ipv6hdr);
1969 	} else {
1970 		return;
1971 	}
1972 
1973 	th = (struct tcphdr *)(hdr.network + hlen);
1974 
1975 	/* Due to lack of space, no more new filters can be programmed */
1976 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1977 		return;
1978 
1979 	tx_ring->atr_count++;
1980 
1981 	/* sample on all syn/fin/rst packets or once every atr sample rate */
1982 	if (!th->fin &&
1983 	    !th->syn &&
1984 	    !th->rst &&
1985 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
1986 		return;
1987 
1988 	tx_ring->atr_count = 0;
1989 
1990 	/* grab the next descriptor */
1991 	i = tx_ring->next_to_use;
1992 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1993 
1994 	i++;
1995 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1996 
1997 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1998 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
1999 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2000 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2001 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2002 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2003 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2004 
2005 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2006 
2007 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2008 
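	/* FIN or RST ends the flow, so remove the filter; otherwise add/update */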
2009 	dtype_cmd |= (th->fin || th->rst) ?
2010 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2011 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2012 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2013 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2014 
2015 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2016 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
2017 
2018 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2019 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2020 
2021 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2022 	dtype_cmd |=
2023 		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2024 		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2025 
2026 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2027 	fdir_desc->rsvd = cpu_to_le32(0);
2028 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2029 	fdir_desc->fd_id = cpu_to_le32(0);
2030 }
2031 
2032 /**
2033  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2034  * @skb:     send buffer
2035  * @tx_ring: ring to send buffer on
2036  * @flags:   the tx flags to be set
2037  *
2038  * Checks the skb and sets up the corresponding generic transmit flags
2039  * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2040  *
2041  * Returns an error code to indicate the frame should be dropped upon error,
2042  * otherwise returns 0 to indicate the flags have been set properly.
2043  **/
2044 #ifdef I40E_FCOE
2045 int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2046 			       struct i40e_ring *tx_ring,
2047 			       u32 *flags)
2048 #else
2049 static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2050 				      struct i40e_ring *tx_ring,
2051 				      u32 *flags)
2052 #endif
2053 {
2054 	__be16 protocol = skb->protocol;
2055 	u32  tx_flags = 0;
2056 
2057 	/* if we have a HW VLAN tag being added, default to the HW one */
2058 	if (skb_vlan_tag_present(skb)) {
2059 		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2060 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2061 	/* else if it is a SW VLAN, check the next protocol and store the tag */
2062 	} else if (protocol == htons(ETH_P_8021Q)) {
2063 		struct vlan_hdr *vhdr, _vhdr;
2064 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2065 		if (!vhdr)
2066 			return -EINVAL;
2067 
2068 		protocol = vhdr->h_vlan_encapsulated_proto;
2069 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2070 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2071 	}
2072 
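	/* the 802.1p priority insertion below only applies when DCB is enabled */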
2073 	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2074 		goto out;
2075 
2076 	/* Insert 802.1p priority into VLAN header */
2077 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2078 	    (skb->priority != TC_PRIO_CONTROL)) {
2079 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2080 		tx_flags |= (skb->priority & 0x7) <<
2081 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2082 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2083 			struct vlan_ethhdr *vhdr;
2084 			int rc;
2085 
2086 			rc = skb_cow_head(skb, 0);
2087 			if (rc < 0)
2088 				return rc;
2089 			vhdr = (struct vlan_ethhdr *)skb->data;
2090 			vhdr->h_vlan_TCI = htons(tx_flags >>
2091 						 I40E_TX_FLAGS_VLAN_SHIFT);
2092 		} else {
2093 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2094 		}
2095 	}
2096 
2097 out:
2098 	*flags = tx_flags;
2099 	return 0;
2100 }
2101 
2102 /**
2103  * i40e_tso - set up the tso context descriptor
2104  * @tx_ring:  ptr to the ring to send
2105  * @skb:      ptr to the skb we're sending
2106  * @tx_flags: the collected send information
2107  * @protocol: the send protocol
2108  * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to Quad Word 1 of the context descriptor
2109  * @cd_tunneling: ptr to context descriptor bits
2110  *
2111  * Returns 0 if no TSO is needed, 1 if TSO is set up, or a negative error code
2112  **/
2113 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2114 		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
2115 		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
2116 {
2117 	u32 cd_cmd, cd_tso_len, cd_mss;
2118 	struct ipv6hdr *ipv6h;
2119 	struct tcphdr *tcph;
2120 	struct iphdr *iph;
2121 	u32 l4len;
2122 	int err;
2123 
2124 	if (!skb_is_gso(skb))
2125 		return 0;
2126 
2127 	err = skb_cow_head(skb, 0);
2128 	if (err < 0)
2129 		return err;
2130 
2131 	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2132 	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2133 
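	/* clear the IP length/checksum fields and seed the TCP pseudo-header
	 * checksum so the hardware can fill in the final values per segment
	 */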
2134 	if (iph->version == 4) {
2135 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2136 		iph->tot_len = 0;
2137 		iph->check = 0;
2138 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2139 						 0, IPPROTO_TCP, 0);
2140 	} else if (ipv6h->version == 6) {
2141 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2142 		ipv6h->payload_len = 0;
2143 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2144 					       0, IPPROTO_TCP, 0);
2145 	}
2146 
2147 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2148 	*hdr_len = (skb->encapsulation
2149 		    ? (skb_inner_transport_header(skb) - skb->data)
2150 		    : skb_transport_offset(skb)) + l4len;
2151 
2152 	/* find the field values */
2153 	cd_cmd = I40E_TX_CTX_DESC_TSO;
2154 	cd_tso_len = skb->len - *hdr_len;
2155 	cd_mss = skb_shinfo(skb)->gso_size;
2156 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2157 				((u64)cd_tso_len <<
2158 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2159 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2160 	return 1;
2161 }
2162 
2163 /**
2164  * i40e_tsyn - set up the tsyn context descriptor
2165  * @tx_ring:  ptr to the ring to send
2166  * @skb:      ptr to the skb we're sending
2167  * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to Quad Word 1 of the context descriptor
2168  *
2169  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2170  **/
2171 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2172 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2173 {
2174 	struct i40e_pf *pf;
2175 
2176 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2177 		return 0;
2178 
2179 	/* Tx timestamps cannot be sampled when doing TSO */
2180 	if (tx_flags & I40E_TX_FLAGS_TSO)
2181 		return 0;
2182 
2183 	/* only timestamp the outbound packet if the user has requested it and
2184 	 * we are not already transmitting a packet to be timestamped
2185 	 */
2186 	pf = i40e_netdev_to_pf(tx_ring->netdev);
2187 	if (!(pf->flags & I40E_FLAG_PTP))
2188 		return 0;
2189 
2190 	if (pf->ptp_tx &&
2191 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2192 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2193 		pf->ptp_tx_skb = skb_get(skb);
2194 	} else {
2195 		return 0;
2196 	}
2197 
2198 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2199 				I40E_TXD_CTX_QW1_CMD_SHIFT;
2200 
2201 	return 1;
2202 }
2203 
2204 /**
2205  * i40e_tx_enable_csum - Enable Tx checksum offloads
2206  * @skb: send buffer
2207  * @tx_flags: Tx flags currently set
2208  * @td_cmd: Tx descriptor command bits to set
2209  * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
2210  * @cd_tunneling: ptr to context desc bits
2211  **/
2212 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
2213 				u32 *td_cmd, u32 *td_offset,
2214 				struct i40e_ring *tx_ring,
2215 				u32 *cd_tunneling)
2216 {
2217 	struct ipv6hdr *this_ipv6_hdr;
2218 	unsigned int this_tcp_hdrlen;
2219 	struct iphdr *this_ip_hdr;
2220 	u32 network_hdr_len;
2221 	u8 l4_hdr = 0;
2222 	u32 l4_tunnel = 0;
2223 
2224 	if (skb->encapsulation) {
2225 		switch (ip_hdr(skb)->protocol) {
2226 		case IPPROTO_UDP:
2227 			l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2228 			break;
2229 		default:
2230 			return;
2231 		}
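		/* for a supported tunnel, checksum offsets come from the inner headers */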
2232 		network_hdr_len = skb_inner_network_header_len(skb);
2233 		this_ip_hdr = inner_ip_hdr(skb);
2234 		this_ipv6_hdr = inner_ipv6_hdr(skb);
2235 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2236 
2237 		if (tx_flags & I40E_TX_FLAGS_IPV4) {
2238 
2239 			if (tx_flags & I40E_TX_FLAGS_TSO) {
2240 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2241 				ip_hdr(skb)->check = 0;
2242 			} else {
2243 				*cd_tunneling |=
2244 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2245 			}
2246 		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
2247 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2248 			if (tx_flags & I40E_TX_FLAGS_TSO)
2249 				ip_hdr(skb)->check = 0;
2250 		}
2251 
2252 		/* Now set the ctx descriptor fields */
2253 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2254 				   I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT      |
2255 				   l4_tunnel                             |
2256 				   ((skb_inner_network_offset(skb) -
2257 					skb_transport_offset(skb)) >> 1) <<
2258 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2259 		if (this_ip_hdr->version == 6) {
2260 			tx_flags &= ~I40E_TX_FLAGS_IPV4;
2261 			tx_flags |= I40E_TX_FLAGS_IPV6;
2262 		}
2263 	} else {
2264 		network_hdr_len = skb_network_header_len(skb);
2265 		this_ip_hdr = ip_hdr(skb);
2266 		this_ipv6_hdr = ipv6_hdr(skb);
2267 		this_tcp_hdrlen = tcp_hdrlen(skb);
2268 	}
2269 
2270 	/* Enable IP checksum offloads */
2271 	if (tx_flags & I40E_TX_FLAGS_IPV4) {
2272 		l4_hdr = this_ip_hdr->protocol;
2273 		/* the stack computes the IP header already, the only time we
2274 		 * need the hardware to recompute it is in the case of TSO.
2275 		 */
2276 		if (tx_flags & I40E_TX_FLAGS_TSO) {
2277 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2278 			this_ip_hdr->check = 0;
2279 		} else {
2280 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2281 		}
2282 		/* Now set the td_offset for IP header length */
2283 		*td_offset = (network_hdr_len >> 2) <<
2284 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2285 	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
2286 		l4_hdr = this_ipv6_hdr->nexthdr;
2287 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2288 		/* Now set the td_offset for IP header length */
2289 		*td_offset = (network_hdr_len >> 2) <<
2290 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2291 	}
2292 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2293 	*td_offset |= (skb_network_offset(skb) >> 1) <<
2294 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
2295 
2296 	/* Enable L4 checksum offloads */
2297 	switch (l4_hdr) {
2298 	case IPPROTO_TCP:
2299 		/* enable checksum offloads */
2300 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2301 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2302 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2303 		break;
2304 	case IPPROTO_SCTP:
2305 		/* enable SCTP checksum offload */
2306 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2307 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2308 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2309 		break;
2310 	case IPPROTO_UDP:
2311 		/* enable UDP checksum offload */
2312 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2313 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2314 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2315 		break;
2316 	default:
2317 		break;
2318 	}
2319 }
2320 
2321 /**
2322  * i40e_create_tx_ctx - Build the Tx context descriptor
2323  * @tx_ring:  ring to create the descriptor on
2324  * @cd_type_cmd_tso_mss: Quad Word 1
2325  * @cd_tunneling: Quad Word 0 - bits 0-31
2326  * @cd_l2tag2: Quad Word 0 - bits 32-63
2327  **/
2328 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2329 			       const u64 cd_type_cmd_tso_mss,
2330 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2331 {
2332 	struct i40e_tx_context_desc *context_desc;
2333 	int i = tx_ring->next_to_use;
2334 
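	/* no context descriptor is needed if none of the context fields are set */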
2335 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2336 	    !cd_tunneling && !cd_l2tag2)
2337 		return;
2338 
2339 	/* grab the next descriptor */
2340 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2341 
2342 	i++;
2343 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2344 
2345 	/* cpu_to_le32 and assign to struct fields */
2346 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2347 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2348 	context_desc->rsvd = cpu_to_le16(0);
2349 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2350 }
2351 
2352 /**
2353  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2354  * @tx_ring: the ring to be checked
2355  * @size:    the size buffer we want to assure is available
2356  *
2357  * Returns -EBUSY if a stop is needed, else 0
2358  **/
2359 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2360 {
2361 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2362 	/* Memory barrier before checking head and tail */
2363 	smp_mb();
2364 
2365 	/* Check again in case another CPU has just made room available. */
2366 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2367 		return -EBUSY;
2368 
2369 	/* A reprieve! - use start_queue because it doesn't call schedule */
2370 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2371 	++tx_ring->tx_stats.restart_queue;
2372 	return 0;
2373 }
2374 
2375 /**
2376  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2377  * @tx_ring: the ring to be checked
2378  * @size:    the size buffer we want to assure is available
2379  *
2380  * Returns 0 if stop is not needed
2381  **/
2382 #ifdef I40E_FCOE
2383 int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2384 #else
2385 static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2386 #endif
2387 {
2388 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2389 		return 0;
2390 	return __i40e_maybe_stop_tx(tx_ring, size);
2391 }
2392 
2393 /**
2394  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2395  * @skb:      send buffer
2396  * @tx_flags: collected send information
2397  * @hdr_len:  size of the packet header
2398  *
2399  * Note: Our HW can't scatter-gather more than 8 fragments to build
2400  * a packet on the wire and so we need to figure out the cases where we
2401  * need to linearize the skb.
2402  **/
2403 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
2404 			       const u8 hdr_len)
2405 {
2406 	struct skb_frag_struct *frag;
2407 	bool linearize = false;
2408 	unsigned int size = 0;
2409 	u16 num_frags;
2410 	u16 gso_segs;
2411 
2412 	num_frags = skb_shinfo(skb)->nr_frags;
2413 	gso_segs = skb_shinfo(skb)->gso_segs;
2414 
2415 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2416 		u16 j = 1;
2417 
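		/* packets with fewer fragments than the HW limit never need linearizing */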
2418 		if (num_frags < (I40E_MAX_BUFFER_TXD))
2419 			goto linearize_chk_done;
2420 		/* try the simple math, if we have too many frags per segment */
2421 		if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2422 		    I40E_MAX_BUFFER_TXD) {
2423 			linearize = true;
2424 			goto linearize_chk_done;
2425 		}
2426 		frag = &skb_shinfo(skb)->frags[0];
2427 		size = hdr_len;
2428 		/* we might still have more fragments per segment */
2429 		do {
2430 			size += skb_frag_size(frag);
2431 			frag++; j++;
2432 			if (j == I40E_MAX_BUFFER_TXD) {
2433 				if (size < skb_shinfo(skb)->gso_size) {
2434 					linearize = true;
2435 					break;
2436 				}
2437 				j = 1;
2438 				size -= skb_shinfo(skb)->gso_size;
2439 				if (size)
2440 					j++;
2441 				size += hdr_len;
2442 			}
2443 			num_frags--;
2444 		} while (num_frags);
2445 	} else {
2446 		if (num_frags >= I40E_MAX_BUFFER_TXD)
2447 			linearize = true;
2448 	}
2449 
2450 linearize_chk_done:
2451 	return linearize;
2452 }
2453 
2454 /**
2455  * i40e_tx_map - Build the Tx descriptor
2456  * @tx_ring:  ring to send buffer on
2457  * @skb:      send buffer
2458  * @first:    first buffer info buffer to use
2459  * @tx_flags: collected send information
2460  * @hdr_len:  size of the packet header
2461  * @td_cmd:   the command field in the descriptor
2462  * @td_offset: offset for checksum or crc
2463  **/
2464 #ifdef I40E_FCOE
2465 void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2466 		 struct i40e_tx_buffer *first, u32 tx_flags,
2467 		 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2468 #else
2469 static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2470 			struct i40e_tx_buffer *first, u32 tx_flags,
2471 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2472 #endif
2473 {
2474 	unsigned int data_len = skb->data_len;
2475 	unsigned int size = skb_headlen(skb);
2476 	struct skb_frag_struct *frag;
2477 	struct i40e_tx_buffer *tx_bi;
2478 	struct i40e_tx_desc *tx_desc;
2479 	u16 i = tx_ring->next_to_use;
2480 	u32 td_tag = 0;
2481 	dma_addr_t dma;
2482 	u16 gso_segs;
2483 
2484 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2485 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2486 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2487 			 I40E_TX_FLAGS_VLAN_SHIFT;
2488 	}
2489 
2490 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2491 		gso_segs = skb_shinfo(skb)->gso_segs;
2492 	else
2493 		gso_segs = 1;
2494 
2495 	/* multiply data chunks by size of headers */
2496 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2497 	first->gso_segs = gso_segs;
2498 	first->skb = skb;
2499 	first->tx_flags = tx_flags;
2500 
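	/* map the linear portion of the skb; fragments are mapped in the loop below */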
2501 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2502 
2503 	tx_desc = I40E_TX_DESC(tx_ring, i);
2504 	tx_bi = first;
2505 
2506 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2507 		if (dma_mapping_error(tx_ring->dev, dma))
2508 			goto dma_error;
2509 
2510 		/* record length, and DMA address */
2511 		dma_unmap_len_set(tx_bi, len, size);
2512 		dma_unmap_addr_set(tx_bi, dma, dma);
2513 
2514 		tx_desc->buffer_addr = cpu_to_le64(dma);
2515 
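		/* split buffers larger than the per-descriptor limit across
		 * multiple descriptors
		 */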
2516 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2517 			tx_desc->cmd_type_offset_bsz =
2518 				build_ctob(td_cmd, td_offset,
2519 					   I40E_MAX_DATA_PER_TXD, td_tag);
2520 
2521 			tx_desc++;
2522 			i++;
2523 			if (i == tx_ring->count) {
2524 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2525 				i = 0;
2526 			}
2527 
2528 			dma += I40E_MAX_DATA_PER_TXD;
2529 			size -= I40E_MAX_DATA_PER_TXD;
2530 
2531 			tx_desc->buffer_addr = cpu_to_le64(dma);
2532 		}
2533 
2534 		if (likely(!data_len))
2535 			break;
2536 
2537 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2538 							  size, td_tag);
2539 
2540 		tx_desc++;
2541 		i++;
2542 		if (i == tx_ring->count) {
2543 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2544 			i = 0;
2545 		}
2546 
2547 		size = skb_frag_size(frag);
2548 		data_len -= size;
2549 
2550 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2551 				       DMA_TO_DEVICE);
2552 
2553 		tx_bi = &tx_ring->tx_bi[i];
2554 	}
2555 
2556 	/* Place RS bit on last descriptor of any packet that spans across the
2557 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
2558 	 */
2559 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2560 	    (first <= &tx_ring->tx_bi[i]) &&
2561 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2562 		tx_desc->cmd_type_offset_bsz =
2563 			build_ctob(td_cmd, td_offset, size, td_tag) |
2564 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2565 					 I40E_TXD_QW1_CMD_SHIFT);
2566 	} else {
2567 		tx_desc->cmd_type_offset_bsz =
2568 			build_ctob(td_cmd, td_offset, size, td_tag) |
2569 			cpu_to_le64((u64)I40E_TXD_CMD <<
2570 					 I40E_TXD_QW1_CMD_SHIFT);
2571 	}
2572 
2573 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2574 						 tx_ring->queue_index),
2575 			     first->bytecount);
2576 
2577 	/* set the timestamp */
2578 	first->time_stamp = jiffies;
2579 
2580 	/* Force memory writes to complete before letting h/w
2581 	 * know there are new descriptors to fetch.  (Only
2582 	 * applicable for weak-ordered memory model archs,
2583 	 * such as IA-64).
2584 	 */
2585 	wmb();
2586 
2587 	/* set next_to_watch value indicating a packet is present */
2588 	first->next_to_watch = tx_desc;
2589 
2590 	i++;
2591 	if (i == tx_ring->count)
2592 		i = 0;
2593 
2594 	tx_ring->next_to_use = i;
2595 
2596 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2597 	/* notify HW of packet */
2598 	if (!skb->xmit_more ||
2599 	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2600 						   tx_ring->queue_index)))
2601 		writel(i, tx_ring->tail);
2602 
2603 	return;
2604 
2605 dma_error:
2606 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2607 
2608 	/* clear dma mappings for failed tx_bi map */
2609 	for (;;) {
2610 		tx_bi = &tx_ring->tx_bi[i];
2611 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2612 		if (tx_bi == first)
2613 			break;
2614 		if (i == 0)
2615 			i = tx_ring->count;
2616 		i--;
2617 	}
2618 
2619 	tx_ring->next_to_use = i;
2620 }
2621 
2622 /**
2623  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2624  * @skb:     send buffer
2625  * @tx_ring: ring to send buffer on
2626  *
2627  * Returns the number of data descriptors needed for this skb. Returns 0 to
2628  * indicate there are not enough descriptors available in this ring since we
2629  * need at least one descriptor.
2630  **/
2631 #ifdef I40E_FCOE
2632 int i40e_xmit_descriptor_count(struct sk_buff *skb,
2633 			       struct i40e_ring *tx_ring)
2634 #else
2635 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
2636 				      struct i40e_ring *tx_ring)
2637 #endif
2638 {
2639 	unsigned int f;
2640 	int count = 0;
2641 
2642 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2643 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2644 	 *       + 4 desc gap to avoid the cache line where head is,
2645 	 *       + 1 desc for context descriptor,
2646 	 * otherwise try next time
2647 	 */
2648 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2649 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2650 
2651 	count += TXD_USE_COUNT(skb_headlen(skb));
2652 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2653 		tx_ring->tx_stats.tx_busy++;
2654 		return 0;
2655 	}
2656 	return count;
2657 }
2658 
2659 /**
2660  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2661  * @skb:     send buffer
2662  * @tx_ring: ring to send buffer on
2663  *
2664  * Returns NETDEV_TX_OK if sent, else an error code
2665  **/
2666 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2667 					struct i40e_ring *tx_ring)
2668 {
2669 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2670 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2671 	struct i40e_tx_buffer *first;
2672 	u32 td_offset = 0;
2673 	u32 tx_flags = 0;
2674 	__be16 protocol;
2675 	u32 td_cmd = 0;
2676 	u8 hdr_len = 0;
2677 	int tsyn;
2678 	int tso;

2679 	if (i40e_xmit_descriptor_count(skb, tx_ring) == 0)
2680 		return NETDEV_TX_BUSY;
2681 
2682 	/* prepare the xmit flags */
2683 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2684 		goto out_drop;
2685 
2686 	/* obtain protocol of skb */
2687 	protocol = vlan_get_protocol(skb);
2688 
2689 	/* record the location of the first descriptor for this packet */
2690 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2691 
2692 	/* setup IPv4/IPv6 offloads */
2693 	if (protocol == htons(ETH_P_IP))
2694 		tx_flags |= I40E_TX_FLAGS_IPV4;
2695 	else if (protocol == htons(ETH_P_IPV6))
2696 		tx_flags |= I40E_TX_FLAGS_IPV6;
2697 
2698 	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
2699 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2700 
2701 	if (tso < 0)
2702 		goto out_drop;
2703 	else if (tso)
2704 		tx_flags |= I40E_TX_FLAGS_TSO;
2705 
2706 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2707 
2708 	if (tsyn)
2709 		tx_flags |= I40E_TX_FLAGS_TSYN;
2710 
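	/* linearize the skb if it needs more buffers than the HW can chain */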
2711 	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
2712 		if (skb_linearize(skb))
2713 			goto out_drop;
2714 
2715 	skb_tx_timestamp(skb);
2716 
2717 	/* always enable CRC insertion offload */
2718 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2719 
2720 	/* Always offload the checksum, since it's in the data descriptor */
2721 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2722 		tx_flags |= I40E_TX_FLAGS_CSUM;
2723 
2724 		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
2725 				    tx_ring, &cd_tunneling);
2726 	}
2727 
2728 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2729 			   cd_tunneling, cd_l2tag2);
2730 
2731 	/* Add Flow Director ATR if it's enabled.
2732 	 *
2733 	 * NOTE: this must always be directly before the data descriptor.
2734 	 */
2735 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2736 
2737 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2738 		    td_cmd, td_offset);
2739 
2740 	return NETDEV_TX_OK;
2741 
2742 out_drop:
2743 	dev_kfree_skb_any(skb);
2744 	return NETDEV_TX_OK;
2745 }
2746 
2747 /**
2748  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2749  * @skb:    send buffer
2750  * @netdev: network interface device structure
2751  *
2752  * Returns NETDEV_TX_OK if sent, else an error code
2753  **/
2754 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2755 {
2756 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2757 	struct i40e_vsi *vsi = np->vsi;
2758 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2759 
2760 	/* hardware can't handle really short frames, hardware padding works
2761 	 * beyond this point
2762 	 */
2763 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2764 		return NETDEV_TX_OK;
2765 
2766 	return i40e_xmit_frame_ring(skb, tx_ring);
2767 }
2768