1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26 
27 #include <linux/prefetch.h>
28 #include "i40e.h"
29 #include "i40e_prototype.h"
30 
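/* Pack the descriptor type, command bits, offsets, buffer size and L2 tag
 * into the little-endian qword1 layout expected by a Tx data descriptor.
 */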
31 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
32 				u32 td_tag)
33 {
34 	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
35 			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
36 			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
37 			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
38 			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
39 }
40 
41 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
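/* max number of ~1 ms waits for two free descriptors when programming a filter */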
42 #define I40E_FD_CLEAN_DELAY 10
43 /**
44  * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: filter parameters used to build the programming descriptor
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: the PF pointer
 * @add: true for add/update, false for remove
 *
 * Returns 0 on success, or a negative value on failure.
49  **/
50 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
51 			     struct i40e_pf *pf, bool add)
52 {
53 	struct i40e_filter_program_desc *fdir_desc;
54 	struct i40e_tx_buffer *tx_buf, *first;
55 	struct i40e_tx_desc *tx_desc;
56 	struct i40e_ring *tx_ring;
57 	unsigned int fpt, dcc;
58 	struct i40e_vsi *vsi;
59 	struct device *dev;
60 	dma_addr_t dma;
61 	u32 td_cmd = 0;
62 	u16 delay = 0;
63 	u16 i;
64 
65 	/* find existing FDIR VSI */
66 	vsi = NULL;
67 	for (i = 0; i < pf->num_alloc_vsi; i++)
68 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
69 			vsi = pf->vsi[i];
70 	if (!vsi)
71 		return -ENOENT;
72 
73 	tx_ring = vsi->tx_rings[0];
74 	dev = tx_ring->dev;
75 
76 	/* we need two descriptors to add/del a filter and we can wait */
77 	do {
78 		if (I40E_DESC_UNUSED(tx_ring) > 1)
79 			break;
80 		msleep_interruptible(1);
81 		delay++;
82 	} while (delay < I40E_FD_CLEAN_DELAY);
83 
84 	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
85 		return -EAGAIN;
86 
87 	dma = dma_map_single(dev, raw_packet,
88 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
89 	if (dma_mapping_error(dev, dma))
90 		goto dma_fail;
91 
92 	/* grab the next descriptor */
93 	i = tx_ring->next_to_use;
94 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
95 	first = &tx_ring->tx_bi[i];
96 	memset(first, 0, sizeof(struct i40e_tx_buffer));
97 
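	/* advance next_to_use, wrapping back to 0 at the end of the ring */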
98 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
99 
100 	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
101 	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
102 
103 	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
104 	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
105 
106 	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
107 	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
108 
109 	/* Use LAN VSI Id if not programmed by user */
110 	if (fdir_data->dest_vsi == 0)
111 		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
112 		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
113 	else
114 		fpt |= ((u32)fdir_data->dest_vsi <<
115 			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
116 		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
117 
118 	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
119 
120 	if (add)
121 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
122 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
123 	else
124 		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
125 		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
126 
127 	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
128 	       I40E_TXD_FLTR_QW1_DEST_MASK;
129 
130 	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
131 	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
132 
133 	if (fdir_data->cnt_index != 0) {
134 		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
135 		dcc |= ((u32)fdir_data->cnt_index <<
136 			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
137 			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
138 	}
139 
140 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
141 	fdir_desc->rsvd = cpu_to_le32(0);
142 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
143 	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
144 
145 	/* Now program a dummy descriptor */
146 	i = tx_ring->next_to_use;
147 	tx_desc = I40E_TX_DESC(tx_ring, i);
148 	tx_buf = &tx_ring->tx_bi[i];
149 
150 	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
151 
152 	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
153 
154 	/* record length, and DMA address */
155 	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
156 	dma_unmap_addr_set(tx_buf, dma, dma);
157 
158 	tx_desc->buffer_addr = cpu_to_le64(dma);
159 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
160 
161 	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
162 	tx_buf->raw_buf = (void *)raw_packet;
163 
164 	tx_desc->cmd_type_offset_bsz =
165 		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
166 
167 	/* set the timestamp */
168 	tx_buf->time_stamp = jiffies;
169 
170 	/* Force memory writes to complete before letting h/w
171 	 * know there are new descriptors to fetch.
172 	 */
173 	wmb();
174 
175 	/* Mark the data descriptor to be watched */
176 	first->next_to_watch = tx_desc;
177 
178 	writel(tx_ring->next_to_use, tx_ring->tail);
179 	return 0;
180 
181 dma_fail:
182 	return -1;
183 }
184 
185 #define IP_HEADER_OFFSET 14
186 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
187 /**
188  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
189  * @vsi: pointer to the targeted VSI
190  * @fd_data: the flow director data required for the FDir descriptor
191  * @add: true adds a filter, false removes it
192  *
193  * Returns 0 if the filters were successfully added or removed
194  **/
195 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
196 				   struct i40e_fdir_filter *fd_data,
197 				   bool add)
198 {
199 	struct i40e_pf *pf = vsi->back;
200 	struct udphdr *udp;
201 	struct iphdr *ip;
202 	bool err = false;
203 	u8 *raw_packet;
204 	int ret;
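	/* dummy UDP/IPv4 frame: 14-byte Ethernet header (ethertype 0x0800)
	 * followed by a minimal IPv4 header with protocol 0x11 (UDP)
	 */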
205 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
206 		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
207 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
208 
209 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 	if (!raw_packet)
211 		return -ENOMEM;
212 	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
213 
214 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
215 	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
216 	      + sizeof(struct iphdr));
217 
218 	ip->daddr = fd_data->dst_ip[0];
219 	udp->dest = fd_data->dst_port;
220 	ip->saddr = fd_data->src_ip[0];
221 	udp->source = fd_data->src_port;
222 
223 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
224 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
225 	if (ret) {
226 		dev_info(&pf->pdev->dev,
227 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
228 			 fd_data->pctype, fd_data->fd_id, ret);
229 		err = true;
230 	} else {
231 		if (add)
232 			dev_info(&pf->pdev->dev,
233 				 "Filter OK for PCTYPE %d loc = %d\n",
234 				 fd_data->pctype, fd_data->fd_id);
235 		else
236 			dev_info(&pf->pdev->dev,
237 				 "Filter deleted for PCTYPE %d loc = %d\n",
238 				 fd_data->pctype, fd_data->fd_id);
239 	}
240 	return err ? -EOPNOTSUPP : 0;
241 }
242 
243 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
244 /**
245  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
246  * @vsi: pointer to the targeted VSI
247  * @fd_data: the flow director data required for the FDir descriptor
248  * @add: true adds a filter, false removes it
249  *
250  * Returns 0 if the filters were successfully added or removed
251  **/
252 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
253 				   struct i40e_fdir_filter *fd_data,
254 				   bool add)
255 {
256 	struct i40e_pf *pf = vsi->back;
257 	struct tcphdr *tcp;
258 	struct iphdr *ip;
259 	bool err = false;
260 	u8 *raw_packet;
261 	int ret;
262 	/* Dummy packet */
263 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
264 		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
265 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
266 		0x0, 0x72, 0, 0, 0, 0};
267 
268 	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
269 	if (!raw_packet)
270 		return -ENOMEM;
271 	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
272 
273 	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
274 	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
275 	      + sizeof(struct iphdr));
276 
277 	ip->daddr = fd_data->dst_ip[0];
278 	tcp->dest = fd_data->dst_port;
279 	ip->saddr = fd_data->src_ip[0];
280 	tcp->source = fd_data->src_port;
281 
282 	if (add) {
283 		pf->fd_tcp_rule++;
284 		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
285 			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
286 			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
287 		}
288 	} else {
289 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
290 				  (pf->fd_tcp_rule - 1) : 0;
291 		if (pf->fd_tcp_rule == 0) {
292 			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
293 			dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
294 		}
295 	}
296 
297 	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
299 
300 	if (ret) {
301 		dev_info(&pf->pdev->dev,
302 			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 			 fd_data->pctype, fd_data->fd_id, ret);
304 		err = true;
305 	} else {
306 		if (add)
			dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
308 				 fd_data->pctype, fd_data->fd_id);
309 		else
310 			dev_info(&pf->pdev->dev,
311 				 "Filter deleted for PCTYPE %d loc = %d\n",
312 				 fd_data->pctype, fd_data->fd_id);
313 	}
314 
315 	return err ? -EOPNOTSUPP : 0;
316 }
317 
318 /**
319  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320  * a specific flow spec
321  * @vsi: pointer to the targeted VSI
322  * @fd_data: the flow director data required for the FDir descriptor
323  * @add: true adds a filter, false removes it
324  *
325  * Always returns -EOPNOTSUPP
326  **/
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 				    struct i40e_fdir_filter *fd_data,
329 				    bool add)
330 {
331 	return -EOPNOTSUPP;
332 }
333 
334 #define I40E_IP_DUMMY_PACKET_LEN 34
335 /**
336  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337  * a specific flow spec
338  * @vsi: pointer to the targeted VSI
339  * @fd_data: the flow director data required for the FDir descriptor
340  * @add: true adds a filter, false removes it
341  *
342  * Returns 0 if the filters were successfully added or removed
343  **/
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 				  struct i40e_fdir_filter *fd_data,
346 				  bool add)
347 {
348 	struct i40e_pf *pf = vsi->back;
349 	struct iphdr *ip;
350 	bool err = false;
351 	u8 *raw_packet;
352 	int ret;
353 	int i;
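	/* dummy IPv4-only frame: Ethernet header plus a minimal 20-byte
	 * IPv4 header, no L4 payload
	 */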
354 	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
356 		0, 0, 0, 0};
357 
358 	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
360 		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
361 		if (!raw_packet)
362 			return -ENOMEM;
363 		memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 		ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
365 
366 		ip->saddr = fd_data->src_ip[0];
367 		ip->daddr = fd_data->dst_ip[0];
368 		ip->protocol = 0;
369 
370 		fd_data->pctype = i;
371 		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
372 
373 		if (ret) {
374 			dev_info(&pf->pdev->dev,
375 				 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 				 fd_data->pctype, fd_data->fd_id, ret);
377 			err = true;
378 		} else {
379 			if (add)
380 				dev_info(&pf->pdev->dev,
381 					 "Filter OK for PCTYPE %d loc = %d\n",
382 					 fd_data->pctype, fd_data->fd_id);
383 			else
384 				dev_info(&pf->pdev->dev,
385 					 "Filter deleted for PCTYPE %d loc = %d\n",
386 					 fd_data->pctype, fd_data->fd_id);
387 		}
388 	}
389 
390 	return err ? -EOPNOTSUPP : 0;
391 }
392 
393 /**
394  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
395  * @vsi: pointer to the targeted VSI
 * @input: the filter whose parameters describe the raw packet to build
 * @add: true adds a filter, false removes it
 *
 * Returns 0 on success, or a negative value on failure.
399  **/
400 int i40e_add_del_fdir(struct i40e_vsi *vsi,
401 		      struct i40e_fdir_filter *input, bool add)
402 {
403 	struct i40e_pf *pf = vsi->back;
404 	int ret;
405 
406 	switch (input->flow_type & ~FLOW_EXT) {
407 	case TCP_V4_FLOW:
408 		ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
409 		break;
410 	case UDP_V4_FLOW:
411 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
412 		break;
413 	case SCTP_V4_FLOW:
414 		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
415 		break;
416 	case IPV4_FLOW:
417 		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
418 		break;
419 	case IP_USER_FLOW:
420 		switch (input->ip4_proto) {
421 		case IPPROTO_TCP:
422 			ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
423 			break;
424 		case IPPROTO_UDP:
425 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
426 			break;
427 		case IPPROTO_SCTP:
428 			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
429 			break;
430 		default:
431 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
432 			break;
433 		}
434 		break;
435 	default:
		dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
437 			 input->flow_type);
438 		ret = -EINVAL;
439 	}
440 
441 	/* The buffer allocated here is freed by the i40e_clean_tx_ring() */
442 	return ret;
443 }
444 
445 /**
446  * i40e_fd_handle_status - check the Programming Status for FD
447  * @rx_ring: the Rx ring for this descriptor
 * @rx_desc: the Rx descriptor for the programming status, not a packet descriptor
 * @prog_id: the id originally used for programming
 *
 * This is used to verify whether the FD programming or invalidation
 * requested by SW to the HW succeeded, and to take actions accordingly.
453  **/
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 				  union i40e_rx_desc *rx_desc, u8 prog_id)
456 {
457 	struct i40e_pf *pf = rx_ring->vsi->back;
458 	struct pci_dev *pdev = pf->pdev;
459 	u32 fcnt_prog, fcnt_avail;
460 	u32 error;
461 	u64 qw;
462 
463 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
466 
467 	if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
469 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
470 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
471 				 rx_desc->wb.qword0.hi_dword.fd_id);
472 
473 		pf->fd_add_err++;
474 		/* store the current atr filter count */
475 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
476 
477 		/* filter programming failed most likely due to table full */
478 		fcnt_prog = i40e_get_cur_guaranteed_fd_count(pf);
479 		fcnt_avail = pf->fdir_pf_filter_count;
480 		/* If ATR is running fcnt_prog can quickly change,
481 		 * if we are very close to full, it makes sense to disable
482 		 * FD ATR/SB and then re-enable it when there is room.
483 		 */
484 		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
485 			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
486 			    !(pf->auto_disable_flags &
487 				     I40E_FLAG_FD_SB_ENABLED)) {
488 				dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
489 				pf->auto_disable_flags |=
490 							I40E_FLAG_FD_SB_ENABLED;
491 			}
492 		} else {
493 			dev_info(&pdev->dev,
494 				"FD filter programming failed due to incorrect filter parameters\n");
495 		}
496 	} else if (error ==
497 			  (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
498 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
499 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
500 				 rx_desc->wb.qword0.hi_dword.fd_id);
501 	}
502 }
503 
504 /**
505  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
506  * @ring:      the ring that owns the buffer
507  * @tx_buffer: the buffer to free
508  **/
509 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
510 					    struct i40e_tx_buffer *tx_buffer)
511 {
512 	if (tx_buffer->skb) {
513 		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
514 			kfree(tx_buffer->raw_buf);
515 		else
516 			dev_kfree_skb_any(tx_buffer->skb);
517 
518 		if (dma_unmap_len(tx_buffer, len))
519 			dma_unmap_single(ring->dev,
520 					 dma_unmap_addr(tx_buffer, dma),
521 					 dma_unmap_len(tx_buffer, len),
522 					 DMA_TO_DEVICE);
523 	} else if (dma_unmap_len(tx_buffer, len)) {
524 		dma_unmap_page(ring->dev,
525 			       dma_unmap_addr(tx_buffer, dma),
526 			       dma_unmap_len(tx_buffer, len),
527 			       DMA_TO_DEVICE);
528 	}
529 	tx_buffer->next_to_watch = NULL;
530 	tx_buffer->skb = NULL;
531 	dma_unmap_len_set(tx_buffer, len, 0);
532 	/* tx_buffer must be completely set up in the transmit path */
533 }
534 
535 /**
 * i40e_clean_tx_ring - Free all Tx buffers in a ring
537  * @tx_ring: ring to be cleaned
538  **/
539 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
540 {
541 	unsigned long bi_size;
542 	u16 i;
543 
544 	/* ring already cleared, nothing to do */
545 	if (!tx_ring->tx_bi)
546 		return;
547 
548 	/* Free all the Tx ring sk_buffs */
549 	for (i = 0; i < tx_ring->count; i++)
550 		i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
551 
552 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
553 	memset(tx_ring->tx_bi, 0, bi_size);
554 
555 	/* Zero out the descriptor ring */
556 	memset(tx_ring->desc, 0, tx_ring->size);
557 
558 	tx_ring->next_to_use = 0;
559 	tx_ring->next_to_clean = 0;
560 
561 	if (!tx_ring->netdev)
562 		return;
563 
564 	/* cleanup Tx queue statistics */
565 	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
566 						  tx_ring->queue_index));
567 }
568 
569 /**
570  * i40e_free_tx_resources - Free Tx resources per queue
571  * @tx_ring: Tx descriptor ring for a specific queue
572  *
573  * Free all transmit software resources
574  **/
575 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
576 {
577 	i40e_clean_tx_ring(tx_ring);
578 	kfree(tx_ring->tx_bi);
579 	tx_ring->tx_bi = NULL;
580 
581 	if (tx_ring->desc) {
582 		dma_free_coherent(tx_ring->dev, tx_ring->size,
583 				  tx_ring->desc, tx_ring->dma);
584 		tx_ring->desc = NULL;
585 	}
586 }
587 
588 /**
589  * i40e_get_tx_pending - how many tx descriptors not processed
 * @ring: the ring of descriptors
591  *
592  * Since there is no access to the ring head register
593  * in XL710, we need to use our local copies
594  **/
595 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
596 {
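	/* when next_to_use has wrapped behind next_to_clean, add the ring size
	 * so the subtraction below yields the number of outstanding descriptors
	 */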
597 	u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
598 			? ring->next_to_use
599 			: ring->next_to_use + ring->count);
600 	return ntu - ring->next_to_clean;
601 }
602 
603 /**
604  * i40e_check_tx_hang - Is there a hang in the Tx queue
605  * @tx_ring: the ring of descriptors
606  **/
607 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
608 {
609 	u32 tx_pending = i40e_get_tx_pending(tx_ring);
610 	struct i40e_pf *pf = tx_ring->vsi->back;
611 	bool ret = false;
612 
613 	clear_check_for_tx_hang(tx_ring);
614 
615 	/* Check for a hung queue, but be thorough. This verifies
616 	 * that a transmit has been completed since the previous
617 	 * check AND there is at least one packet pending. The
618 	 * ARMED bit is set to indicate a potential hang. The
619 	 * bit is cleared if a pause frame is received to remove
620 	 * false hang detection due to PFC or 802.3x frames. By
621 	 * requiring this to fail twice we avoid races with
622 	 * PFC clearing the ARMED bit and conditions where we
623 	 * run the check_tx_hang logic with a transmit completion
624 	 * pending but without time to complete it yet.
625 	 */
626 	if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
627 	    (tx_pending >= I40E_MIN_DESC_PENDING)) {
628 		/* make sure it is true for two checks in a row */
629 		ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
630 				       &tx_ring->state);
631 	} else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
632 		   (tx_pending < I40E_MIN_DESC_PENDING) &&
633 		   (tx_pending > 0)) {
634 		if (I40E_DEBUG_FLOW & pf->hw.debug_mask)
			dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d\n",
636 				 tx_pending, tx_ring->queue_index);
637 		pf->tx_sluggish_count++;
638 	} else {
639 		/* update completed stats and disarm the hang check */
640 		tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
641 		clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
642 	}
643 
644 	return ret;
645 }
646 
647 /**
648  * i40e_get_head - Retrieve head from head writeback
649  * @tx_ring:  tx ring to fetch head of
650  *
651  * Returns value of Tx ring head based on value stored
652  * in head write-back location
653  **/
654 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
655 {
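	/* the head value is written back by HW to the u32 placed just past the
	 * last descriptor in the ring (see i40e_setup_tx_descriptors)
	 */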
656 	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
657 
658 	return le32_to_cpu(*(volatile __le32 *)head);
659 }
660 
661 /**
662  * i40e_clean_tx_irq - Reclaim resources after transmit completes
663  * @tx_ring:  tx ring to clean
664  * @budget:   how many cleans we're allowed
665  *
 * Returns true if there's any budget left (i.e. the clean is finished)
667  **/
668 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
669 {
670 	u16 i = tx_ring->next_to_clean;
671 	struct i40e_tx_buffer *tx_buf;
672 	struct i40e_tx_desc *tx_head;
673 	struct i40e_tx_desc *tx_desc;
674 	unsigned int total_packets = 0;
675 	unsigned int total_bytes = 0;
676 
677 	tx_buf = &tx_ring->tx_bi[i];
678 	tx_desc = I40E_TX_DESC(tx_ring, i);
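	/* bias the index negative so the end-of-ring wrap below can be
	 * detected with a simple test for zero
	 */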
679 	i -= tx_ring->count;
680 
681 	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
682 
683 	do {
684 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
685 
686 		/* if next_to_watch is not set then there is no work pending */
687 		if (!eop_desc)
688 			break;
689 
690 		/* prevent any other reads prior to eop_desc */
691 		read_barrier_depends();
692 
693 		/* we have caught up to head, no work left to do */
694 		if (tx_head == tx_desc)
695 			break;
696 
697 		/* clear next_to_watch to prevent false hangs */
698 		tx_buf->next_to_watch = NULL;
699 
700 		/* update the statistics for this packet */
701 		total_bytes += tx_buf->bytecount;
702 		total_packets += tx_buf->gso_segs;
703 
704 		/* free the skb */
705 		dev_consume_skb_any(tx_buf->skb);
706 
707 		/* unmap skb header data */
708 		dma_unmap_single(tx_ring->dev,
709 				 dma_unmap_addr(tx_buf, dma),
710 				 dma_unmap_len(tx_buf, len),
711 				 DMA_TO_DEVICE);
712 
713 		/* clear tx_buffer data */
714 		tx_buf->skb = NULL;
715 		dma_unmap_len_set(tx_buf, len, 0);
716 
717 		/* unmap remaining buffers */
718 		while (tx_desc != eop_desc) {
719 
720 			tx_buf++;
721 			tx_desc++;
722 			i++;
723 			if (unlikely(!i)) {
724 				i -= tx_ring->count;
725 				tx_buf = tx_ring->tx_bi;
726 				tx_desc = I40E_TX_DESC(tx_ring, 0);
727 			}
728 
729 			/* unmap any remaining paged data */
730 			if (dma_unmap_len(tx_buf, len)) {
731 				dma_unmap_page(tx_ring->dev,
732 					       dma_unmap_addr(tx_buf, dma),
733 					       dma_unmap_len(tx_buf, len),
734 					       DMA_TO_DEVICE);
735 				dma_unmap_len_set(tx_buf, len, 0);
736 			}
737 		}
738 
739 		/* move us one more past the eop_desc for start of next pkt */
740 		tx_buf++;
741 		tx_desc++;
742 		i++;
743 		if (unlikely(!i)) {
744 			i -= tx_ring->count;
745 			tx_buf = tx_ring->tx_bi;
746 			tx_desc = I40E_TX_DESC(tx_ring, 0);
747 		}
748 
749 		/* update budget accounting */
750 		budget--;
751 	} while (likely(budget));
752 
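	/* undo the bias applied above to get the real next_to_clean index */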
753 	i += tx_ring->count;
754 	tx_ring->next_to_clean = i;
755 	u64_stats_update_begin(&tx_ring->syncp);
756 	tx_ring->stats.bytes += total_bytes;
757 	tx_ring->stats.packets += total_packets;
758 	u64_stats_update_end(&tx_ring->syncp);
759 	tx_ring->q_vector->tx.total_bytes += total_bytes;
760 	tx_ring->q_vector->tx.total_packets += total_packets;
761 
762 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
763 		/* schedule immediate reset if we believe we hung */
764 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
765 			 "  VSI                  <%d>\n"
766 			 "  Tx Queue             <%d>\n"
767 			 "  next_to_use          <%x>\n"
768 			 "  next_to_clean        <%x>\n",
769 			 tx_ring->vsi->seid,
770 			 tx_ring->queue_index,
771 			 tx_ring->next_to_use, i);
772 		dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
773 			 "  time_stamp           <%lx>\n"
774 			 "  jiffies              <%lx>\n",
775 			 tx_ring->tx_bi[i].time_stamp, jiffies);
776 
777 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
778 
779 		dev_info(tx_ring->dev,
780 			 "tx hang detected on queue %d, resetting adapter\n",
781 			 tx_ring->queue_index);
782 
783 		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
784 
785 		/* the adapter is about to reset, no point in enabling stuff */
786 		return true;
787 	}
788 
789 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
790 						      tx_ring->queue_index),
791 				  total_packets, total_bytes);
792 
793 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
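	/* only wake the queue once there is room for roughly two more packets;
	 * DESC_NEEDED is assumed to cover one worst-case (maximally fragmented)
	 * frame
	 */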
794 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
795 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
796 		/* Make sure that anybody stopping the queue after this
797 		 * sees the new next_to_clean.
798 		 */
799 		smp_mb();
800 		if (__netif_subqueue_stopped(tx_ring->netdev,
801 					     tx_ring->queue_index) &&
802 		   !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
803 			netif_wake_subqueue(tx_ring->netdev,
804 					    tx_ring->queue_index);
805 			++tx_ring->tx_stats.restart_queue;
806 		}
807 	}
808 
809 	return budget > 0;
810 }
811 
812 /**
813  * i40e_set_new_dynamic_itr - Find new ITR level
814  * @rc: structure containing ring performance data
815  *
816  * Stores a new ITR value based on packets and byte counts during
817  * the last interrupt.  The advantage of per interrupt computation
818  * is faster updates and more accurate ITR for the current traffic
819  * pattern.  Constants in this function were computed based on
820  * theoretical maximum wire speed and thresholds were set based on
821  * testing data as well as attempting to minimize response time
822  * while increasing bulk throughput.
823  **/
824 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
825 {
826 	enum i40e_latency_range new_latency_range = rc->latency_range;
827 	u32 new_itr = rc->itr;
828 	int bytes_per_int;
829 
830 	if (rc->total_packets == 0 || !rc->itr)
831 		return;
832 
	/* simple throttle rate management
834 	 *   0-10MB/s   lowest (100000 ints/s)
835 	 *  10-20MB/s   low    (20000 ints/s)
836 	 *  20-1249MB/s bulk   (8000 ints/s)
837 	 */
838 	bytes_per_int = rc->total_bytes / rc->itr;
	switch (new_latency_range) {
840 	case I40E_LOWEST_LATENCY:
841 		if (bytes_per_int > 10)
842 			new_latency_range = I40E_LOW_LATENCY;
843 		break;
844 	case I40E_LOW_LATENCY:
845 		if (bytes_per_int > 20)
846 			new_latency_range = I40E_BULK_LATENCY;
847 		else if (bytes_per_int <= 10)
848 			new_latency_range = I40E_LOWEST_LATENCY;
849 		break;
850 	case I40E_BULK_LATENCY:
851 		if (bytes_per_int <= 20)
			new_latency_range = I40E_LOW_LATENCY;
		break;
	}
	rc->latency_range = new_latency_range;
855 
856 	switch (new_latency_range) {
857 	case I40E_LOWEST_LATENCY:
858 		new_itr = I40E_ITR_100K;
859 		break;
860 	case I40E_LOW_LATENCY:
861 		new_itr = I40E_ITR_20K;
862 		break;
863 	case I40E_BULK_LATENCY:
864 		new_itr = I40E_ITR_8K;
865 		break;
866 	default:
867 		break;
868 	}
869 
870 	if (new_itr != rc->itr) {
871 		/* do an exponential smoothing */
872 		new_itr = (10 * new_itr * rc->itr) /
873 			  ((9 * new_itr) + rc->itr);
874 		rc->itr = new_itr & I40E_MAX_ITR;
875 	}
876 
877 	rc->total_bytes = 0;
878 	rc->total_packets = 0;
879 }
880 
881 /**
882  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
883  * @q_vector: the vector to adjust
884  **/
885 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
886 {
887 	u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
888 	struct i40e_hw *hw = &q_vector->vsi->back->hw;
889 	u32 reg_addr;
890 	u16 old_itr;
891 
892 	reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
893 	old_itr = q_vector->rx.itr;
894 	i40e_set_new_dynamic_itr(&q_vector->rx);
895 	if (old_itr != q_vector->rx.itr)
896 		wr32(hw, reg_addr, q_vector->rx.itr);
897 
898 	reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
899 	old_itr = q_vector->tx.itr;
900 	i40e_set_new_dynamic_itr(&q_vector->tx);
901 	if (old_itr != q_vector->tx.itr)
902 		wr32(hw, reg_addr, q_vector->tx.itr);
903 }
904 
905 /**
906  * i40e_clean_programming_status - clean the programming status descriptor
907  * @rx_ring: the rx ring that has this descriptor
908  * @rx_desc: the rx descriptor written back by HW
909  *
910  * Flow director should handle FD_FILTER_STATUS to check its filter programming
911  * status being successful or not and take actions accordingly. FCoE should
912  * handle its context/filter programming/invalidation status and take actions.
913  *
914  **/
915 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
916 					  union i40e_rx_desc *rx_desc)
917 {
918 	u64 qw;
919 	u8 id;
920 
921 	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
922 	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
923 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
924 
925 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
926 		i40e_fd_handle_status(rx_ring, rx_desc, id);
927 #ifdef I40E_FCOE
928 	else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
929 		 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
930 		i40e_fcoe_handle_status(rx_ring, rx_desc, id);
931 #endif
932 }
933 
934 /**
935  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
936  * @tx_ring: the tx ring to set up
937  *
938  * Return 0 on success, negative on error
939  **/
940 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
941 {
942 	struct device *dev = tx_ring->dev;
943 	int bi_size;
944 
945 	if (!dev)
946 		return -ENOMEM;
947 
948 	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
949 	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
950 	if (!tx_ring->tx_bi)
951 		goto err;
952 
953 	/* round up to nearest 4K */
954 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
955 	/* add u32 for head writeback, align after this takes care of
956 	 * guaranteeing this is at least one cache line in size
957 	 */
958 	tx_ring->size += sizeof(u32);
959 	tx_ring->size = ALIGN(tx_ring->size, 4096);
960 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
961 					   &tx_ring->dma, GFP_KERNEL);
962 	if (!tx_ring->desc) {
963 		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
964 			 tx_ring->size);
965 		goto err;
966 	}
967 
968 	tx_ring->next_to_use = 0;
969 	tx_ring->next_to_clean = 0;
970 	return 0;
971 
972 err:
973 	kfree(tx_ring->tx_bi);
974 	tx_ring->tx_bi = NULL;
975 	return -ENOMEM;
976 }
977 
978 /**
979  * i40e_clean_rx_ring - Free Rx buffers
980  * @rx_ring: ring to be cleaned
981  **/
982 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
983 {
984 	struct device *dev = rx_ring->dev;
985 	struct i40e_rx_buffer *rx_bi;
986 	unsigned long bi_size;
987 	u16 i;
988 
989 	/* ring already cleared, nothing to do */
990 	if (!rx_ring->rx_bi)
991 		return;
992 
993 	/* Free all the Rx ring sk_buffs */
994 	for (i = 0; i < rx_ring->count; i++) {
995 		rx_bi = &rx_ring->rx_bi[i];
996 		if (rx_bi->dma) {
997 			dma_unmap_single(dev,
998 					 rx_bi->dma,
999 					 rx_ring->rx_buf_len,
1000 					 DMA_FROM_DEVICE);
1001 			rx_bi->dma = 0;
1002 		}
1003 		if (rx_bi->skb) {
1004 			dev_kfree_skb(rx_bi->skb);
1005 			rx_bi->skb = NULL;
1006 		}
1007 		if (rx_bi->page) {
1008 			if (rx_bi->page_dma) {
1009 				dma_unmap_page(dev,
1010 					       rx_bi->page_dma,
1011 					       PAGE_SIZE / 2,
1012 					       DMA_FROM_DEVICE);
1013 				rx_bi->page_dma = 0;
1014 			}
1015 			__free_page(rx_bi->page);
1016 			rx_bi->page = NULL;
1017 			rx_bi->page_offset = 0;
1018 		}
1019 	}
1020 
1021 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1022 	memset(rx_ring->rx_bi, 0, bi_size);
1023 
1024 	/* Zero out the descriptor ring */
1025 	memset(rx_ring->desc, 0, rx_ring->size);
1026 
1027 	rx_ring->next_to_clean = 0;
1028 	rx_ring->next_to_use = 0;
1029 }
1030 
1031 /**
1032  * i40e_free_rx_resources - Free Rx resources
1033  * @rx_ring: ring to clean the resources from
1034  *
1035  * Free all receive software resources
1036  **/
1037 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1038 {
1039 	i40e_clean_rx_ring(rx_ring);
1040 	kfree(rx_ring->rx_bi);
1041 	rx_ring->rx_bi = NULL;
1042 
1043 	if (rx_ring->desc) {
1044 		dma_free_coherent(rx_ring->dev, rx_ring->size,
1045 				  rx_ring->desc, rx_ring->dma);
1046 		rx_ring->desc = NULL;
1047 	}
1048 }
1049 
1050 /**
1051  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1052  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1053  *
1054  * Returns 0 on success, negative on failure
1055  **/
1056 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1057 {
1058 	struct device *dev = rx_ring->dev;
1059 	int bi_size;
1060 
1061 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1062 	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1063 	if (!rx_ring->rx_bi)
1064 		goto err;
1065 
1066 	/* Round up to nearest 4K */
1067 	rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1068 		? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1069 		: rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1070 	rx_ring->size = ALIGN(rx_ring->size, 4096);
1071 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1072 					   &rx_ring->dma, GFP_KERNEL);
1073 
1074 	if (!rx_ring->desc) {
1075 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1076 			 rx_ring->size);
1077 		goto err;
1078 	}
1079 
1080 	rx_ring->next_to_clean = 0;
1081 	rx_ring->next_to_use = 0;
1082 
1083 	return 0;
1084 err:
1085 	kfree(rx_ring->rx_bi);
1086 	rx_ring->rx_bi = NULL;
1087 	return -ENOMEM;
1088 }
1089 
1090 /**
 * i40e_release_rx_desc - Store the new next_to_use value and bump the tail
 * @rx_ring: ring to bump
 * @val: new next_to_use index to write to the tail register
1094  **/
1095 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1096 {
1097 	rx_ring->next_to_use = val;
1098 	/* Force memory writes to complete before letting h/w
1099 	 * know there are new descriptors to fetch.  (Only
1100 	 * applicable for weak-ordered memory model archs,
1101 	 * such as IA-64).
1102 	 */
1103 	wmb();
1104 	writel(val, rx_ring->tail);
1105 }
1106 
1107 /**
1108  * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
1109  * @rx_ring: ring to place buffers on
1110  * @cleaned_count: number of buffers to replace
1111  **/
1112 void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
1113 {
1114 	u16 i = rx_ring->next_to_use;
1115 	union i40e_rx_desc *rx_desc;
1116 	struct i40e_rx_buffer *bi;
1117 	struct sk_buff *skb;
1118 
1119 	/* do nothing if no valid netdev defined */
1120 	if (!rx_ring->netdev || !cleaned_count)
1121 		return;
1122 
1123 	while (cleaned_count--) {
1124 		rx_desc = I40E_RX_DESC(rx_ring, i);
1125 		bi = &rx_ring->rx_bi[i];
1126 		skb = bi->skb;
1127 
1128 		if (!skb) {
1129 			skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1130 							rx_ring->rx_buf_len);
1131 			if (!skb) {
1132 				rx_ring->rx_stats.alloc_buff_failed++;
1133 				goto no_buffers;
1134 			}
1135 			/* initialize queue mapping */
1136 			skb_record_rx_queue(skb, rx_ring->queue_index);
1137 			bi->skb = skb;
1138 		}
1139 
1140 		if (!bi->dma) {
1141 			bi->dma = dma_map_single(rx_ring->dev,
1142 						 skb->data,
1143 						 rx_ring->rx_buf_len,
1144 						 DMA_FROM_DEVICE);
1145 			if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1146 				rx_ring->rx_stats.alloc_buff_failed++;
1147 				bi->dma = 0;
1148 				goto no_buffers;
1149 			}
1150 		}
1151 
1152 		if (ring_is_ps_enabled(rx_ring)) {
1153 			if (!bi->page) {
1154 				bi->page = alloc_page(GFP_ATOMIC);
1155 				if (!bi->page) {
1156 					rx_ring->rx_stats.alloc_page_failed++;
1157 					goto no_buffers;
1158 				}
1159 			}
1160 
1161 			if (!bi->page_dma) {
1162 				/* use a half page if we're re-using */
1163 				bi->page_offset ^= PAGE_SIZE / 2;
1164 				bi->page_dma = dma_map_page(rx_ring->dev,
1165 							    bi->page,
1166 							    bi->page_offset,
1167 							    PAGE_SIZE / 2,
1168 							    DMA_FROM_DEVICE);
1169 				if (dma_mapping_error(rx_ring->dev,
1170 						      bi->page_dma)) {
1171 					rx_ring->rx_stats.alloc_page_failed++;
1172 					bi->page_dma = 0;
1173 					goto no_buffers;
1174 				}
1175 			}
1176 
1177 			/* Refresh the desc even if buffer_addrs didn't change
1178 			 * because each write-back erases this info.
1179 			 */
1180 			rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1181 			rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1182 		} else {
1183 			rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1184 			rx_desc->read.hdr_addr = 0;
1185 		}
1186 		i++;
1187 		if (i == rx_ring->count)
1188 			i = 0;
1189 	}
1190 
1191 no_buffers:
1192 	if (rx_ring->next_to_use != i)
1193 		i40e_release_rx_desc(rx_ring, i);
1194 }
1195 
1196 /**
1197  * i40e_receive_skb - Send a completed packet up the stack
1198  * @rx_ring:  rx ring in play
1199  * @skb: packet to send up
1200  * @vlan_tag: vlan tag for packet
1201  **/
1202 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1203 			     struct sk_buff *skb, u16 vlan_tag)
1204 {
1205 	struct i40e_q_vector *q_vector = rx_ring->q_vector;
1206 	struct i40e_vsi *vsi = rx_ring->vsi;
1207 	u64 flags = vsi->back->flags;
1208 
1209 	if (vlan_tag & VLAN_VID_MASK)
1210 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1211 
1212 	if (flags & I40E_FLAG_IN_NETPOLL)
1213 		netif_rx(skb);
1214 	else
1215 		napi_gro_receive(&q_vector->napi, skb);
1216 }
1217 
1218 /**
1219  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1220  * @vsi: the VSI we care about
1221  * @skb: skb currently being received and modified
1222  * @rx_status: status value of last descriptor in packet
1223  * @rx_error: error value of last descriptor in packet
1224  * @rx_ptype: ptype value of last descriptor in packet
1225  **/
1226 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1227 				    struct sk_buff *skb,
1228 				    u32 rx_status,
1229 				    u32 rx_error,
1230 				    u16 rx_ptype)
1231 {
1232 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1233 	bool ipv4 = false, ipv6 = false;
1234 	bool ipv4_tunnel, ipv6_tunnel;
1235 	__wsum rx_udp_csum;
1236 	struct iphdr *iph;
1237 	__sum16 csum;
1238 
1239 	ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1240 		      (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1241 	ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1242 		      (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1243 
1244 	skb->ip_summed = CHECKSUM_NONE;
1245 
1246 	/* Rx csum enabled and ip headers found? */
1247 	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1248 		return;
1249 
1250 	/* did the hardware decode the packet and checksum? */
1251 	if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1252 		return;
1253 
1254 	/* both known and outer_ip must be set for the below code to work */
1255 	if (!(decoded.known && decoded.outer_ip))
1256 		return;
1257 
1258 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1259 	    decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1260 		ipv4 = true;
1261 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1262 		 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1263 		ipv6 = true;
1264 
1265 	if (ipv4 &&
1266 	    (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1267 			 (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1268 		goto checksum_fail;
1269 
1270 	/* likely incorrect csum if alternate IP extension headers found */
1271 	if (ipv6 &&
1272 	    rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1273 		/* don't increment checksum err here, non-fatal err */
1274 		return;
1275 
1276 	/* there was some L4 error, count error and punt packet to the stack */
1277 	if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))
1278 		goto checksum_fail;
1279 
1280 	/* handle packets that were not able to be checksummed due
1281 	 * to arrival speed, in this case the stack can compute
1282 	 * the csum.
1283 	 */
1284 	if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT))
1285 		return;
1286 
1287 	/* If VXLAN traffic has an outer UDPv4 checksum we need to check
1288 	 * it in the driver, hardware does not do it for us.
1289 	 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1290 	 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1292 	 */
1293 	if (ipv4_tunnel &&
1294 	    (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) &&
1295 	    !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
1296 		skb->transport_header = skb->mac_header +
1297 					sizeof(struct ethhdr) +
1298 					(ip_hdr(skb)->ihl * 4);
1299 
1300 		/* Add 4 bytes for VLAN tagged packets */
1301 		skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1302 					  skb->protocol == htons(ETH_P_8021AD))
1303 					  ? VLAN_HLEN : 0;
1304 
1305 		rx_udp_csum = udp_csum(skb);
1306 		iph = ip_hdr(skb);
1307 		csum = csum_tcpudp_magic(
1308 				iph->saddr, iph->daddr,
1309 				(skb->len - skb_transport_offset(skb)),
1310 				IPPROTO_UDP, rx_udp_csum);
1311 
1312 		if (udp_hdr(skb)->check != csum)
1313 			goto checksum_fail;
1314 	}
1315 
1316 	skb->ip_summed = CHECKSUM_UNNECESSARY;
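	/* a csum_level of 1 tells the stack that the checksum of the inner
	 * (encapsulated) header was validated as well as the outer one
	 */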
1317 	skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1318 
1319 	return;
1320 
1321 checksum_fail:
1322 	vsi->back->hw_csum_rx_error++;
1323 }
1324 
1325 /**
1326  * i40e_rx_hash - returns the hash value from the Rx descriptor
1327  * @ring: descriptor ring
1328  * @rx_desc: specific descriptor
1329  **/
1330 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1331 			       union i40e_rx_desc *rx_desc)
1332 {
1333 	const __le64 rss_mask =
1334 		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1335 			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1336 
1337 	if ((ring->netdev->features & NETIF_F_RXHASH) &&
1338 	    (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1339 		return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1340 	else
1341 		return 0;
1342 }
1343 
1344 /**
1345  * i40e_ptype_to_hash - get a hash type
1346  * @ptype: the ptype value from the descriptor
1347  *
1348  * Returns a hash type to be used by skb_set_hash
1349  **/
1350 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1351 {
1352 	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1353 
1354 	if (!decoded.known)
1355 		return PKT_HASH_TYPE_NONE;
1356 
1357 	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1358 	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1359 		return PKT_HASH_TYPE_L4;
1360 	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1361 		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1362 		return PKT_HASH_TYPE_L3;
1363 	else
1364 		return PKT_HASH_TYPE_L2;
1365 }
1366 
1367 /**
1368  * i40e_clean_rx_irq - Reclaim resources after receive completes
1369  * @rx_ring:  rx ring to clean
1370  * @budget:   how many cleans we're allowed
1371  *
 * Returns true if there's any budget left (i.e. the clean is finished)
1373  **/
1374 static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1375 {
1376 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1377 	u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1378 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1379 	const int current_node = numa_node_id();
1380 	struct i40e_vsi *vsi = rx_ring->vsi;
1381 	u16 i = rx_ring->next_to_clean;
1382 	union i40e_rx_desc *rx_desc;
1383 	u32 rx_error, rx_status;
1384 	u8 rx_ptype;
1385 	u64 qword;
1386 
1387 	if (budget <= 0)
1388 		return 0;
1389 
1390 	rx_desc = I40E_RX_DESC(rx_ring, i);
1391 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1392 	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1393 		    I40E_RXD_QW1_STATUS_SHIFT;
1394 
1395 	while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
1396 		union i40e_rx_desc *next_rxd;
1397 		struct i40e_rx_buffer *rx_bi;
1398 		struct sk_buff *skb;
		u16 vlan_tag;

1400 		if (i40e_rx_is_programming_status(qword)) {
1401 			i40e_clean_programming_status(rx_ring, rx_desc);
1402 			I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1403 			goto next_desc;
1404 		}
1405 		rx_bi = &rx_ring->rx_bi[i];
1406 		skb = rx_bi->skb;
1407 		prefetch(skb->data);
1408 
1409 		rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1410 				I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1411 		rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1412 				I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1413 		rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1414 			 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1415 
1416 		rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1417 			   I40E_RXD_QW1_ERROR_SHIFT;
1418 		rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1419 		rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1420 
1421 		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1422 			   I40E_RXD_QW1_PTYPE_SHIFT;
1423 		rx_bi->skb = NULL;
1424 
1425 		/* This memory barrier is needed to keep us from reading
1426 		 * any other fields out of the rx_desc until we know the
1427 		 * STATUS_DD bit is set
1428 		 */
1429 		rmb();
1430 
1431 		/* Get the header and possibly the whole packet
1432 		 * If this is an skb from previous receive dma will be 0
1433 		 */
1434 		if (rx_bi->dma) {
1435 			u16 len;
1436 
1437 			if (rx_hbo)
1438 				len = I40E_RX_HDR_SIZE;
1439 			else if (rx_sph)
1440 				len = rx_header_len;
1441 			else if (rx_packet_len)
1442 				len = rx_packet_len;   /* 1buf/no split found */
1443 			else
1444 				len = rx_header_len;   /* split always mode */
1445 
1446 			skb_put(skb, len);
1447 			dma_unmap_single(rx_ring->dev,
1448 					 rx_bi->dma,
1449 					 rx_ring->rx_buf_len,
1450 					 DMA_FROM_DEVICE);
1451 			rx_bi->dma = 0;
1452 		}
1453 
1454 		/* Get the rest of the data if this was a header split */
1455 		if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
1456 
1457 			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1458 					   rx_bi->page,
1459 					   rx_bi->page_offset,
1460 					   rx_packet_len);
1461 
1462 			skb->len += rx_packet_len;
1463 			skb->data_len += rx_packet_len;
1464 			skb->truesize += rx_packet_len;
1465 
1466 			if ((page_count(rx_bi->page) == 1) &&
1467 			    (page_to_nid(rx_bi->page) == current_node))
1468 				get_page(rx_bi->page);
1469 			else
1470 				rx_bi->page = NULL;
1471 
1472 			dma_unmap_page(rx_ring->dev,
1473 				       rx_bi->page_dma,
1474 				       PAGE_SIZE / 2,
1475 				       DMA_FROM_DEVICE);
1476 			rx_bi->page_dma = 0;
1477 		}
1478 		I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1479 
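		/* not the last descriptor of the frame: stash the partial skb
		 * state and continue with the next descriptor
		 */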
1480 		if (unlikely(
1481 		    !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1482 			struct i40e_rx_buffer *next_buffer;
1483 
1484 			next_buffer = &rx_ring->rx_bi[i];
1485 
1486 			if (ring_is_ps_enabled(rx_ring)) {
1487 				rx_bi->skb = next_buffer->skb;
1488 				rx_bi->dma = next_buffer->dma;
1489 				next_buffer->skb = skb;
1490 				next_buffer->dma = 0;
1491 			}
1492 			rx_ring->rx_stats.non_eop_descs++;
1493 			goto next_desc;
1494 		}
1495 
1496 		/* ERR_MASK will only have valid bits if EOP set */
1497 		if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1498 			dev_kfree_skb_any(skb);
1499 			/* TODO: shouldn't we increment a counter indicating the
1500 			 * drop?
1501 			 */
1502 			goto next_desc;
1503 		}
1504 
1505 		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1506 			     i40e_ptype_to_hash(rx_ptype));
1507 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1508 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1509 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1510 					   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1511 			rx_ring->last_rx_timestamp = jiffies;
1512 		}
1513 
1514 		/* probably a little skewed due to removing CRC */
1515 		total_rx_bytes += skb->len;
1516 		total_rx_packets++;
1517 
1518 		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1519 
1520 		i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1521 
1522 		vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1523 			 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1524 			 : 0;
1525 #ifdef I40E_FCOE
1526 		if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1527 			dev_kfree_skb_any(skb);
1528 			goto next_desc;
1529 		}
1530 #endif
1531 		i40e_receive_skb(rx_ring, skb, vlan_tag);
1532 
1533 		rx_ring->netdev->last_rx = jiffies;
1534 		budget--;
1535 next_desc:
1536 		rx_desc->wb.qword1.status_error_len = 0;
1537 		if (!budget)
1538 			break;
1539 
1540 		cleaned_count++;
1541 		/* return some buffers to hardware, one at a time is too slow */
1542 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1543 			i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1544 			cleaned_count = 0;
1545 		}
1546 
1547 		/* use prefetched values */
1548 		rx_desc = next_rxd;
1549 		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1550 		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1551 			    I40E_RXD_QW1_STATUS_SHIFT;
1552 	}
1553 
1554 	rx_ring->next_to_clean = i;
1555 	u64_stats_update_begin(&rx_ring->syncp);
1556 	rx_ring->stats.packets += total_rx_packets;
1557 	rx_ring->stats.bytes += total_rx_bytes;
1558 	u64_stats_update_end(&rx_ring->syncp);
1559 	rx_ring->q_vector->rx.total_packets += total_rx_packets;
1560 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1561 
1562 	if (cleaned_count)
1563 		i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1564 
1565 	return budget > 0;
1566 }
1567 
1568 /**
1569  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1570  * @napi: napi struct with our devices info in it
1571  * @budget: amount of work driver is allowed to do this pass, in packets
1572  *
1573  * This function will clean all queues associated with a q_vector.
1574  *
1575  * Returns the amount of work done
1576  **/
1577 int i40e_napi_poll(struct napi_struct *napi, int budget)
1578 {
1579 	struct i40e_q_vector *q_vector =
1580 			       container_of(napi, struct i40e_q_vector, napi);
1581 	struct i40e_vsi *vsi = q_vector->vsi;
1582 	struct i40e_ring *ring;
1583 	bool clean_complete = true;
1584 	int budget_per_ring;
1585 
1586 	if (test_bit(__I40E_DOWN, &vsi->state)) {
1587 		napi_complete(napi);
1588 		return 0;
1589 	}
1590 
1591 	/* Since the actual Tx work is minimal, we can give the Tx a larger
1592 	 * budget and be more aggressive about cleaning up the Tx descriptors.
1593 	 */
1594 	i40e_for_each_ring(ring, q_vector->tx)
1595 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1596 
1597 	/* We attempt to distribute budget to each Rx queue fairly, but don't
1598 	 * allow the budget to go below 1 because that would exit polling early.
1599 	 */
1600 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1601 
1602 	i40e_for_each_ring(ring, q_vector->rx)
1603 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
1604 
1605 	/* If work not completed, return budget and polling will return */
1606 	if (!clean_complete)
1607 		return budget;
1608 
1609 	/* Work is done so exit the polling mode and re-enable the interrupt */
1610 	napi_complete(napi);
1611 	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1612 	    ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1613 		i40e_update_dynamic_itr(q_vector);
1614 
1615 	if (!test_bit(__I40E_DOWN, &vsi->state)) {
1616 		if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1617 			i40e_irq_dynamic_enable(vsi,
1618 					q_vector->v_idx + vsi->base_vector);
1619 		} else {
1620 			struct i40e_hw *hw = &vsi->back->hw;
1621 			/* We re-enable the queue 0 cause, but
1622 			 * don't worry about dynamic_enable
1623 			 * because we left it on for the other
1624 			 * possible interrupts during napi
1625 			 */
1626 			u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1627 			qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1628 			wr32(hw, I40E_QINT_RQCTL(0), qval);
1629 
1630 			qval = rd32(hw, I40E_QINT_TQCTL(0));
1631 			qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1632 			wr32(hw, I40E_QINT_TQCTL(0), qval);
1633 
1634 			i40e_irq_dynamic_enable_icr0(vsi->back);
1635 		}
1636 	}
1637 
1638 	return 0;
1639 }
1640 
1641 /**
1642  * i40e_atr - Add a Flow Director ATR filter
1643  * @tx_ring:  ring to add programming descriptor to
1644  * @skb:      send buffer
1645  * @flags:    send flags
1646  * @protocol: wire protocol
1647  **/
1648 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1649 		     u32 flags, __be16 protocol)
1650 {
1651 	struct i40e_filter_program_desc *fdir_desc;
1652 	struct i40e_pf *pf = tx_ring->vsi->back;
1653 	union {
1654 		unsigned char *network;
1655 		struct iphdr *ipv4;
1656 		struct ipv6hdr *ipv6;
1657 	} hdr;
1658 	struct tcphdr *th;
1659 	unsigned int hlen;
1660 	u32 flex_ptype, dtype_cmd;
1661 	u16 i;
1662 
1663 	/* make sure ATR is enabled */
1664 	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1665 		return;
1666 
1667 	/* if sampling is disabled do nothing */
1668 	if (!tx_ring->atr_sample_rate)
1669 		return;
1670 
1671 	/* snag network header to get L4 type and address */
1672 	hdr.network = skb_network_header(skb);
1673 
1674 	/* Currently only IPv4/IPv6 with TCP is supported */
1675 	if (protocol == htons(ETH_P_IP)) {
1676 		if (hdr.ipv4->protocol != IPPROTO_TCP)
1677 			return;
1678 
1679 		/* access ihl as a u8 to avoid unaligned access on ia64 */
1680 		hlen = (hdr.network[0] & 0x0F) << 2;
1681 	} else if (protocol == htons(ETH_P_IPV6)) {
1682 		if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1683 			return;
1684 
1685 		hlen = sizeof(struct ipv6hdr);
1686 	} else {
1687 		return;
1688 	}
1689 
1690 	th = (struct tcphdr *)(hdr.network + hlen);
1691 
1692 	/* Due to lack of space, no more new filters can be programmed */
1693 	if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1694 		return;
1695 
1696 	tx_ring->atr_count++;
1697 
1698 	/* sample on all syn/fin/rst packets or once every atr sample rate */
1699 	if (!th->fin &&
1700 	    !th->syn &&
1701 	    !th->rst &&
1702 	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
1703 		return;
1704 
1705 	tx_ring->atr_count = 0;
1706 
1707 	/* grab the next descriptor */
1708 	i = tx_ring->next_to_use;
1709 	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1710 
1711 	i++;
1712 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1713 
1714 	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1715 		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
1716 	flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1717 		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1718 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1719 		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1720 		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1721 
1722 	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1723 
1724 	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1725 
1726 	dtype_cmd |= (th->fin || th->rst) ?
1727 		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1728 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
1729 		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
1730 		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
1731 
1732 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
1733 		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
1734 
1735 	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
1736 		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
1737 
1738 	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
1739 	dtype_cmd |=
1740 		((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
1741 		I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
1742 
1743 	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
1744 	fdir_desc->rsvd = cpu_to_le32(0);
1745 	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
1746 	fdir_desc->fd_id = cpu_to_le32(0);
1747 }
1748 
1749 /**
1750  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
1751  * @skb:     send buffer
1752  * @tx_ring: ring to send buffer on
1753  * @flags:   the tx flags to be set
1754  *
 * Checks the skb and sets up the generic transmit flags related to VLAN
 * tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code to indicate the frame should be dropped on error;
 * otherwise returns 0 to indicate the flags have been set properly.
1760  **/
1761 #ifdef I40E_FCOE
1762 int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1763 			       struct i40e_ring *tx_ring,
1764 			       u32 *flags)
1765 #else
1766 static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1767 				      struct i40e_ring *tx_ring,
1768 				      u32 *flags)
1769 #endif
1770 {
1771 	__be16 protocol = skb->protocol;
1772 	u32  tx_flags = 0;
1773 
1774 	/* if we have a HW VLAN tag being added, default to the HW one */
1775 	if (vlan_tx_tag_present(skb)) {
1776 		tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
1777 		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1778 	/* else if it is a SW VLAN, check the next protocol and store the tag */
1779 	} else if (protocol == htons(ETH_P_8021Q)) {
1780 		struct vlan_hdr *vhdr, _vhdr;
1781 		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
1782 		if (!vhdr)
1783 			return -EINVAL;
1784 
1785 		protocol = vhdr->h_vlan_encapsulated_proto;
1786 		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
1787 		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
1788 	}
1789 
1790 	/* Insert 802.1p priority into VLAN header */
1791 	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
1792 	    (skb->priority != TC_PRIO_CONTROL)) {
1793 		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
1794 		tx_flags |= (skb->priority & 0x7) <<
1795 				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
1796 		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
1797 			struct vlan_ethhdr *vhdr;
1798 			int rc;
1799 
1800 			rc = skb_cow_head(skb, 0);
1801 			if (rc < 0)
1802 				return rc;
1803 			vhdr = (struct vlan_ethhdr *)skb->data;
1804 			vhdr->h_vlan_TCI = htons(tx_flags >>
1805 						 I40E_TX_FLAGS_VLAN_SHIFT);
1806 		} else {
1807 			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1808 		}
1809 	}
1810 	*flags = tx_flags;
1811 	return 0;
1812 }
1813 
1814 /**
1815  * i40e_tso - set up the tso context descriptor
1816  * @tx_ring:  ptr to the ring to send
1817  * @skb:      ptr to the skb we're sending
1818  * @tx_flags: the collected send information
1819  * @protocol: the send protocol
 * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if no TSO is needed, 1 if TSO was set up, or a negative error code
1824  **/
1825 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
1826 		    u32 tx_flags, __be16 protocol, u8 *hdr_len,
1827 		    u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
1828 {
1829 	u32 cd_cmd, cd_tso_len, cd_mss;
1830 	struct ipv6hdr *ipv6h;
1831 	struct tcphdr *tcph;
1832 	struct iphdr *iph;
1833 	u32 l4len;
1834 	int err;
1835 
1836 	if (!skb_is_gso(skb))
1837 		return 0;
1838 
1839 	err = skb_cow_head(skb, 0);
1840 	if (err < 0)
1841 		return err;
1842 
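	/* For TSO the hardware rewrites the IP length fields and completes
	 * the TCP checksum for every segment, so zero tot_len/payload_len
	 * and seed tcph->check with the pseudo-header checksum here.
	 */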
1843 	if (protocol == htons(ETH_P_IP)) {
1844 		iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
1845 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1846 		iph->tot_len = 0;
1847 		iph->check = 0;
1848 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1849 						 0, IPPROTO_TCP, 0);
1850 	} else if (skb_is_gso_v6(skb)) {
1851 
1852 		ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
1853 					   : ipv6_hdr(skb);
1854 		tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1855 		ipv6h->payload_len = 0;
1856 		tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
1857 					       0, IPPROTO_TCP, 0);
1858 	}
1859 
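	/* the reported header length covers the L2/L3 headers up to the
	 * (inner) transport header plus the TCP header itself; this is the
	 * part that gets replicated in front of every segment
	 */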
1860 	l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
1861 	*hdr_len = (skb->encapsulation
1862 		    ? (skb_inner_transport_header(skb) - skb->data)
1863 		    : skb_transport_offset(skb)) + l4len;
1864 
1865 	/* find the field values */
1866 	cd_cmd = I40E_TX_CTX_DESC_TSO;
1867 	cd_tso_len = skb->len - *hdr_len;
1868 	cd_mss = skb_shinfo(skb)->gso_size;
1869 	*cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
1870 				((u64)cd_tso_len <<
1871 				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1872 				((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
1873 	return 1;
1874 }
1875 
1876 /**
1877  * i40e_tsyn - set up the tsyn context descriptor
1878  * @tx_ring:  ptr to the ring to send
1879  * @skb:      ptr to the skb we're sending
 * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
 *
 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
1883  **/
1884 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
1885 		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
1886 {
1887 	struct i40e_pf *pf;
1888 
1889 	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
1890 		return 0;
1891 
1892 	/* Tx timestamps cannot be sampled when doing TSO */
1893 	if (tx_flags & I40E_TX_FLAGS_TSO)
1894 		return 0;
1895 
1896 	/* only timestamp the outbound packet if the user has requested it and
1897 	 * we are not already transmitting a packet to be timestamped
1898 	 */
1899 	pf = i40e_netdev_to_pf(tx_ring->netdev);
1900 	if (pf->ptp_tx &&
1901 	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
1902 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1903 		pf->ptp_tx_skb = skb_get(skb);
1904 	} else {
1905 		return 0;
1906 	}
1907 
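	/* request a hardware Tx timestamp for this packet via the context
	 * descriptor
	 */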
1908 	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
1909 				I40E_TXD_CTX_QW1_CMD_SHIFT;
1910 
1911 	return 1;
1912 }
1913 
1914 /**
1915  * i40e_tx_enable_csum - Enable Tx checksum offloads
1916  * @skb: send buffer
1917  * @tx_flags: Tx flags currently set
1918  * @td_cmd: Tx descriptor command bits to set
 * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring
 * @cd_tunneling: ptr to context desc bits
1921  **/
1922 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
1923 				u32 *td_cmd, u32 *td_offset,
1924 				struct i40e_ring *tx_ring,
1925 				u32 *cd_tunneling)
1926 {
1927 	struct ipv6hdr *this_ipv6_hdr;
1928 	unsigned int this_tcp_hdrlen;
1929 	struct iphdr *this_ip_hdr;
1930 	u32 network_hdr_len;
1931 	u8 l4_hdr = 0;
1932 
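	/* For tunneled packets the checksum offloads apply to the inner
	 * headers, so point this_ip_hdr/this_ipv6_hdr at the inner headers
	 * and describe the outer (tunnel) headers via the context descriptor.
	 */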
1933 	if (skb->encapsulation) {
1934 		network_hdr_len = skb_inner_network_header_len(skb);
1935 		this_ip_hdr = inner_ip_hdr(skb);
1936 		this_ipv6_hdr = inner_ipv6_hdr(skb);
1937 		this_tcp_hdrlen = inner_tcp_hdrlen(skb);
1938 
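		/* program how the hardware should treat the outer IP header;
		 * for TSO the outer IPv4 checksum must be recomputed as well
		 */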
1939 		if (tx_flags & I40E_TX_FLAGS_IPV4) {
1940 
1941 			if (tx_flags & I40E_TX_FLAGS_TSO) {
1942 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
1943 				ip_hdr(skb)->check = 0;
1944 			} else {
1945 				*cd_tunneling |=
1946 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1947 			}
1948 		} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1949 			if (tx_flags & I40E_TX_FLAGS_TSO) {
1950 				*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
1951 				ip_hdr(skb)->check = 0;
1952 			} else {
1953 				*cd_tunneling |=
1954 					 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1955 			}
1956 		}
1957 
		/* Now set the ctx descriptor fields: the outer IP header
		 * length in dwords, the UDP tunneling type and the tunnel
		 * header (NAT) length in words
		 */
1959 		*cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
1960 					I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
1961 				   I40E_TXD_CTX_UDP_TUNNELING            |
1962 				   ((skb_inner_network_offset(skb) -
1963 					skb_transport_offset(skb)) >> 1) <<
1964 				   I40E_TXD_CTX_QW0_NATLEN_SHIFT;
1965 
1966 	} else {
1967 		network_hdr_len = skb_network_header_len(skb);
1968 		this_ip_hdr = ip_hdr(skb);
1969 		this_ipv6_hdr = ipv6_hdr(skb);
1970 		this_tcp_hdrlen = tcp_hdrlen(skb);
1971 	}
1972 
1973 	/* Enable IP checksum offloads */
1974 	if (tx_flags & I40E_TX_FLAGS_IPV4) {
1975 		l4_hdr = this_ip_hdr->protocol;
1976 		/* the stack computes the IP header already, the only time we
1977 		 * need the hardware to recompute it is in the case of TSO.
1978 		 */
1979 		if (tx_flags & I40E_TX_FLAGS_TSO) {
1980 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
1981 			this_ip_hdr->check = 0;
1982 		} else {
1983 			*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
1984 		}
1985 		/* Now set the td_offset for IP header length */
1986 		*td_offset = (network_hdr_len >> 2) <<
1987 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1988 	} else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1989 		l4_hdr = this_ipv6_hdr->nexthdr;
1990 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
1991 		/* Now set the td_offset for IP header length */
1992 		*td_offset = (network_hdr_len >> 2) <<
1993 			      I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1994 	}
1995 	/* words in MACLEN + dwords in IPLEN + dwords in L4Len */
1996 	*td_offset |= (skb_network_offset(skb) >> 1) <<
1997 		       I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
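	/* e.g. for an untagged frame with a 14-byte Ethernet header, a
	 * 20-byte IPv4 header and a 20-byte TCP header this works out to
	 * MACLEN = 7 words, IPLEN = 5 dwords and L4LEN = 5 dwords
	 */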
1998 
1999 	/* Enable L4 checksum offloads */
2000 	switch (l4_hdr) {
2001 	case IPPROTO_TCP:
		/* enable TCP checksum offload */
2003 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2004 		*td_offset |= (this_tcp_hdrlen >> 2) <<
2005 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2006 		break;
2007 	case IPPROTO_SCTP:
2008 		/* enable SCTP checksum offload */
2009 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2010 		*td_offset |= (sizeof(struct sctphdr) >> 2) <<
2011 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2012 		break;
2013 	case IPPROTO_UDP:
2014 		/* enable UDP checksum offload */
2015 		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2016 		*td_offset |= (sizeof(struct udphdr) >> 2) <<
2017 			       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2018 		break;
2019 	default:
2020 		break;
2021 	}
2022 }
2023 
2024 /**
 * i40e_create_tx_ctx - Build the Tx context descriptor
2026  * @tx_ring:  ring to create the descriptor on
2027  * @cd_type_cmd_tso_mss: Quad Word 1
2028  * @cd_tunneling: Quad Word 0 - bits 0-31
2029  * @cd_l2tag2: Quad Word 0 - bits 32-63
2030  **/
2031 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2032 			       const u64 cd_type_cmd_tso_mss,
2033 			       const u32 cd_tunneling, const u32 cd_l2tag2)
2034 {
2035 	struct i40e_tx_context_desc *context_desc;
2036 	int i = tx_ring->next_to_use;
2037 
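	/* if none of the context descriptor fields were changed from their
	 * defaults, the packet does not need a context descriptor at all
	 */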
2038 	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2039 	    !cd_tunneling && !cd_l2tag2)
2040 		return;
2041 
2042 	/* grab the next descriptor */
2043 	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2044 
2045 	i++;
2046 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2047 
2048 	/* cpu_to_le32 and assign to struct fields */
2049 	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2050 	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2051 	context_desc->rsvd = cpu_to_le16(0);
2052 	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2053 }
2054 
2055 /**
2056  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2057  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to make sure is available
2059  *
2060  * Returns -EBUSY if a stop is needed, else 0
2061  **/
2062 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2063 {
2064 	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2065 	/* Memory barrier before checking head and tail */
2066 	smp_mb();
2067 
2068 	/* Check again in a case another CPU has just made room available. */
2069 	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2070 		return -EBUSY;
2071 
2072 	/* A reprieve! - use start_queue because it doesn't call schedule */
2073 	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2074 	++tx_ring->tx_stats.restart_queue;
2075 	return 0;
2076 }
2077 
2078 /**
2079  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2080  * @tx_ring: the ring to be checked
 * @size:    the number of descriptors we want to make sure is available
2082  *
2083  * Returns 0 if stop is not needed
2084  **/
2085 #ifdef I40E_FCOE
2086 int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2087 #else
2088 static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2089 #endif
2090 {
2091 	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2092 		return 0;
2093 	return __i40e_maybe_stop_tx(tx_ring, size);
2094 }
2095 
2096 /**
2097  * i40e_tx_map - Build the Tx descriptor
2098  * @tx_ring:  ring to send buffer on
2099  * @skb:      send buffer
 * @first:    first Tx buffer info struct to use for this packet
2101  * @tx_flags: collected send information
2102  * @hdr_len:  size of the packet header
2103  * @td_cmd:   the command field in the descriptor
2104  * @td_offset: offset for checksum or crc
2105  **/
2106 #ifdef I40E_FCOE
2107 void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2108 		 struct i40e_tx_buffer *first, u32 tx_flags,
2109 		 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2110 #else
2111 static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2112 			struct i40e_tx_buffer *first, u32 tx_flags,
2113 			const u8 hdr_len, u32 td_cmd, u32 td_offset)
2114 #endif
2115 {
2116 	unsigned int data_len = skb->data_len;
2117 	unsigned int size = skb_headlen(skb);
2118 	struct skb_frag_struct *frag;
2119 	struct i40e_tx_buffer *tx_bi;
2120 	struct i40e_tx_desc *tx_desc;
2121 	u16 i = tx_ring->next_to_use;
2122 	u32 td_tag = 0;
2123 	dma_addr_t dma;
2124 	u16 gso_segs;
2125 
2126 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2127 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2128 		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2129 			 I40E_TX_FLAGS_VLAN_SHIFT;
2130 	}
2131 
2132 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2133 		gso_segs = skb_shinfo(skb)->gso_segs;
2134 	else
2135 		gso_segs = 1;
2136 
	/* total bytes on the wire: the payload plus one copy of the headers
	 * for each segment
	 */
2138 	first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2139 	first->gso_segs = gso_segs;
2140 	first->skb = skb;
2141 	first->tx_flags = tx_flags;
2142 
2143 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2144 
2145 	tx_desc = I40E_TX_DESC(tx_ring, i);
2146 	tx_bi = first;
2147 
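	/* walk the linear data and then every fragment, writing one data
	 * descriptor per mapped chunk and splitting any chunk larger than
	 * I40E_MAX_DATA_PER_TXD across multiple descriptors
	 */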
2148 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2149 		if (dma_mapping_error(tx_ring->dev, dma))
2150 			goto dma_error;
2151 
2152 		/* record length, and DMA address */
2153 		dma_unmap_len_set(tx_bi, len, size);
2154 		dma_unmap_addr_set(tx_bi, dma, dma);
2155 
2156 		tx_desc->buffer_addr = cpu_to_le64(dma);
2157 
2158 		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2159 			tx_desc->cmd_type_offset_bsz =
2160 				build_ctob(td_cmd, td_offset,
2161 					   I40E_MAX_DATA_PER_TXD, td_tag);
2162 
2163 			tx_desc++;
2164 			i++;
2165 			if (i == tx_ring->count) {
2166 				tx_desc = I40E_TX_DESC(tx_ring, 0);
2167 				i = 0;
2168 			}
2169 
2170 			dma += I40E_MAX_DATA_PER_TXD;
2171 			size -= I40E_MAX_DATA_PER_TXD;
2172 
2173 			tx_desc->buffer_addr = cpu_to_le64(dma);
2174 		}
2175 
2176 		if (likely(!data_len))
2177 			break;
2178 
2179 		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2180 							  size, td_tag);
2181 
2182 		tx_desc++;
2183 		i++;
2184 		if (i == tx_ring->count) {
2185 			tx_desc = I40E_TX_DESC(tx_ring, 0);
2186 			i = 0;
2187 		}
2188 
2189 		size = skb_frag_size(frag);
2190 		data_len -= size;
2191 
2192 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2193 				       DMA_TO_DEVICE);
2194 
2195 		tx_bi = &tx_ring->tx_bi[i];
2196 	}
2197 
	/* Set the RS bit on the last descriptor of any packet that ends on,
	 * or spans across, a write-back stride boundary (WB_STRIDE, i.e.
	 * every 4th descriptor, one 64B cacheline); otherwise set only EOP
	 * to reduce descriptor write-backs.
	 */
2201 #define WB_STRIDE 0x3
2202 	if (((i & WB_STRIDE) != WB_STRIDE) &&
2203 	    (first <= &tx_ring->tx_bi[i]) &&
2204 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2205 		tx_desc->cmd_type_offset_bsz =
2206 			build_ctob(td_cmd, td_offset, size, td_tag) |
2207 			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2208 					 I40E_TXD_QW1_CMD_SHIFT);
2209 	} else {
2210 		tx_desc->cmd_type_offset_bsz =
2211 			build_ctob(td_cmd, td_offset, size, td_tag) |
2212 			cpu_to_le64((u64)I40E_TXD_CMD <<
2213 					 I40E_TXD_QW1_CMD_SHIFT);
2214 	}
2215 
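	/* account the queued bytes for BQL (byte queue limits) */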
2216 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2217 						 tx_ring->queue_index),
2218 			     first->bytecount);
2219 
2220 	/* set the timestamp */
2221 	first->time_stamp = jiffies;
2222 
2223 	/* Force memory writes to complete before letting h/w
2224 	 * know there are new descriptors to fetch.  (Only
2225 	 * applicable for weak-ordered memory model archs,
2226 	 * such as IA-64).
2227 	 */
2228 	wmb();
2229 
2230 	/* set next_to_watch value indicating a packet is present */
2231 	first->next_to_watch = tx_desc;
2232 
2233 	i++;
2234 	if (i == tx_ring->count)
2235 		i = 0;
2236 
2237 	tx_ring->next_to_use = i;
2238 
2239 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
	/* notify HW of the packet; skip the tail bump when the stack has more
	 * frames queued for this ring (xmit_more) and the queue is still
	 * running, so several packets can share one doorbell write
	 */
2241 	if (!skb->xmit_more ||
2242 	    netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2243 						   tx_ring->queue_index)))
2244 		writel(i, tx_ring->tail);
2245 
2246 	return;
2247 
2248 dma_error:
2249 	dev_info(tx_ring->dev, "TX DMA map failed\n");
2250 
2251 	/* clear dma mappings for failed tx_bi map */
2252 	for (;;) {
2253 		tx_bi = &tx_ring->tx_bi[i];
2254 		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2255 		if (tx_bi == first)
2256 			break;
2257 		if (i == 0)
2258 			i = tx_ring->count;
2259 		i--;
2260 	}
2261 
2262 	tx_ring->next_to_use = i;
2263 }
2264 
2265 /**
2266  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2267  * @skb:     send buffer
2268  * @tx_ring: ring to send buffer on
2269  *
 * Returns the number of data descriptors needed for this skb. Returns 0 to
 * indicate there are not enough descriptors available in this ring since we
 * need at least one descriptor.
2273  **/
2274 #ifdef I40E_FCOE
2275 int i40e_xmit_descriptor_count(struct sk_buff *skb,
2276 			       struct i40e_ring *tx_ring)
2277 #else
2278 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
2279 				      struct i40e_ring *tx_ring)
2280 #endif
2281 {
2282 	unsigned int f;
2283 	int count = 0;
2284 
2285 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2286 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2287 	 *       + 4 desc gap to avoid the cache line where head is,
2288 	 *       + 1 desc for context descriptor,
2289 	 * otherwise try next time
2290 	 */
2291 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2292 		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2293 
2294 	count += TXD_USE_COUNT(skb_headlen(skb));
2295 	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2296 		tx_ring->tx_stats.tx_busy++;
2297 		return 0;
2298 	}
2299 	return count;
2300 }
2301 
2302 /**
2303  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2304  * @skb:     send buffer
2305  * @tx_ring: ring to send buffer on
2306  *
2307  * Returns NETDEV_TX_OK if sent, else an error code
2308  **/
2309 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2310 					struct i40e_ring *tx_ring)
2311 {
2312 	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2313 	u32 cd_tunneling = 0, cd_l2tag2 = 0;
2314 	struct i40e_tx_buffer *first;
2315 	u32 td_offset = 0;
2316 	u32 tx_flags = 0;
2317 	__be16 protocol;
2318 	u32 td_cmd = 0;
2319 	u8 hdr_len = 0;
2320 	int tsyn;
	int tso;

	if (i40e_xmit_descriptor_count(skb, tx_ring) == 0)
2323 		return NETDEV_TX_BUSY;
2324 
2325 	/* prepare the xmit flags */
2326 	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2327 		goto out_drop;
2328 
2329 	/* obtain protocol of skb */
2330 	protocol = vlan_get_protocol(skb);
2331 
2332 	/* record the location of the first descriptor for this packet */
2333 	first = &tx_ring->tx_bi[tx_ring->next_to_use];
2334 
2335 	/* setup IPv4/IPv6 offloads */
2336 	if (protocol == htons(ETH_P_IP))
2337 		tx_flags |= I40E_TX_FLAGS_IPV4;
2338 	else if (protocol == htons(ETH_P_IPV6))
2339 		tx_flags |= I40E_TX_FLAGS_IPV6;
2340 
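	/* set up the TSO context descriptor fields if this is a GSO skb */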
2341 	tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
2342 		       &cd_type_cmd_tso_mss, &cd_tunneling);
2343 
2344 	if (tso < 0)
2345 		goto out_drop;
2346 	else if (tso)
2347 		tx_flags |= I40E_TX_FLAGS_TSO;
2348 
2349 	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2350 
2351 	if (tsyn)
2352 		tx_flags |= I40E_TX_FLAGS_TSYN;
2353 
2354 	skb_tx_timestamp(skb);
2355 
2356 	/* always enable CRC insertion offload */
2357 	td_cmd |= I40E_TX_DESC_CMD_ICRC;
2358 
2359 	/* Always offload the checksum, since it's in the data descriptor */
2360 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
2361 		tx_flags |= I40E_TX_FLAGS_CSUM;
2362 
2363 		i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
2364 				    tx_ring, &cd_tunneling);
2365 	}
2366 
2367 	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2368 			   cd_tunneling, cd_l2tag2);
2369 
2370 	/* Add Flow Director ATR if it's enabled.
2371 	 *
2372 	 * NOTE: this must always be directly before the data descriptor.
2373 	 */
2374 	i40e_atr(tx_ring, skb, tx_flags, protocol);
2375 
2376 	i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2377 		    td_cmd, td_offset);
2378 
2379 	return NETDEV_TX_OK;
2380 
2381 out_drop:
2382 	dev_kfree_skb_any(skb);
2383 	return NETDEV_TX_OK;
2384 }
2385 
2386 /**
2387  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2388  * @skb:    send buffer
2389  * @netdev: network interface device structure
2390  *
2391  * Returns NETDEV_TX_OK if sent, else an error code
2392  **/
2393 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2394 {
2395 	struct i40e_netdev_priv *np = netdev_priv(netdev);
2396 	struct i40e_vsi *vsi = np->vsi;
2397 	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2398 
	/* hardware can't handle really short frames, so pad them out in
	 * software; hardware padding works beyond this point
	 */
2402 	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2403 		return NETDEV_TX_OK;
2404 
2405 	return i40e_xmit_frame_ring(skb, tx_ring);
2406 }
2407