xref: /openbmc/linux/drivers/net/ethernet/intel/igc/igc_main.c (revision f8a11425075ff11b4b5784f077cb84f3d2dfb3f0)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c)  2018 Intel Corporation */
3 
4 #include <linux/module.h>
5 #include <linux/types.h>
6 #include <linux/if_vlan.h>
7 #include <linux/aer.h>
8 #include <linux/tcp.h>
9 #include <linux/udp.h>
10 #include <linux/ip.h>
11 #include <linux/pm_runtime.h>
12 #include <net/pkt_sched.h>
13 #include <linux/bpf_trace.h>
14 #include <net/xdp_sock_drv.h>
15 #include <net/ipv6.h>
16 
17 #include "igc.h"
18 #include "igc_hw.h"
19 #include "igc_tsn.h"
20 #include "igc_xdp.h"
21 
22 #define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
23 
24 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
25 
26 #define IGC_XDP_PASS		0
27 #define IGC_XDP_CONSUMED	BIT(0)
28 #define IGC_XDP_TX		BIT(1)
29 #define IGC_XDP_REDIRECT	BIT(2)
30 
31 static int debug = -1;
32 
33 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
34 MODULE_DESCRIPTION(DRV_SUMMARY);
35 MODULE_LICENSE("GPL v2");
36 module_param(debug, int, 0);
37 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
38 
39 char igc_driver_name[] = "igc";
40 static const char igc_driver_string[] = DRV_SUMMARY;
41 static const char igc_copyright[] =
42 	"Copyright(c) 2018 Intel Corporation.";
43 
44 static const struct igc_info *igc_info_tbl[] = {
45 	[board_base] = &igc_base_info,
46 };
47 
48 static const struct pci_device_id igc_pci_tbl[] = {
49 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
50 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
51 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
52 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
53 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
54 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
55 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
56 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
57 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
58 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
59 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
60 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
61 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
62 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
63 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
64 	/* required last entry */
65 	{0, }
66 };
67 
68 MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
69 
70 enum latency_range {
71 	lowest_latency = 0,
72 	low_latency = 1,
73 	bulk_latency = 2,
74 	latency_invalid = 255
75 };
76 
77 void igc_reset(struct igc_adapter *adapter)
78 {
79 	struct net_device *dev = adapter->netdev;
80 	struct igc_hw *hw = &adapter->hw;
81 	struct igc_fc_info *fc = &hw->fc;
82 	u32 pba, hwm;
83 
84 	/* Repartition PBA for greater than 9k MTU if required */
85 	pba = IGC_PBA_34K;
86 
87 	/* flow control settings
88 	 * The high water mark must be low enough to fit one full frame
89 	 * after transmitting the pause frame.  As such we must have enough
90 	 * space to allow for us to complete our current transmit and then
91 	 * receive the frame that is in progress from the link partner.
92 	 * Set it to:
93 	 * - the full Rx FIFO size minus one full Tx frame and one full Rx frame
94 	 */
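	/* pba is the Rx packet buffer size expressed in KB; pba << 10 converts
	 * it to bytes.
	 */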
95 	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
96 
97 	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
98 	fc->low_water = fc->high_water - 16;
99 	fc->pause_time = 0xFFFF;
100 	fc->send_xon = 1;
101 	fc->current_mode = fc->requested_mode;
102 
103 	hw->mac.ops.reset_hw(hw);
104 
105 	if (hw->mac.ops.init_hw(hw))
106 		netdev_err(dev, "Error on hardware initialization\n");
107 
108 	/* Re-establish EEE setting */
109 	igc_set_eee_i225(hw, true, true, true);
110 
111 	if (!netif_running(adapter->netdev))
112 		igc_power_down_phy_copper_base(&adapter->hw);
113 
114 	/* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
115 	wr32(IGC_VET, ETH_P_8021Q);
116 
117 	/* Re-enable PTP, where applicable. */
118 	igc_ptp_reset(adapter);
119 
120 	/* Re-enable TSN offloading, where applicable. */
121 	igc_tsn_offload_apply(adapter);
122 
123 	igc_get_phy_info(hw);
124 }
125 
126 /**
127  * igc_power_up_link - Power up the phy link
128  * @adapter: address of board private structure
129  */
130 static void igc_power_up_link(struct igc_adapter *adapter)
131 {
132 	igc_reset_phy(&adapter->hw);
133 
134 	igc_power_up_phy_copper(&adapter->hw);
135 
136 	igc_setup_link(&adapter->hw);
137 }
138 
139 /**
140  * igc_release_hw_control - release control of the h/w to f/w
141  * @adapter: address of board private structure
142  *
143  * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
144  * For ASF and Pass Through versions of f/w this means that the
145  * driver is no longer loaded.
146  */
147 static void igc_release_hw_control(struct igc_adapter *adapter)
148 {
149 	struct igc_hw *hw = &adapter->hw;
150 	u32 ctrl_ext;
151 
152 	/* Let firmware take over control of h/w */
153 	ctrl_ext = rd32(IGC_CTRL_EXT);
154 	wr32(IGC_CTRL_EXT,
155 	     ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
156 }
157 
158 /**
159  * igc_get_hw_control - get control of the h/w from f/w
160  * @adapter: address of board private structure
161  *
162  * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
163  * For ASF and Pass Through versions of f/w this means that
164  * the driver is loaded.
165  */
166 static void igc_get_hw_control(struct igc_adapter *adapter)
167 {
168 	struct igc_hw *hw = &adapter->hw;
169 	u32 ctrl_ext;
170 
171 	/* Let firmware know the driver has taken over */
172 	ctrl_ext = rd32(IGC_CTRL_EXT);
173 	wr32(IGC_CTRL_EXT,
174 	     ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
175 }
176 
177 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
178 {
179 	dma_unmap_single(dev, dma_unmap_addr(buf, dma),
180 			 dma_unmap_len(buf, len), DMA_TO_DEVICE);
181 
182 	dma_unmap_len_set(buf, len, 0);
183 }
184 
185 /**
186  * igc_clean_tx_ring - Free Tx Buffers
187  * @tx_ring: ring to be cleaned
188  */
189 static void igc_clean_tx_ring(struct igc_ring *tx_ring)
190 {
191 	u16 i = tx_ring->next_to_clean;
192 	struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
193 	u32 xsk_frames = 0;
194 
195 	while (i != tx_ring->next_to_use) {
196 		union igc_adv_tx_desc *eop_desc, *tx_desc;
197 
198 		switch (tx_buffer->type) {
199 		case IGC_TX_BUFFER_TYPE_XSK:
200 			xsk_frames++;
201 			break;
202 		case IGC_TX_BUFFER_TYPE_XDP:
203 			xdp_return_frame(tx_buffer->xdpf);
204 			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
205 			break;
206 		case IGC_TX_BUFFER_TYPE_SKB:
207 			dev_kfree_skb_any(tx_buffer->skb);
208 			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
209 			break;
210 		default:
211 			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
212 			break;
213 		}
214 
215 		/* check for eop_desc to determine the end of the packet */
216 		eop_desc = tx_buffer->next_to_watch;
217 		tx_desc = IGC_TX_DESC(tx_ring, i);
218 
219 		/* unmap remaining buffers */
220 		while (tx_desc != eop_desc) {
221 			tx_buffer++;
222 			tx_desc++;
223 			i++;
224 			if (unlikely(i == tx_ring->count)) {
225 				i = 0;
226 				tx_buffer = tx_ring->tx_buffer_info;
227 				tx_desc = IGC_TX_DESC(tx_ring, 0);
228 			}
229 
230 			/* unmap any remaining paged data */
231 			if (dma_unmap_len(tx_buffer, len))
232 				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
233 		}
234 
235 		/* move us one more past the eop_desc for start of next pkt */
236 		tx_buffer++;
237 		i++;
238 		if (unlikely(i == tx_ring->count)) {
239 			i = 0;
240 			tx_buffer = tx_ring->tx_buffer_info;
241 		}
242 	}
243 
244 	if (tx_ring->xsk_pool && xsk_frames)
245 		xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
246 
247 	/* reset BQL for queue */
248 	netdev_tx_reset_queue(txring_txq(tx_ring));
249 
250 	/* reset next_to_use and next_to_clean */
251 	tx_ring->next_to_use = 0;
252 	tx_ring->next_to_clean = 0;
253 }
254 
255 /**
256  * igc_free_tx_resources - Free Tx Resources per Queue
257  * @tx_ring: Tx descriptor ring for a specific queue
258  *
259  * Free all transmit software resources
260  */
261 void igc_free_tx_resources(struct igc_ring *tx_ring)
262 {
263 	igc_clean_tx_ring(tx_ring);
264 
265 	vfree(tx_ring->tx_buffer_info);
266 	tx_ring->tx_buffer_info = NULL;
267 
268 	/* if not set, then don't free */
269 	if (!tx_ring->desc)
270 		return;
271 
272 	dma_free_coherent(tx_ring->dev, tx_ring->size,
273 			  tx_ring->desc, tx_ring->dma);
274 
275 	tx_ring->desc = NULL;
276 }
277 
278 /**
279  * igc_free_all_tx_resources - Free Tx Resources for All Queues
280  * @adapter: board private structure
281  *
282  * Free all transmit software resources
283  */
284 static void igc_free_all_tx_resources(struct igc_adapter *adapter)
285 {
286 	int i;
287 
288 	for (i = 0; i < adapter->num_tx_queues; i++)
289 		igc_free_tx_resources(adapter->tx_ring[i]);
290 }
291 
292 /**
293  * igc_clean_all_tx_rings - Free Tx Buffers for all queues
294  * @adapter: board private structure
295  */
296 static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
297 {
298 	int i;
299 
300 	for (i = 0; i < adapter->num_tx_queues; i++)
301 		if (adapter->tx_ring[i])
302 			igc_clean_tx_ring(adapter->tx_ring[i]);
303 }
304 
305 /**
306  * igc_setup_tx_resources - allocate Tx resources (Descriptors)
307  * @tx_ring: tx descriptor ring (for a specific queue) to setup
308  *
309  * Return 0 on success, negative on failure
310  */
311 int igc_setup_tx_resources(struct igc_ring *tx_ring)
312 {
313 	struct net_device *ndev = tx_ring->netdev;
314 	struct device *dev = tx_ring->dev;
315 	int size = 0;
316 
317 	size = sizeof(struct igc_tx_buffer) * tx_ring->count;
318 	tx_ring->tx_buffer_info = vzalloc(size);
319 	if (!tx_ring->tx_buffer_info)
320 		goto err;
321 
322 	/* round up to nearest 4K */
323 	tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
324 	tx_ring->size = ALIGN(tx_ring->size, 4096);
325 
326 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
327 					   &tx_ring->dma, GFP_KERNEL);
328 
329 	if (!tx_ring->desc)
330 		goto err;
331 
332 	tx_ring->next_to_use = 0;
333 	tx_ring->next_to_clean = 0;
334 
335 	return 0;
336 
337 err:
338 	vfree(tx_ring->tx_buffer_info);
339 	netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
340 	return -ENOMEM;
341 }
342 
343 /**
344  * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
345  * @adapter: board private structure
346  *
347  * Return 0 on success, negative on failure
348  */
349 static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
350 {
351 	struct net_device *dev = adapter->netdev;
352 	int i, err = 0;
353 
354 	for (i = 0; i < adapter->num_tx_queues; i++) {
355 		err = igc_setup_tx_resources(adapter->tx_ring[i]);
356 		if (err) {
357 			netdev_err(dev, "Error on Tx queue %u setup\n", i);
358 			for (i--; i >= 0; i--)
359 				igc_free_tx_resources(adapter->tx_ring[i]);
360 			break;
361 		}
362 	}
363 
364 	return err;
365 }
366 
367 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
368 {
369 	u16 i = rx_ring->next_to_clean;
370 
371 	dev_kfree_skb(rx_ring->skb);
372 	rx_ring->skb = NULL;
373 
374 	/* Free all the Rx ring sk_buffs */
375 	while (i != rx_ring->next_to_alloc) {
376 		struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
377 
378 		/* Invalidate cache lines that may have been written to by
379 		 * device so that we avoid corrupting memory.
380 		 */
381 		dma_sync_single_range_for_cpu(rx_ring->dev,
382 					      buffer_info->dma,
383 					      buffer_info->page_offset,
384 					      igc_rx_bufsz(rx_ring),
385 					      DMA_FROM_DEVICE);
386 
387 		/* free resources associated with mapping */
388 		dma_unmap_page_attrs(rx_ring->dev,
389 				     buffer_info->dma,
390 				     igc_rx_pg_size(rx_ring),
391 				     DMA_FROM_DEVICE,
392 				     IGC_RX_DMA_ATTR);
393 		__page_frag_cache_drain(buffer_info->page,
394 					buffer_info->pagecnt_bias);
395 
396 		i++;
397 		if (i == rx_ring->count)
398 			i = 0;
399 	}
400 }
401 
402 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
403 {
404 	struct igc_rx_buffer *bi;
405 	u16 i;
406 
407 	for (i = 0; i < ring->count; i++) {
408 		bi = &ring->rx_buffer_info[i];
409 		if (!bi->xdp)
410 			continue;
411 
412 		xsk_buff_free(bi->xdp);
413 		bi->xdp = NULL;
414 	}
415 }
416 
417 /**
418  * igc_clean_rx_ring - Free Rx Buffers per Queue
419  * @ring: ring to free buffers from
420  */
421 static void igc_clean_rx_ring(struct igc_ring *ring)
422 {
423 	if (ring->xsk_pool)
424 		igc_clean_rx_ring_xsk_pool(ring);
425 	else
426 		igc_clean_rx_ring_page_shared(ring);
427 
428 	clear_ring_uses_large_buffer(ring);
429 
430 	ring->next_to_alloc = 0;
431 	ring->next_to_clean = 0;
432 	ring->next_to_use = 0;
433 }
434 
435 /**
436  * igc_clean_all_rx_rings - Free Rx Buffers for all queues
437  * @adapter: board private structure
438  */
439 static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
440 {
441 	int i;
442 
443 	for (i = 0; i < adapter->num_rx_queues; i++)
444 		if (adapter->rx_ring[i])
445 			igc_clean_rx_ring(adapter->rx_ring[i]);
446 }
447 
448 /**
449  * igc_free_rx_resources - Free Rx Resources
450  * @rx_ring: ring to clean the resources from
451  *
452  * Free all receive software resources
453  */
454 void igc_free_rx_resources(struct igc_ring *rx_ring)
455 {
456 	igc_clean_rx_ring(rx_ring);
457 
458 	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
459 
460 	vfree(rx_ring->rx_buffer_info);
461 	rx_ring->rx_buffer_info = NULL;
462 
463 	/* if not set, then don't free */
464 	if (!rx_ring->desc)
465 		return;
466 
467 	dma_free_coherent(rx_ring->dev, rx_ring->size,
468 			  rx_ring->desc, rx_ring->dma);
469 
470 	rx_ring->desc = NULL;
471 }
472 
473 /**
474  * igc_free_all_rx_resources - Free Rx Resources for All Queues
475  * @adapter: board private structure
476  *
477  * Free all receive software resources
478  */
479 static void igc_free_all_rx_resources(struct igc_adapter *adapter)
480 {
481 	int i;
482 
483 	for (i = 0; i < adapter->num_rx_queues; i++)
484 		igc_free_rx_resources(adapter->rx_ring[i]);
485 }
486 
487 /**
488  * igc_setup_rx_resources - allocate Rx resources (Descriptors)
489  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
490  *
491  * Returns 0 on success, negative on failure
492  */
493 int igc_setup_rx_resources(struct igc_ring *rx_ring)
494 {
495 	struct net_device *ndev = rx_ring->netdev;
496 	struct device *dev = rx_ring->dev;
497 	u8 index = rx_ring->queue_index;
498 	int size, desc_len, res;
499 
500 	res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
501 			       rx_ring->q_vector->napi.napi_id);
502 	if (res < 0) {
503 		netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
504 			   index);
505 		return res;
506 	}
507 
508 	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
509 	rx_ring->rx_buffer_info = vzalloc(size);
510 	if (!rx_ring->rx_buffer_info)
511 		goto err;
512 
513 	desc_len = sizeof(union igc_adv_rx_desc);
514 
515 	/* Round up to nearest 4K */
516 	rx_ring->size = rx_ring->count * desc_len;
517 	rx_ring->size = ALIGN(rx_ring->size, 4096);
518 
519 	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
520 					   &rx_ring->dma, GFP_KERNEL);
521 
522 	if (!rx_ring->desc)
523 		goto err;
524 
525 	rx_ring->next_to_alloc = 0;
526 	rx_ring->next_to_clean = 0;
527 	rx_ring->next_to_use = 0;
528 
529 	return 0;
530 
531 err:
532 	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
533 	vfree(rx_ring->rx_buffer_info);
534 	rx_ring->rx_buffer_info = NULL;
535 	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
536 	return -ENOMEM;
537 }
538 
539 /**
540  * igc_setup_all_rx_resources - wrapper to allocate Rx resources
541  *                                (Descriptors) for all queues
542  * @adapter: board private structure
543  *
544  * Return 0 on success, negative on failure
545  */
546 static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
547 {
548 	struct net_device *dev = adapter->netdev;
549 	int i, err = 0;
550 
551 	for (i = 0; i < adapter->num_rx_queues; i++) {
552 		err = igc_setup_rx_resources(adapter->rx_ring[i]);
553 		if (err) {
554 			netdev_err(dev, "Error on Rx queue %u setup\n", i);
555 			for (i--; i >= 0; i--)
556 				igc_free_rx_resources(adapter->rx_ring[i]);
557 			break;
558 		}
559 	}
560 
561 	return err;
562 }
563 
564 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
565 					      struct igc_ring *ring)
566 {
567 	if (!igc_xdp_is_enabled(adapter) ||
568 	    !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
569 		return NULL;
570 
571 	return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
572 }
573 
574 /**
575  * igc_configure_rx_ring - Configure a receive ring after Reset
576  * @adapter: board private structure
577  * @ring: receive ring to be configured
578  *
579  * Configure the Rx unit of the MAC after a reset.
580  */
581 static void igc_configure_rx_ring(struct igc_adapter *adapter,
582 				  struct igc_ring *ring)
583 {
584 	struct igc_hw *hw = &adapter->hw;
585 	union igc_adv_rx_desc *rx_desc;
586 	int reg_idx = ring->reg_idx;
587 	u32 srrctl = 0, rxdctl = 0;
588 	u64 rdba = ring->dma;
589 	u32 buf_size;
590 
591 	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
592 	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
593 	if (ring->xsk_pool) {
594 		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
595 						   MEM_TYPE_XSK_BUFF_POOL,
596 						   NULL));
597 		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
598 	} else {
599 		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
600 						   MEM_TYPE_PAGE_SHARED,
601 						   NULL));
602 	}
603 
604 	if (igc_xdp_is_enabled(adapter))
605 		set_ring_uses_large_buffer(ring);
606 
607 	/* disable the queue */
608 	wr32(IGC_RXDCTL(reg_idx), 0);
609 
610 	/* Set DMA base address registers */
611 	wr32(IGC_RDBAL(reg_idx),
612 	     rdba & 0x00000000ffffffffULL);
613 	wr32(IGC_RDBAH(reg_idx), rdba >> 32);
614 	wr32(IGC_RDLEN(reg_idx),
615 	     ring->count * sizeof(union igc_adv_rx_desc));
616 
617 	/* initialize head and tail */
618 	ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
619 	wr32(IGC_RDH(reg_idx), 0);
620 	writel(0, ring->tail);
621 
622 	/* reset next_to_use/next_to_clean to place SW in sync with hardware */
623 	ring->next_to_clean = 0;
624 	ring->next_to_use = 0;
625 
626 	if (ring->xsk_pool)
627 		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
628 	else if (ring_uses_large_buffer(ring))
629 		buf_size = IGC_RXBUFFER_3072;
630 	else
631 		buf_size = IGC_RXBUFFER_2048;
632 
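	/* Program SRRCTL with the header buffer size, the packet buffer size
	 * and the one-buffer advanced descriptor format.
	 */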
633 	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
634 	srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
635 	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
636 
637 	wr32(IGC_SRRCTL(reg_idx), srrctl);
638 
639 	rxdctl |= IGC_RX_PTHRESH;
640 	rxdctl |= IGC_RX_HTHRESH << 8;
641 	rxdctl |= IGC_RX_WTHRESH << 16;
642 
643 	/* initialize rx_buffer_info */
644 	memset(ring->rx_buffer_info, 0,
645 	       sizeof(struct igc_rx_buffer) * ring->count);
646 
647 	/* initialize Rx descriptor 0 */
648 	rx_desc = IGC_RX_DESC(ring, 0);
649 	rx_desc->wb.upper.length = 0;
650 
651 	/* enable receive descriptor fetching */
652 	rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
653 
654 	wr32(IGC_RXDCTL(reg_idx), rxdctl);
655 }
656 
657 /**
658  * igc_configure_rx - Configure receive Unit after Reset
659  * @adapter: board private structure
660  *
661  * Configure the Rx unit of the MAC after a reset.
662  */
663 static void igc_configure_rx(struct igc_adapter *adapter)
664 {
665 	int i;
666 
667 	/* Setup the HW Rx Head and Tail Descriptor Pointers and
668 	 * the Base and Length of the Rx Descriptor Ring
669 	 */
670 	for (i = 0; i < adapter->num_rx_queues; i++)
671 		igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
672 }
673 
674 /**
675  * igc_configure_tx_ring - Configure transmit ring after Reset
676  * @adapter: board private structure
677  * @ring: tx ring to configure
678  *
679  * Configure a transmit ring after a reset.
680  */
681 static void igc_configure_tx_ring(struct igc_adapter *adapter,
682 				  struct igc_ring *ring)
683 {
684 	struct igc_hw *hw = &adapter->hw;
685 	int reg_idx = ring->reg_idx;
686 	u64 tdba = ring->dma;
687 	u32 txdctl = 0;
688 
689 	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
690 
691 	/* disable the queue */
692 	wr32(IGC_TXDCTL(reg_idx), 0);
693 	wrfl();
694 	mdelay(10);
695 
696 	wr32(IGC_TDLEN(reg_idx),
697 	     ring->count * sizeof(union igc_adv_tx_desc));
698 	wr32(IGC_TDBAL(reg_idx),
699 	     tdba & 0x00000000ffffffffULL);
700 	wr32(IGC_TDBAH(reg_idx), tdba >> 32);
701 
702 	ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
703 	wr32(IGC_TDH(reg_idx), 0);
704 	writel(0, ring->tail);
705 
706 	txdctl |= IGC_TX_PTHRESH;
707 	txdctl |= IGC_TX_HTHRESH << 8;
708 	txdctl |= IGC_TX_WTHRESH << 16;
709 
710 	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
711 	wr32(IGC_TXDCTL(reg_idx), txdctl);
712 }
713 
714 /**
715  * igc_configure_tx - Configure transmit Unit after Reset
716  * @adapter: board private structure
717  *
718  * Configure the Tx unit of the MAC after a reset.
719  */
720 static void igc_configure_tx(struct igc_adapter *adapter)
721 {
722 	int i;
723 
724 	for (i = 0; i < adapter->num_tx_queues; i++)
725 		igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
726 }
727 
728 /**
729  * igc_setup_mrqc - configure the multiple receive queue control registers
730  * @adapter: Board private structure
731  */
732 static void igc_setup_mrqc(struct igc_adapter *adapter)
733 {
734 	struct igc_hw *hw = &adapter->hw;
735 	u32 j, num_rx_queues;
736 	u32 mrqc, rxcsum;
737 	u32 rss_key[10];
738 
739 	netdev_rss_key_fill(rss_key, sizeof(rss_key));
740 	for (j = 0; j < 10; j++)
741 		wr32(IGC_RSSRK(j), rss_key[j]);
742 
743 	num_rx_queues = adapter->rss_queues;
744 
745 	if (adapter->rss_indir_tbl_init != num_rx_queues) {
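		/* Spread indirection table entries evenly across the enabled
		 * RSS queues.
		 */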
746 		for (j = 0; j < IGC_RETA_SIZE; j++)
747 			adapter->rss_indir_tbl[j] =
748 			(j * num_rx_queues) / IGC_RETA_SIZE;
749 		adapter->rss_indir_tbl_init = num_rx_queues;
750 	}
751 	igc_write_rss_indir_tbl(adapter);
752 
753 	/* Disable raw packet checksumming so that RSS hash is placed in
754 	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
755 	 * offloads as they are enabled by default
756 	 */
757 	rxcsum = rd32(IGC_RXCSUM);
758 	rxcsum |= IGC_RXCSUM_PCSD;
759 
760 	/* Enable Receive Checksum Offload for SCTP */
761 	rxcsum |= IGC_RXCSUM_CRCOFL;
762 
763 	/* Don't need to set TUOFL or IPOFL, they default to 1 */
764 	wr32(IGC_RXCSUM, rxcsum);
765 
766 	/* Generate RSS hash based on packet types, TCP/UDP
767 	 * port numbers and/or IPv4/v6 src and dst addresses
768 	 */
769 	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
770 	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
771 	       IGC_MRQC_RSS_FIELD_IPV6 |
772 	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
773 	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
774 
775 	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
776 		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
777 	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
778 		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
779 
780 	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
781 
782 	wr32(IGC_MRQC, mrqc);
783 }
784 
785 /**
786  * igc_setup_rctl - configure the receive control registers
787  * @adapter: Board private structure
788  */
789 static void igc_setup_rctl(struct igc_adapter *adapter)
790 {
791 	struct igc_hw *hw = &adapter->hw;
792 	u32 rctl;
793 
794 	rctl = rd32(IGC_RCTL);
795 
796 	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
797 	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
798 
799 	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
800 		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
801 
802 	/* enable stripping of CRC. Newer features require
803 	 * that the HW strips the CRC.
804 	 */
805 	rctl |= IGC_RCTL_SECRC;
806 
807 	/* disable store bad packets and clear size bits. */
808 	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
809 
810 	/* enable LPE to allow for reception of jumbo frames */
811 	rctl |= IGC_RCTL_LPE;
812 
813 	/* disable queue 0 to prevent tail write w/o re-config */
814 	wr32(IGC_RXDCTL(0), 0);
815 
816 	/* This is useful for sniffing bad packets. */
817 	if (adapter->netdev->features & NETIF_F_RXALL) {
818 		/* UPE and MPE will be handled by normal PROMISC logic
819 		 * in set_rx_mode
820 		 */
821 		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
822 			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
823 			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
824 
825 		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
826 			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
827 	}
828 
829 	wr32(IGC_RCTL, rctl);
830 }
831 
832 /**
833  * igc_setup_tctl - configure the transmit control registers
834  * @adapter: Board private structure
835  */
836 static void igc_setup_tctl(struct igc_adapter *adapter)
837 {
838 	struct igc_hw *hw = &adapter->hw;
839 	u32 tctl;
840 
841 	/* disable queue 0, which could be enabled by default */
842 	wr32(IGC_TXDCTL(0), 0);
843 
844 	/* Program the Transmit Control Register */
845 	tctl = rd32(IGC_TCTL);
846 	tctl &= ~IGC_TCTL_CT;
847 	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
848 		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
849 
850 	/* Enable transmits */
851 	tctl |= IGC_TCTL_EN;
852 
853 	wr32(IGC_TCTL, tctl);
854 }
855 
856 /**
857  * igc_set_mac_filter_hw() - Set MAC address filter in hardware
858  * @adapter: Pointer to adapter where the filter should be set
859  * @index: Filter index
860  * @type: MAC address filter type (source or destination)
861  * @addr: MAC address
862  * @queue: If non-negative, queue assignment feature is enabled and frames
863  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
864  *         assignment is disabled.
865  */
866 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
867 				  enum igc_mac_filter_type type,
868 				  const u8 *addr, int queue)
869 {
870 	struct net_device *dev = adapter->netdev;
871 	struct igc_hw *hw = &adapter->hw;
872 	u32 ral, rah;
873 
874 	if (WARN_ON(index >= hw->mac.rar_entry_count))
875 		return;
876 
877 	ral = le32_to_cpup((__le32 *)(addr));
878 	rah = le16_to_cpup((__le16 *)(addr + 4));
879 
880 	if (type == IGC_MAC_FILTER_TYPE_SRC) {
881 		rah &= ~IGC_RAH_ASEL_MASK;
882 		rah |= IGC_RAH_ASEL_SRC_ADDR;
883 	}
884 
885 	if (queue >= 0) {
886 		rah &= ~IGC_RAH_QSEL_MASK;
887 		rah |= (queue << IGC_RAH_QSEL_SHIFT);
888 		rah |= IGC_RAH_QSEL_ENABLE;
889 	}
890 
891 	rah |= IGC_RAH_AV;
892 
893 	wr32(IGC_RAL(index), ral);
894 	wr32(IGC_RAH(index), rah);
895 
896 	netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
897 }
898 
899 /**
900  * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
901  * @adapter: Pointer to adapter where the filter should be cleared
902  * @index: Filter index
903  */
904 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
905 {
906 	struct net_device *dev = adapter->netdev;
907 	struct igc_hw *hw = &adapter->hw;
908 
909 	if (WARN_ON(index >= hw->mac.rar_entry_count))
910 		return;
911 
912 	wr32(IGC_RAL(index), 0);
913 	wr32(IGC_RAH(index), 0);
914 
915 	netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
916 }
917 
918 /* Set default MAC address for the PF in the first RAR entry */
919 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
920 {
921 	struct net_device *dev = adapter->netdev;
922 	u8 *addr = adapter->hw.mac.addr;
923 
924 	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
925 
926 	igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
927 }
928 
929 /**
930  * igc_set_mac - Change the Ethernet Address of the NIC
931  * @netdev: network interface device structure
932  * @p: pointer to an address structure
933  *
934  * Returns 0 on success, negative on failure
935  */
936 static int igc_set_mac(struct net_device *netdev, void *p)
937 {
938 	struct igc_adapter *adapter = netdev_priv(netdev);
939 	struct igc_hw *hw = &adapter->hw;
940 	struct sockaddr *addr = p;
941 
942 	if (!is_valid_ether_addr(addr->sa_data))
943 		return -EADDRNOTAVAIL;
944 
945 	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
946 	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
947 
948 	/* set the correct pool for the new PF MAC address in entry 0 */
949 	igc_set_default_mac_filter(adapter);
950 
951 	return 0;
952 }
953 
954 /**
955  *  igc_write_mc_addr_list - write multicast addresses to MTA
956  *  @netdev: network interface device structure
957  *
958  *  Writes multicast address list to the MTA hash table.
959  *  Returns: -ENOMEM on failure
960  *           0 on no addresses written
961  *           X on writing X addresses to MTA
962  **/
963 static int igc_write_mc_addr_list(struct net_device *netdev)
964 {
965 	struct igc_adapter *adapter = netdev_priv(netdev);
966 	struct igc_hw *hw = &adapter->hw;
967 	struct netdev_hw_addr *ha;
968 	u8  *mta_list;
969 	int i;
970 
971 	if (netdev_mc_empty(netdev)) {
972 		/* nothing to program, so clear mc list */
973 		igc_update_mc_addr_list(hw, NULL, 0);
974 		return 0;
975 	}
976 
977 	mta_list = kcalloc(netdev_mc_count(netdev), ETH_ALEN, GFP_ATOMIC);
978 	if (!mta_list)
979 		return -ENOMEM;
980 
981 	/* The shared function expects a packed array of only addresses. */
982 	i = 0;
983 	netdev_for_each_mc_addr(ha, netdev)
984 		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
985 
986 	igc_update_mc_addr_list(hw, mta_list, i);
987 	kfree(mta_list);
988 
989 	return netdev_mc_count(netdev);
990 }
991 
992 static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
993 {
994 	ktime_t cycle_time = adapter->cycle_time;
995 	ktime_t base_time = adapter->base_time;
996 	u32 launchtime;
997 
998 	/* FIXME: when using ETF together with taprio, we may have a
999 	 * case where 'delta' is larger than the cycle_time, this may
1000 	 * cause problems if we don't read the current value of
1001 	 * IGC_BASET, as the value written into the launchtime
1002 	 * descriptor field may be misinterpreted.
1003 	 */
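	/* Launch time is the offset of txtime within the current Qbv cycle:
	 * (txtime - base_time) mod cycle_time.
	 */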
1004 	div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
1005 
1006 	return cpu_to_le32(launchtime);
1007 }
1008 
1009 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1010 			    struct igc_tx_buffer *first,
1011 			    u32 vlan_macip_lens, u32 type_tucmd,
1012 			    u32 mss_l4len_idx)
1013 {
1014 	struct igc_adv_tx_context_desc *context_desc;
1015 	u16 i = tx_ring->next_to_use;
1016 
1017 	context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1018 
1019 	i++;
1020 	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1021 
1022 	/* set bits to identify this as an advanced context descriptor */
1023 	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1024 
1025 	/* For i225, context index must be unique per ring. */
1026 	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1027 		mss_l4len_idx |= tx_ring->reg_idx << 4;
1028 
1029 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
1030 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
1031 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
1032 
1033 	/* We assume there is always a valid Tx time available. Invalid times
1034 	 * should have been handled by the upper layers.
1035 	 */
1036 	if (tx_ring->launchtime_enable) {
1037 		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1038 		ktime_t txtime = first->skb->tstamp;
1039 
1040 		skb_txtime_consumed(first->skb);
1041 		context_desc->launch_time = igc_tx_launchtime(adapter,
1042 							      txtime);
1043 	} else {
1044 		context_desc->launch_time = 0;
1045 	}
1046 }
1047 
1048 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
1049 {
1050 	struct sk_buff *skb = first->skb;
1051 	u32 vlan_macip_lens = 0;
1052 	u32 type_tucmd = 0;
1053 
1054 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
1055 csum_failed:
1056 		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1057 		    !tx_ring->launchtime_enable)
1058 			return;
1059 		goto no_csum;
1060 	}
1061 
1062 	switch (skb->csum_offset) {
1063 	case offsetof(struct tcphdr, check):
1064 		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1065 		fallthrough;
1066 	case offsetof(struct udphdr, check):
1067 		break;
1068 	case offsetof(struct sctphdr, checksum):
1069 		/* validate that this is actually an SCTP request */
1070 		if (skb_csum_is_sctp(skb)) {
1071 			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1072 			break;
1073 		}
1074 		fallthrough;
1075 	default:
1076 		skb_checksum_help(skb);
1077 		goto csum_failed;
1078 	}
1079 
1080 	/* update TX checksum flag */
1081 	first->tx_flags |= IGC_TX_FLAGS_CSUM;
1082 	vlan_macip_lens = skb_checksum_start_offset(skb) -
1083 			  skb_network_offset(skb);
1084 no_csum:
1085 	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1086 	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1087 
1088 	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
1089 }
1090 
1091 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1092 {
1093 	struct net_device *netdev = tx_ring->netdev;
1094 
1095 	netif_stop_subqueue(netdev, tx_ring->queue_index);
1096 
1097 	/* Memory barrier: order the queue stop above against the
1097 	 * descriptor-count re-check below.
1097 	 */
1098 	smp_mb();
1099 
1100 	/* We need to check again in case another CPU has just
1101 	 * made room available.
1102 	 */
1103 	if (igc_desc_unused(tx_ring) < size)
1104 		return -EBUSY;
1105 
1106 	/* A reprieve! */
1107 	netif_wake_subqueue(netdev, tx_ring->queue_index);
1108 
1109 	u64_stats_update_begin(&tx_ring->tx_syncp2);
1110 	tx_ring->tx_stats.restart_queue2++;
1111 	u64_stats_update_end(&tx_ring->tx_syncp2);
1112 
1113 	return 0;
1114 }
1115 
1116 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1117 {
1118 	if (igc_desc_unused(tx_ring) >= size)
1119 		return 0;
1120 	return __igc_maybe_stop_tx(tx_ring, size);
1121 }
1122 
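/* Map bit _flag of _input onto bit _result without branching by scaling the
 * masked value up or down, depending on which bit position is higher.
 */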
1123 #define IGC_SET_FLAG(_input, _flag, _result) \
1124 	(((_flag) <= (_result)) ?				\
1125 	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :	\
1126 	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1127 
1128 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1129 {
1130 	/* set type for advanced descriptor with frame checksum insertion */
1131 	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1132 		       IGC_ADVTXD_DCMD_DEXT |
1133 		       IGC_ADVTXD_DCMD_IFCS;
1134 
1135 	/* set HW vlan bit if vlan is present */
1136 	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1137 				 IGC_ADVTXD_DCMD_VLE);
1138 
1139 	/* set segmentation bits for TSO */
1140 	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1141 				 (IGC_ADVTXD_DCMD_TSE));
1142 
1143 	/* set timestamp bit if present */
1144 	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1145 				 (IGC_ADVTXD_MAC_TSTAMP));
1146 
1147 	/* clear IFCS when the skb requests no frame checksum (no_fcs) */
1148 	cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1149 
1150 	return cmd_type;
1151 }
1152 
1153 static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1154 				 union igc_adv_tx_desc *tx_desc,
1155 				 u32 tx_flags, unsigned int paylen)
1156 {
1157 	u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1158 
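	/* The multiplications below translate tx_flags bits into POPTS bits
	 * without branching (same idea as IGC_SET_FLAG).
	 */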
1159 	/* insert L4 checksum */
1160 	olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1161 			  ((IGC_TXD_POPTS_TXSM << 8) /
1162 			  IGC_TX_FLAGS_CSUM);
1163 
1164 	/* insert IPv4 checksum */
1165 	olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1166 			  (((IGC_TXD_POPTS_IXSM << 8)) /
1167 			  IGC_TX_FLAGS_IPV4);
1168 
1169 	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1170 }
1171 
1172 static int igc_tx_map(struct igc_ring *tx_ring,
1173 		      struct igc_tx_buffer *first,
1174 		      const u8 hdr_len)
1175 {
1176 	struct sk_buff *skb = first->skb;
1177 	struct igc_tx_buffer *tx_buffer;
1178 	union igc_adv_tx_desc *tx_desc;
1179 	u32 tx_flags = first->tx_flags;
1180 	skb_frag_t *frag;
1181 	u16 i = tx_ring->next_to_use;
1182 	unsigned int data_len, size;
1183 	dma_addr_t dma;
1184 	u32 cmd_type;
1185 
1186 	cmd_type = igc_tx_cmd_type(skb, tx_flags);
1187 	tx_desc = IGC_TX_DESC(tx_ring, i);
1188 
1189 	igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1190 
1191 	size = skb_headlen(skb);
1192 	data_len = skb->data_len;
1193 
1194 	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1195 
1196 	tx_buffer = first;
1197 
1198 	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1199 		if (dma_mapping_error(tx_ring->dev, dma))
1200 			goto dma_error;
1201 
1202 		/* record length, and DMA address */
1203 		dma_unmap_len_set(tx_buffer, len, size);
1204 		dma_unmap_addr_set(tx_buffer, dma, dma);
1205 
1206 		tx_desc->read.buffer_addr = cpu_to_le64(dma);
1207 
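		/* The buffer length occupies the low bits of cmd_type_len and
		 * cmd_type has none of them set, so XOR-ing the length in
		 * below is equivalent to OR-ing it.
		 */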
1208 		while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1209 			tx_desc->read.cmd_type_len =
1210 				cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1211 
1212 			i++;
1213 			tx_desc++;
1214 			if (i == tx_ring->count) {
1215 				tx_desc = IGC_TX_DESC(tx_ring, 0);
1216 				i = 0;
1217 			}
1218 			tx_desc->read.olinfo_status = 0;
1219 
1220 			dma += IGC_MAX_DATA_PER_TXD;
1221 			size -= IGC_MAX_DATA_PER_TXD;
1222 
1223 			tx_desc->read.buffer_addr = cpu_to_le64(dma);
1224 		}
1225 
1226 		if (likely(!data_len))
1227 			break;
1228 
1229 		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1230 
1231 		i++;
1232 		tx_desc++;
1233 		if (i == tx_ring->count) {
1234 			tx_desc = IGC_TX_DESC(tx_ring, 0);
1235 			i = 0;
1236 		}
1237 		tx_desc->read.olinfo_status = 0;
1238 
1239 		size = skb_frag_size(frag);
1240 		data_len -= size;
1241 
1242 		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1243 				       size, DMA_TO_DEVICE);
1244 
1245 		tx_buffer = &tx_ring->tx_buffer_info[i];
1246 	}
1247 
1248 	/* write last descriptor with RS and EOP bits */
1249 	cmd_type |= size | IGC_TXD_DCMD;
1250 	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1251 
1252 	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1253 
1254 	/* set the timestamp */
1255 	first->time_stamp = jiffies;
1256 
1257 	skb_tx_timestamp(skb);
1258 
1259 	/* Force memory writes to complete before letting h/w know there
1260 	 * are new descriptors to fetch.  (Only applicable for weak-ordered
1261 	 * memory model archs, such as IA-64).
1262 	 *
1263 	 * We also need this memory barrier to make certain all of the
1264 	 * status bits have been updated before next_to_watch is written.
1265 	 */
1266 	wmb();
1267 
1268 	/* set next_to_watch value indicating a packet is present */
1269 	first->next_to_watch = tx_desc;
1270 
1271 	i++;
1272 	if (i == tx_ring->count)
1273 		i = 0;
1274 
1275 	tx_ring->next_to_use = i;
1276 
1277 	/* Make sure there is space in the ring for the next send. */
1278 	igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1279 
1280 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1281 		writel(i, tx_ring->tail);
1282 	}
1283 
1284 	return 0;
1285 dma_error:
1286 	netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1287 	tx_buffer = &tx_ring->tx_buffer_info[i];
1288 
1289 	/* clear dma mappings for failed tx_buffer_info map */
1290 	while (tx_buffer != first) {
1291 		if (dma_unmap_len(tx_buffer, len))
1292 			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1293 
1294 		if (i-- == 0)
1295 			i += tx_ring->count;
1296 		tx_buffer = &tx_ring->tx_buffer_info[i];
1297 	}
1298 
1299 	if (dma_unmap_len(tx_buffer, len))
1300 		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1301 
1302 	dev_kfree_skb_any(tx_buffer->skb);
1303 	tx_buffer->skb = NULL;
1304 
1305 	tx_ring->next_to_use = i;
1306 
1307 	return -1;
1308 }
1309 
1310 static int igc_tso(struct igc_ring *tx_ring,
1311 		   struct igc_tx_buffer *first,
1312 		   u8 *hdr_len)
1313 {
1314 	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1315 	struct sk_buff *skb = first->skb;
1316 	union {
1317 		struct iphdr *v4;
1318 		struct ipv6hdr *v6;
1319 		unsigned char *hdr;
1320 	} ip;
1321 	union {
1322 		struct tcphdr *tcp;
1323 		struct udphdr *udp;
1324 		unsigned char *hdr;
1325 	} l4;
1326 	u32 paylen, l4_offset;
1327 	int err;
1328 
1329 	if (skb->ip_summed != CHECKSUM_PARTIAL)
1330 		return 0;
1331 
1332 	if (!skb_is_gso(skb))
1333 		return 0;
1334 
1335 	err = skb_cow_head(skb, 0);
1336 	if (err < 0)
1337 		return err;
1338 
1339 	ip.hdr = skb_network_header(skb);
1340 	l4.hdr = skb_checksum_start(skb);
1341 
1342 	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1343 	type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1344 
1345 	/* initialize outer IP header fields */
1346 	if (ip.v4->version == 4) {
1347 		unsigned char *csum_start = skb_checksum_start(skb);
1348 		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1349 
1350 		/* IP header will have to cancel out any data that
1351 		 * is not a part of the outer IP header
1352 		 */
1353 		ip.v4->check = csum_fold(csum_partial(trans_start,
1354 						      csum_start - trans_start,
1355 						      0));
1356 		type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1357 
1358 		ip.v4->tot_len = 0;
1359 		first->tx_flags |= IGC_TX_FLAGS_TSO |
1360 				   IGC_TX_FLAGS_CSUM |
1361 				   IGC_TX_FLAGS_IPV4;
1362 	} else {
1363 		ip.v6->payload_len = 0;
1364 		first->tx_flags |= IGC_TX_FLAGS_TSO |
1365 				   IGC_TX_FLAGS_CSUM;
1366 	}
1367 
1368 	/* determine offset of inner transport header */
1369 	l4_offset = l4.hdr - skb->data;
1370 
1371 	/* remove payload length from inner checksum */
1372 	paylen = skb->len - l4_offset;
1373 	if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1374 		/* compute length of segmentation header */
1375 		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
1376 		csum_replace_by_diff(&l4.tcp->check,
1377 				     (__force __wsum)htonl(paylen));
1378 	} else {
1379 		/* compute length of segmentation header */
1380 		*hdr_len = sizeof(*l4.udp) + l4_offset;
1381 		csum_replace_by_diff(&l4.udp->check,
1382 				     (__force __wsum)htonl(paylen));
1383 	}
1384 
1385 	/* update gso size and bytecount with header size */
1386 	first->gso_segs = skb_shinfo(skb)->gso_segs;
1387 	first->bytecount += (first->gso_segs - 1) * *hdr_len;
1388 
1389 	/* MSS L4LEN IDX */
1390 	mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1391 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1392 
1393 	/* VLAN MACLEN IPLEN */
1394 	vlan_macip_lens = l4.hdr - ip.hdr;
1395 	vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1396 	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1397 
1398 	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
1399 			type_tucmd, mss_l4len_idx);
1400 
1401 	return 1;
1402 }
1403 
1404 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1405 				       struct igc_ring *tx_ring)
1406 {
1407 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
1408 	__be16 protocol = vlan_get_protocol(skb);
1409 	struct igc_tx_buffer *first;
1410 	u32 tx_flags = 0;
1411 	unsigned short f;
1412 	u8 hdr_len = 0;
1413 	int tso = 0;
1414 
1415 	/* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1416 	 *	+ 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1417 	 *	+ 2 desc gap to keep tail from touching head,
1418 	 *	+ 1 desc for context descriptor,
1419 	 * otherwise try next time
1420 	 */
1421 	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1422 		count += TXD_USE_COUNT(skb_frag_size(
1423 						&skb_shinfo(skb)->frags[f]));
1424 
1425 	if (igc_maybe_stop_tx(tx_ring, count + 3)) {
1426 		/* this is a hard error */
1427 		return NETDEV_TX_BUSY;
1428 	}
1429 
1430 	/* record the location of the first descriptor for this packet */
1431 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1432 	first->type = IGC_TX_BUFFER_TYPE_SKB;
1433 	first->skb = skb;
1434 	first->bytecount = skb->len;
1435 	first->gso_segs = 1;
1436 
1437 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1438 		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1439 
1440 		/* FIXME: add support for retrieving timestamps from
1441 		 * the other timer registers before skipping the
1442 		 * timestamping request.
1443 		 */
1444 		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
1445 		    !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
1446 					   &adapter->state)) {
1447 			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1448 			tx_flags |= IGC_TX_FLAGS_TSTAMP;
1449 
1450 			adapter->ptp_tx_skb = skb_get(skb);
1451 			adapter->ptp_tx_start = jiffies;
1452 		} else {
1453 			adapter->tx_hwtstamp_skipped++;
1454 		}
1455 	}
1456 
1457 	if (skb_vlan_tag_present(skb)) {
1458 		tx_flags |= IGC_TX_FLAGS_VLAN;
1459 		tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1460 	}
1461 
1462 	/* record initial flags and protocol */
1463 	first->tx_flags = tx_flags;
1464 	first->protocol = protocol;
1465 
1466 	tso = igc_tso(tx_ring, first, &hdr_len);
1467 	if (tso < 0)
1468 		goto out_drop;
1469 	else if (!tso)
1470 		igc_tx_csum(tx_ring, first);
1471 
1472 	igc_tx_map(tx_ring, first, hdr_len);
1473 
1474 	return NETDEV_TX_OK;
1475 
1476 out_drop:
1477 	dev_kfree_skb_any(first->skb);
1478 	first->skb = NULL;
1479 
1480 	return NETDEV_TX_OK;
1481 }
1482 
1483 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1484 						    struct sk_buff *skb)
1485 {
1486 	unsigned int r_idx = skb->queue_mapping;
1487 
1488 	if (r_idx >= adapter->num_tx_queues)
1489 		r_idx = r_idx % adapter->num_tx_queues;
1490 
1491 	return adapter->tx_ring[r_idx];
1492 }
1493 
1494 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1495 				  struct net_device *netdev)
1496 {
1497 	struct igc_adapter *adapter = netdev_priv(netdev);
1498 
1499 	/* The minimum packet size with TCTL.PSP set is 17 bytes, so pad the skb
1500 	 * in order to meet this minimum size requirement.
1501 	 */
1502 	if (skb->len < 17) {
1503 		if (skb_padto(skb, 17))
1504 			return NETDEV_TX_OK;
1505 		skb->len = 17;
1506 	}
1507 
1508 	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1509 }
1510 
1511 static void igc_rx_checksum(struct igc_ring *ring,
1512 			    union igc_adv_rx_desc *rx_desc,
1513 			    struct sk_buff *skb)
1514 {
1515 	skb_checksum_none_assert(skb);
1516 
1517 	/* Ignore Checksum bit is set */
1518 	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1519 		return;
1520 
1521 	/* Rx checksum disabled via ethtool */
1522 	if (!(ring->netdev->features & NETIF_F_RXCSUM))
1523 		return;
1524 
1525 	/* TCP/UDP checksum error bit is set */
1526 	if (igc_test_staterr(rx_desc,
1527 			     IGC_RXDEXT_STATERR_L4E |
1528 			     IGC_RXDEXT_STATERR_IPE)) {
1529 		/* work around errata with sctp packets where the TCPE aka
1530 		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1531 		 * packets (aka let the stack check the crc32c)
1532 		 */
1533 		if (!(skb->len == 60 &&
1534 		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1535 			u64_stats_update_begin(&ring->rx_syncp);
1536 			ring->rx_stats.csum_err++;
1537 			u64_stats_update_end(&ring->rx_syncp);
1538 		}
1539 		/* let the stack verify checksum errors */
1540 		return;
1541 	}
1542 	/* It must be a TCP or UDP packet with a valid checksum */
1543 	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1544 				      IGC_RXD_STAT_UDPCS))
1545 		skb->ip_summed = CHECKSUM_UNNECESSARY;
1546 
1547 	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1548 		   le32_to_cpu(rx_desc->wb.upper.status_error));
1549 }
1550 
1551 static inline void igc_rx_hash(struct igc_ring *ring,
1552 			       union igc_adv_rx_desc *rx_desc,
1553 			       struct sk_buff *skb)
1554 {
1555 	if (ring->netdev->features & NETIF_F_RXHASH)
1556 		skb_set_hash(skb,
1557 			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1558 			     PKT_HASH_TYPE_L3);
1559 }
1560 
1561 static void igc_rx_vlan(struct igc_ring *rx_ring,
1562 			union igc_adv_rx_desc *rx_desc,
1563 			struct sk_buff *skb)
1564 {
1565 	struct net_device *dev = rx_ring->netdev;
1566 	u16 vid;
1567 
1568 	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1569 	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1570 		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1571 		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1572 			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1573 		else
1574 			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1575 
1576 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1577 	}
1578 }
1579 
1580 /**
1581  * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1582  * @rx_ring: rx descriptor ring packet is being transacted on
1583  * @rx_desc: pointer to the EOP Rx descriptor
1584  * @skb: pointer to current skb being populated
1585  *
1586  * This function checks the ring, descriptor, and packet information in order
1587  * to populate the hash, checksum, VLAN, protocol, and other fields within the
1588  * skb.
1589  */
1590 static void igc_process_skb_fields(struct igc_ring *rx_ring,
1591 				   union igc_adv_rx_desc *rx_desc,
1592 				   struct sk_buff *skb)
1593 {
1594 	igc_rx_hash(rx_ring, rx_desc, skb);
1595 
1596 	igc_rx_checksum(rx_ring, rx_desc, skb);
1597 
1598 	igc_rx_vlan(rx_ring, rx_desc, skb);
1599 
1600 	skb_record_rx_queue(skb, rx_ring->queue_index);
1601 
1602 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1603 }
1604 
1605 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1606 {
1607 	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1608 	struct igc_adapter *adapter = netdev_priv(netdev);
1609 	struct igc_hw *hw = &adapter->hw;
1610 	u32 ctrl;
1611 
1612 	ctrl = rd32(IGC_CTRL);
1613 
1614 	if (enable) {
1615 		/* enable VLAN tag insert/strip */
1616 		ctrl |= IGC_CTRL_VME;
1617 	} else {
1618 		/* disable VLAN tag insert/strip */
1619 		ctrl &= ~IGC_CTRL_VME;
1620 	}
1621 	wr32(IGC_CTRL, ctrl);
1622 }
1623 
1624 static void igc_restore_vlan(struct igc_adapter *adapter)
1625 {
1626 	igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1627 }
1628 
1629 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1630 					       const unsigned int size,
1631 					       int *rx_buffer_pgcnt)
1632 {
1633 	struct igc_rx_buffer *rx_buffer;
1634 
1635 	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1636 	*rx_buffer_pgcnt =
1637 #if (PAGE_SIZE < 8192)
1638 		page_count(rx_buffer->page);
1639 #else
1640 		0;
1641 #endif
1642 	prefetchw(rx_buffer->page);
1643 
1644 	/* we are reusing so sync this buffer for CPU use */
1645 	dma_sync_single_range_for_cpu(rx_ring->dev,
1646 				      rx_buffer->dma,
1647 				      rx_buffer->page_offset,
1648 				      size,
1649 				      DMA_FROM_DEVICE);
1650 
1651 	rx_buffer->pagecnt_bias--;
1652 
1653 	return rx_buffer;
1654 }
1655 
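/* When PAGE_SIZE < 8192 the Rx page is split into two halves and XOR flips
 * the offset between them; on larger pages the offset simply advances by
 * truesize.
 */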
1656 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1657 			       unsigned int truesize)
1658 {
1659 #if (PAGE_SIZE < 8192)
1660 	buffer->page_offset ^= truesize;
1661 #else
1662 	buffer->page_offset += truesize;
1663 #endif
1664 }
1665 
1666 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
1667 					      unsigned int size)
1668 {
1669 	unsigned int truesize;
1670 
1671 #if (PAGE_SIZE < 8192)
1672 	truesize = igc_rx_pg_size(ring) / 2;
1673 #else
1674 	truesize = ring_uses_build_skb(ring) ?
1675 		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1676 		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1677 		   SKB_DATA_ALIGN(size);
1678 #endif
1679 	return truesize;
1680 }
1681 
1682 /**
1683  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1684  * @rx_ring: rx descriptor ring to transact packets on
1685  * @rx_buffer: buffer containing page to add
1686  * @skb: sk_buff to place the data into
1687  * @size: size of buffer to be added
1688  *
1689  * This function will add the data contained in rx_buffer->page to the skb.
1690  */
1691 static void igc_add_rx_frag(struct igc_ring *rx_ring,
1692 			    struct igc_rx_buffer *rx_buffer,
1693 			    struct sk_buff *skb,
1694 			    unsigned int size)
1695 {
1696 	unsigned int truesize;
1697 
1698 #if (PAGE_SIZE < 8192)
1699 	truesize = igc_rx_pg_size(rx_ring) / 2;
1700 #else
1701 	truesize = ring_uses_build_skb(rx_ring) ?
1702 		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1703 		   SKB_DATA_ALIGN(size);
1704 #endif
1705 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1706 			rx_buffer->page_offset, size, truesize);
1707 
1708 	igc_rx_buffer_flip(rx_buffer, truesize);
1709 }
1710 
1711 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1712 				     struct igc_rx_buffer *rx_buffer,
1713 				     union igc_adv_rx_desc *rx_desc,
1714 				     unsigned int size)
1715 {
1716 	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1717 	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1718 	struct sk_buff *skb;
1719 
1720 	/* prefetch first cache line of first page */
1721 	net_prefetch(va);
1722 
1723 	/* build an skb around the page buffer */
1724 	skb = build_skb(va - IGC_SKB_PAD, truesize);
1725 	if (unlikely(!skb))
1726 		return NULL;
1727 
1728 	/* update pointers within the skb to store the data */
1729 	skb_reserve(skb, IGC_SKB_PAD);
1730 	__skb_put(skb, size);
1731 
1732 	igc_rx_buffer_flip(rx_buffer, truesize);
1733 	return skb;
1734 }
1735 
1736 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1737 					 struct igc_rx_buffer *rx_buffer,
1738 					 struct xdp_buff *xdp,
1739 					 ktime_t timestamp)
1740 {
1741 	unsigned int size = xdp->data_end - xdp->data;
1742 	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1743 	void *va = xdp->data;
1744 	unsigned int headlen;
1745 	struct sk_buff *skb;
1746 
1747 	/* prefetch first cache line of first page */
1748 	net_prefetch(va);
1749 
1750 	/* allocate an skb to store the frags */
1751 	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
1752 	if (unlikely(!skb))
1753 		return NULL;
1754 
1755 	if (timestamp)
1756 		skb_hwtstamps(skb)->hwtstamp = timestamp;
1757 
1758 	/* Determine available headroom for copy */
1759 	headlen = size;
1760 	if (headlen > IGC_RX_HDR_LEN)
1761 		headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1762 
1763 	/* align pull length to size of long to optimize memcpy performance */
1764 	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
1765 
1766 	/* update all of the pointers */
1767 	size -= headlen;
1768 	if (size) {
1769 		skb_add_rx_frag(skb, 0, rx_buffer->page,
1770 				(va + headlen) - page_address(rx_buffer->page),
1771 				size, truesize);
1772 		igc_rx_buffer_flip(rx_buffer, truesize);
1773 	} else {
1774 		rx_buffer->pagecnt_bias++;
1775 	}
1776 
1777 	return skb;
1778 }
1779 
1780 /**
1781  * igc_reuse_rx_page - page flip buffer and store it back on the ring
1782  * @rx_ring: rx descriptor ring to store buffers on
1783  * @old_buff: donor buffer to have page reused
1784  *
1785  * Synchronizes page for reuse by the adapter
1786  */
1787 static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1788 			      struct igc_rx_buffer *old_buff)
1789 {
1790 	u16 nta = rx_ring->next_to_alloc;
1791 	struct igc_rx_buffer *new_buff;
1792 
1793 	new_buff = &rx_ring->rx_buffer_info[nta];
1794 
1795 	/* update, and store next to alloc */
1796 	nta++;
1797 	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1798 
1799 	/* Transfer page from old buffer to new buffer.
1800 	 * Move each member individually to avoid possible store
1801 	 * forwarding stalls.
1802 	 */
1803 	new_buff->dma		= old_buff->dma;
1804 	new_buff->page		= old_buff->page;
1805 	new_buff->page_offset	= old_buff->page_offset;
1806 	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
1807 }
1808 
1809 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1810 				  int rx_buffer_pgcnt)
1811 {
1812 	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1813 	struct page *page = rx_buffer->page;
1814 
1815 	/* avoid re-using remote and pfmemalloc pages */
1816 	if (!dev_page_is_reusable(page))
1817 		return false;
1818 
1819 #if (PAGE_SIZE < 8192)
1820 	/* if we are the only owner of the page we can reuse it */
1821 	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
1822 		return false;
1823 #else
1824 #define IGC_LAST_OFFSET \
1825 	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1826 
1827 	if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1828 		return false;
1829 #endif
1830 
1831 	/* If we have drained the page fragment pool we need to update
1832 	 * the pagecnt_bias and page count so that we fully restock the
1833 	 * number of references the driver holds.
1834 	 */
1835 	if (unlikely(pagecnt_bias == 1)) {
1836 		page_ref_add(page, USHRT_MAX - 1);
1837 		rx_buffer->pagecnt_bias = USHRT_MAX;
1838 	}
1839 
1840 	return true;
1841 }
1842 
1843 /**
1844  * igc_is_non_eop - process handling of non-EOP buffers
1845  * @rx_ring: Rx ring being processed
1846  * @rx_desc: Rx descriptor for current buffer
1847  *
1848  * This function updates next to clean and prefetches the next descriptor.
1849  * If the buffer is an EOP buffer this function exits returning false,
1850  * otherwise it returns true to indicate that this is a non-EOP buffer and
1851  * the caller should keep accumulating buffers for the same frame.
1852  */
1853 static bool igc_is_non_eop(struct igc_ring *rx_ring,
1854 			   union igc_adv_rx_desc *rx_desc)
1855 {
1856 	u32 ntc = rx_ring->next_to_clean + 1;
1857 
1858 	/* fetch, update, and store next to clean */
1859 	ntc = (ntc < rx_ring->count) ? ntc : 0;
1860 	rx_ring->next_to_clean = ntc;
1861 
1862 	prefetch(IGC_RX_DESC(rx_ring, ntc));
1863 
1864 	if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1865 		return false;
1866 
1867 	return true;
1868 }
1869 
1870 /**
1871  * igc_cleanup_headers - Correct corrupted or empty headers
1872  * @rx_ring: rx descriptor ring packet is being transacted on
1873  * @rx_desc: pointer to the EOP Rx descriptor
1874  * @skb: pointer to current skb being fixed
1875  *
1876  * Address the case where we are pulling data in on pages only
1877  * and as such no data is present in the skb header.
1878  *
1879  * In addition if skb is not at least 60 bytes we need to pad it so that
1880  * it is large enough to qualify as a valid Ethernet frame.
1881  *
1882  * Returns true if an error was encountered and skb was freed.
1883  */
1884 static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1885 				union igc_adv_rx_desc *rx_desc,
1886 				struct sk_buff *skb)
1887 {
1888 	/* XDP packets use error pointer so abort at this point */
1889 	if (IS_ERR(skb))
1890 		return true;
1891 
1892 	if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
1893 		struct net_device *netdev = rx_ring->netdev;
1894 
1895 		if (!(netdev->features & NETIF_F_RXALL)) {
1896 			dev_kfree_skb_any(skb);
1897 			return true;
1898 		}
1899 	}
1900 
1901 	/* if eth_skb_pad returns an error the skb was freed */
1902 	if (eth_skb_pad(skb))
1903 		return true;
1904 
1905 	return false;
1906 }
1907 
1908 static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1909 			      struct igc_rx_buffer *rx_buffer,
1910 			      int rx_buffer_pgcnt)
1911 {
1912 	if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
1913 		/* hand second half of page back to the ring */
1914 		igc_reuse_rx_page(rx_ring, rx_buffer);
1915 	} else {
1916 		/* We are not reusing the buffer so unmap it and free
1917 		 * any references we are holding to it
1918 		 */
1919 		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1920 				     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1921 				     IGC_RX_DMA_ATTR);
1922 		__page_frag_cache_drain(rx_buffer->page,
1923 					rx_buffer->pagecnt_bias);
1924 	}
1925 
1926 	/* clear contents of rx_buffer */
1927 	rx_buffer->page = NULL;
1928 }
1929 
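/* Headroom reserved in front of received data: IGC_SKB_PAD when the ring
 * builds skbs directly around the buffer, XDP_PACKET_HEADROOM when an XDP
 * program is attached, and none otherwise.
 */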
1930 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1931 {
1932 	struct igc_adapter *adapter = rx_ring->q_vector->adapter;
1933 
1934 	if (ring_uses_build_skb(rx_ring))
1935 		return IGC_SKB_PAD;
1936 	if (igc_xdp_is_enabled(adapter))
1937 		return XDP_PACKET_HEADROOM;
1938 
1939 	return 0;
1940 }
1941 
1942 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1943 				  struct igc_rx_buffer *bi)
1944 {
1945 	struct page *page = bi->page;
1946 	dma_addr_t dma;
1947 
1948 	/* since we are recycling buffers we should seldom need to alloc */
1949 	if (likely(page))
1950 		return true;
1951 
1952 	/* alloc new page for storage */
1953 	page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1954 	if (unlikely(!page)) {
1955 		rx_ring->rx_stats.alloc_failed++;
1956 		return false;
1957 	}
1958 
1959 	/* map page for use */
1960 	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1961 				 igc_rx_pg_size(rx_ring),
1962 				 DMA_FROM_DEVICE,
1963 				 IGC_RX_DMA_ATTR);
1964 
1965 	/* if mapping failed free memory back to system since
1966 	 * there isn't much point in holding memory we can't use
1967 	 */
1968 	if (dma_mapping_error(rx_ring->dev, dma)) {
1969 		__free_page(page);
1970 
1971 		rx_ring->rx_stats.alloc_failed++;
1972 		return false;
1973 	}
1974 
1975 	bi->dma = dma;
1976 	bi->page = page;
1977 	bi->page_offset = igc_rx_offset(rx_ring);
1978 	page_ref_add(page, USHRT_MAX - 1);
1979 	bi->pagecnt_bias = USHRT_MAX;
1980 
1981 	return true;
1982 }
1983 
1984 /**
1985  * igc_alloc_rx_buffers - Replace used receive buffers; packet split
1986  * @rx_ring: rx descriptor ring
1987  * @cleaned_count: number of buffers to clean
1988  * @cleaned_count: number of buffers to replace
1989 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
1990 {
1991 	union igc_adv_rx_desc *rx_desc;
1992 	u16 i = rx_ring->next_to_use;
1993 	struct igc_rx_buffer *bi;
1994 	u16 bufsz;
1995 
1996 	/* nothing to do */
1997 	if (!cleaned_count)
1998 		return;
1999 
2000 	rx_desc = IGC_RX_DESC(rx_ring, i);
2001 	bi = &rx_ring->rx_buffer_info[i];
2002 	i -= rx_ring->count;
2003 
2004 	bufsz = igc_rx_bufsz(rx_ring);
2005 
2006 	do {
2007 		if (!igc_alloc_mapped_page(rx_ring, bi))
2008 			break;
2009 
2010 		/* sync the buffer for use by the device */
2011 		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2012 						 bi->page_offset, bufsz,
2013 						 DMA_FROM_DEVICE);
2014 
2015 		/* Refresh the desc even if buffer_addrs didn't change
2016 		 * because each write-back erases this info.
2017 		 */
2018 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2019 
2020 		rx_desc++;
2021 		bi++;
2022 		i++;
2023 		if (unlikely(!i)) {
2024 			rx_desc = IGC_RX_DESC(rx_ring, 0);
2025 			bi = rx_ring->rx_buffer_info;
2026 			i -= rx_ring->count;
2027 		}
2028 
2029 		/* clear the length for the next_to_use descriptor */
2030 		rx_desc->wb.upper.length = 0;
2031 
2032 		cleaned_count--;
2033 	} while (cleaned_count);
2034 
2035 	i += rx_ring->count;
2036 
2037 	if (rx_ring->next_to_use != i) {
2038 		/* record the next descriptor to use */
2039 		rx_ring->next_to_use = i;
2040 
2041 		/* update next to alloc since we have filled the ring */
2042 		rx_ring->next_to_alloc = i;
2043 
2044 		/* Force memory writes to complete before letting h/w
2045 		 * know there are new descriptors to fetch.  (Only
2046 		 * applicable for weak-ordered memory model archs,
2047 		 * such as IA-64).
2048 		 */
2049 		wmb();
2050 		writel(i, rx_ring->tail);
2051 	}
2052 }
2053 
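/* Refill the Rx ring with zero-copy buffers taken from the ring's XSK pool.
 * Returns false if the pool runs out of buffers before 'count' descriptors
 * could be programmed.
 */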
2054 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2055 {
2056 	union igc_adv_rx_desc *desc;
2057 	u16 i = ring->next_to_use;
2058 	struct igc_rx_buffer *bi;
2059 	dma_addr_t dma;
2060 	bool ok = true;
2061 
2062 	if (!count)
2063 		return ok;
2064 
2065 	desc = IGC_RX_DESC(ring, i);
2066 	bi = &ring->rx_buffer_info[i];
2067 	i -= ring->count;
2068 
2069 	do {
2070 		bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2071 		if (!bi->xdp) {
2072 			ok = false;
2073 			break;
2074 		}
2075 
2076 		dma = xsk_buff_xdp_get_dma(bi->xdp);
2077 		desc->read.pkt_addr = cpu_to_le64(dma);
2078 
2079 		desc++;
2080 		bi++;
2081 		i++;
2082 		if (unlikely(!i)) {
2083 			desc = IGC_RX_DESC(ring, 0);
2084 			bi = ring->rx_buffer_info;
2085 			i -= ring->count;
2086 		}
2087 
2088 		/* Clear the length for the next_to_use descriptor. */
2089 		desc->wb.upper.length = 0;
2090 
2091 		count--;
2092 	} while (count);
2093 
2094 	i += ring->count;
2095 
2096 	if (ring->next_to_use != i) {
2097 		ring->next_to_use = i;
2098 
2099 		/* Force memory writes to complete before letting h/w
2100 		 * know there are new descriptors to fetch.  (Only
2101 		 * applicable for weak-ordered memory model archs,
2102 		 * such as IA-64).
2103 		 */
2104 		wmb();
2105 		writel(i, ring->tail);
2106 	}
2107 
2108 	return ok;
2109 }
2110 
2111 static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
2112 				  struct xdp_frame *xdpf,
2113 				  struct igc_ring *ring)
2114 {
2115 	dma_addr_t dma;
2116 
2117 	dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
2118 	if (dma_mapping_error(ring->dev, dma)) {
2119 		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
2120 		return -ENOMEM;
2121 	}
2122 
2123 	buffer->type = IGC_TX_BUFFER_TYPE_XDP;
2124 	buffer->xdpf = xdpf;
2125 	buffer->protocol = 0;
2126 	buffer->bytecount = xdpf->len;
2127 	buffer->gso_segs = 1;
2128 	buffer->time_stamp = jiffies;
2129 	dma_unmap_len_set(buffer, len, xdpf->len);
2130 	dma_unmap_addr_set(buffer, dma, dma);
2131 	return 0;
2132 }
2133 
2134 /* This function requires __netif_tx_lock to be held by the caller. */
2135 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2136 				      struct xdp_frame *xdpf)
2137 {
2138 	struct igc_tx_buffer *buffer;
2139 	union igc_adv_tx_desc *desc;
2140 	u32 cmd_type, olinfo_status;
2141 	int err;
2142 
2143 	if (!igc_desc_unused(ring))
2144 		return -EBUSY;
2145 
2146 	buffer = &ring->tx_buffer_info[ring->next_to_use];
2147 	err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
2148 	if (err)
2149 		return err;
2150 
2151 	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2152 		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2153 		   buffer->bytecount;
2154 	olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2155 
2156 	desc = IGC_TX_DESC(ring, ring->next_to_use);
2157 	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2158 	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2159 	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
2160 
2161 	netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
2162 
2163 	buffer->next_to_watch = desc;
2164 
2165 	ring->next_to_use++;
2166 	if (ring->next_to_use == ring->count)
2167 		ring->next_to_use = 0;
2168 
2169 	return 0;
2170 }
2171 
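/* Map the current CPU to a Tx ring for XDP transmission, wrapping around
 * when there are more CPUs than Tx queues (e.g. with 4 Tx queues, CPU 6
 * uses ring 2).
 */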
2172 static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2173 					    int cpu)
2174 {
2175 	int index = cpu;
2176 
2177 	if (unlikely(index < 0))
2178 		index = 0;
2179 
2180 	while (index >= adapter->num_tx_queues)
2181 		index -= adapter->num_tx_queues;
2182 
2183 	return adapter->tx_ring[index];
2184 }
2185 
2186 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2187 {
2188 	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2189 	int cpu = smp_processor_id();
2190 	struct netdev_queue *nq;
2191 	struct igc_ring *ring;
2192 	int res;
2193 
2194 	if (unlikely(!xdpf))
2195 		return -EFAULT;
2196 
2197 	ring = igc_xdp_get_tx_ring(adapter, cpu);
2198 	nq = txring_txq(ring);
2199 
2200 	__netif_tx_lock(nq, cpu);
2201 	res = igc_xdp_init_tx_descriptor(ring, xdpf);
2202 	__netif_tx_unlock(nq);
2203 	return res;
2204 }
2205 
2206 /* This function assumes rcu_read_lock() is held by the caller. */
2207 static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2208 			      struct bpf_prog *prog,
2209 			      struct xdp_buff *xdp)
2210 {
2211 	u32 act = bpf_prog_run_xdp(prog, xdp);
2212 
2213 	switch (act) {
2214 	case XDP_PASS:
2215 		return IGC_XDP_PASS;
2216 	case XDP_TX:
2217 		if (igc_xdp_xmit_back(adapter, xdp) < 0)
2218 			goto out_failure;
2219 		return IGC_XDP_TX;
2220 	case XDP_REDIRECT:
2221 		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2222 			goto out_failure;
2223 		return IGC_XDP_REDIRECT;
2225 	default:
2226 		bpf_warn_invalid_xdp_action(act);
2227 		fallthrough;
2228 	case XDP_ABORTED:
2229 out_failure:
2230 		trace_xdp_exception(adapter->netdev, prog, act);
2231 		fallthrough;
2232 	case XDP_DROP:
2233 		return IGC_XDP_CONSUMED;
2234 	}
2235 }
2236 
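/* Run the attached XDP program, if any, on the received buffer.  Non-PASS
 * verdicts are returned as ERR_PTR(-IGC_XDP_*) so the Rx cleanup path can
 * tell them apart from a real skb; XDP_PASS (or no program) yields NULL
 * and the frame continues down the normal skb path.
 */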
2237 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2238 					struct xdp_buff *xdp)
2239 {
2240 	struct bpf_prog *prog;
2241 	int res;
2242 
2243 	rcu_read_lock();
2244 
2245 	prog = READ_ONCE(adapter->xdp_prog);
2246 	if (!prog) {
2247 		res = IGC_XDP_PASS;
2248 		goto unlock;
2249 	}
2250 
2251 	res = __igc_xdp_run_prog(adapter, prog, xdp);
2252 
2253 unlock:
2254 	rcu_read_unlock();
2255 	return ERR_PTR(-res);
2256 }
2257 
2258 /* This function assumes __netif_tx_lock is held by the caller. */
2259 static void igc_flush_tx_descriptors(struct igc_ring *ring)
2260 {
2261 	/* Once tail pointer is updated, hardware can fetch the descriptors
2262 	 * any time so we issue a write membar here to ensure all memory
2263 	 * writes are complete before the tail pointer is updated.
2264 	 */
2265 	wmb();
2266 	writel(ring->next_to_use, ring->tail);
2267 }
2268 
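/* Finish off any XDP work queued during the Rx cleanup pass: kick the Tx
 * tail for frames sent via XDP_TX and flush redirect maps for frames that
 * were XDP_REDIRECTed.
 */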
2269 static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2270 {
2271 	int cpu = smp_processor_id();
2272 	struct netdev_queue *nq;
2273 	struct igc_ring *ring;
2274 
2275 	if (status & IGC_XDP_TX) {
2276 		ring = igc_xdp_get_tx_ring(adapter, cpu);
2277 		nq = txring_txq(ring);
2278 
2279 		__netif_tx_lock(nq, cpu);
2280 		igc_flush_tx_descriptors(ring);
2281 		__netif_tx_unlock(nq);
2282 	}
2283 
2284 	if (status & IGC_XDP_REDIRECT)
2285 		xdp_do_flush();
2286 }
2287 
2288 static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2289 				unsigned int packets, unsigned int bytes)
2290 {
2291 	struct igc_ring *ring = q_vector->rx.ring;
2292 
2293 	u64_stats_update_begin(&ring->rx_syncp);
2294 	ring->rx_stats.packets += packets;
2295 	ring->rx_stats.bytes += bytes;
2296 	u64_stats_update_end(&ring->rx_syncp);
2297 
2298 	q_vector->rx.total_packets += packets;
2299 	q_vector->rx.total_bytes += bytes;
2300 }
2301 
2302 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
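/* Main Rx cleanup path: process completed descriptors up to the NAPI
 * budget, running the XDP program first when one is attached and falling
 * back to normal skb construction otherwise.
 */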
2303 {
2304 	unsigned int total_bytes = 0, total_packets = 0;
2305 	struct igc_adapter *adapter = q_vector->adapter;
2306 	struct igc_ring *rx_ring = q_vector->rx.ring;
2307 	struct sk_buff *skb = rx_ring->skb;
2308 	u16 cleaned_count = igc_desc_unused(rx_ring);
2309 	int xdp_status = 0, rx_buffer_pgcnt;
2310 
2311 	while (likely(total_packets < budget)) {
2312 		union igc_adv_rx_desc *rx_desc;
2313 		struct igc_rx_buffer *rx_buffer;
2314 		unsigned int size, truesize;
2315 		ktime_t timestamp = 0;
2316 		struct xdp_buff xdp;
2317 		int pkt_offset = 0;
2318 		void *pktbuf;
2319 
2320 		/* return some buffers to hardware, one at a time is too slow */
2321 		if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2322 			igc_alloc_rx_buffers(rx_ring, cleaned_count);
2323 			cleaned_count = 0;
2324 		}
2325 
2326 		rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2327 		size = le16_to_cpu(rx_desc->wb.upper.length);
2328 		if (!size)
2329 			break;
2330 
2331 		/* This memory barrier is needed to keep us from reading
2332 		 * any other fields out of the rx_desc until we know the
2333 		 * descriptor has been written back
2334 		 */
2335 		dma_rmb();
2336 
2337 		rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2338 		truesize = igc_get_rx_frame_truesize(rx_ring, size);
2339 
2340 		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2341 
2342 		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2343 			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2344 							pktbuf);
2345 			pkt_offset = IGC_TS_HDR_LEN;
2346 			size -= IGC_TS_HDR_LEN;
2347 		}
2348 
2349 		if (!skb) {
2350 			xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
2351 			xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
2352 					 igc_rx_offset(rx_ring) + pkt_offset, size, false);
2353 
2354 			skb = igc_xdp_run_prog(adapter, &xdp);
2355 		}
2356 
2357 		if (IS_ERR(skb)) {
2358 			unsigned int xdp_res = -PTR_ERR(skb);
2359 
2360 			switch (xdp_res) {
2361 			case IGC_XDP_CONSUMED:
2362 				rx_buffer->pagecnt_bias++;
2363 				break;
2364 			case IGC_XDP_TX:
2365 			case IGC_XDP_REDIRECT:
2366 				igc_rx_buffer_flip(rx_buffer, truesize);
2367 				xdp_status |= xdp_res;
2368 				break;
2369 			}
2370 
2371 			total_packets++;
2372 			total_bytes += size;
2373 		} else if (skb)
2374 			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2375 		else if (ring_uses_build_skb(rx_ring))
2376 			skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
2377 		else
2378 			skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
2379 						timestamp);
2380 
2381 		/* exit if we failed to retrieve a buffer */
2382 		if (!skb) {
2383 			rx_ring->rx_stats.alloc_failed++;
2384 			rx_buffer->pagecnt_bias++;
2385 			break;
2386 		}
2387 
2388 		igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2389 		cleaned_count++;
2390 
2391 		/* fetch next buffer in frame if non-eop */
2392 		if (igc_is_non_eop(rx_ring, rx_desc))
2393 			continue;
2394 
2395 		/* verify the packet layout is correct */
2396 		if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2397 			skb = NULL;
2398 			continue;
2399 		}
2400 
2401 		/* probably a little skewed due to removing CRC */
2402 		total_bytes += skb->len;
2403 
2404 		/* populate checksum, VLAN, and protocol */
2405 		igc_process_skb_fields(rx_ring, rx_desc, skb);
2406 
2407 		napi_gro_receive(&q_vector->napi, skb);
2408 
2409 		/* reset skb pointer */
2410 		skb = NULL;
2411 
2412 		/* update budget accounting */
2413 		total_packets++;
2414 	}
2415 
2416 	if (xdp_status)
2417 		igc_finalize_xdp(adapter, xdp_status);
2418 
2419 	/* place incomplete frames back on ring for completion */
2420 	rx_ring->skb = skb;
2421 
2422 	igc_update_rx_stats(q_vector, total_packets, total_bytes);
2423 
2424 	if (cleaned_count)
2425 		igc_alloc_rx_buffers(rx_ring, cleaned_count);
2426 
2427 	return total_packets;
2428 }
2429 
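/* Copy the frame (including any XDP metadata) out of the XSK buffer into a
 * freshly allocated skb so the zero-copy buffer can be handed back to the
 * pool by the caller.
 */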
2430 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2431 					    struct xdp_buff *xdp)
2432 {
2433 	unsigned int metasize = xdp->data - xdp->data_meta;
2434 	unsigned int datasize = xdp->data_end - xdp->data;
2435 	unsigned int totalsize = metasize + datasize;
2436 	struct sk_buff *skb;
2437 
2438 	skb = __napi_alloc_skb(&ring->q_vector->napi,
2439 			       xdp->data_end - xdp->data_hard_start,
2440 			       GFP_ATOMIC | __GFP_NOWARN);
2441 	if (unlikely(!skb))
2442 		return NULL;
2443 
2444 	skb_reserve(skb, xdp->data_meta - xdp->data_hard_start);
2445 	memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize);
2446 	if (metasize)
2447 		skb_metadata_set(skb, metasize);
2448 
2449 	return skb;
2450 }
2451 
2452 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2453 				union igc_adv_rx_desc *desc,
2454 				struct xdp_buff *xdp,
2455 				ktime_t timestamp)
2456 {
2457 	struct igc_ring *ring = q_vector->rx.ring;
2458 	struct sk_buff *skb;
2459 
2460 	skb = igc_construct_skb_zc(ring, xdp);
2461 	if (!skb) {
2462 		ring->rx_stats.alloc_failed++;
2463 		return;
2464 	}
2465 
2466 	if (timestamp)
2467 		skb_hwtstamps(skb)->hwtstamp = timestamp;
2468 
2469 	if (igc_cleanup_headers(ring, desc, skb))
2470 		return;
2471 
2472 	igc_process_skb_fields(ring, desc, skb);
2473 	napi_gro_receive(&q_vector->napi, skb);
2474 }
2475 
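/* AF_XDP zero-copy variant of the Rx cleanup path: frames are processed
 * directly out of XSK pool buffers and only copied into an skb when the
 * XDP verdict is PASS.
 */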
2476 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2477 {
2478 	struct igc_adapter *adapter = q_vector->adapter;
2479 	struct igc_ring *ring = q_vector->rx.ring;
2480 	u16 cleaned_count = igc_desc_unused(ring);
2481 	int total_bytes = 0, total_packets = 0;
2482 	u16 ntc = ring->next_to_clean;
2483 	struct bpf_prog *prog;
2484 	bool failure = false;
2485 	int xdp_status = 0;
2486 
2487 	rcu_read_lock();
2488 
2489 	prog = READ_ONCE(adapter->xdp_prog);
2490 
2491 	while (likely(total_packets < budget)) {
2492 		union igc_adv_rx_desc *desc;
2493 		struct igc_rx_buffer *bi;
2494 		ktime_t timestamp = 0;
2495 		unsigned int size;
2496 		int res;
2497 
2498 		desc = IGC_RX_DESC(ring, ntc);
2499 		size = le16_to_cpu(desc->wb.upper.length);
2500 		if (!size)
2501 			break;
2502 
2503 		/* This memory barrier is needed to keep us from reading
2504 		 * any other fields out of the rx_desc until we know the
2505 		 * descriptor has been written back
2506 		 */
2507 		dma_rmb();
2508 
2509 		bi = &ring->rx_buffer_info[ntc];
2510 
2511 		if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2512 			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2513 							bi->xdp->data);
2514 
2515 			bi->xdp->data += IGC_TS_HDR_LEN;
2516 
2517 			/* HW timestamp has been copied into local variable. Metadata
2518 			 * length when XDP program is called should be 0.
2519 			 */
2520 			bi->xdp->data_meta += IGC_TS_HDR_LEN;
2521 			size -= IGC_TS_HDR_LEN;
2522 		}
2523 
2524 		bi->xdp->data_end = bi->xdp->data + size;
2525 		xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2526 
2527 		res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2528 		switch (res) {
2529 		case IGC_XDP_PASS:
2530 			igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2531 			fallthrough;
2532 		case IGC_XDP_CONSUMED:
2533 			xsk_buff_free(bi->xdp);
2534 			break;
2535 		case IGC_XDP_TX:
2536 		case IGC_XDP_REDIRECT:
2537 			xdp_status |= res;
2538 			break;
2539 		}
2540 
2541 		bi->xdp = NULL;
2542 		total_bytes += size;
2543 		total_packets++;
2544 		cleaned_count++;
2545 		ntc++;
2546 		if (ntc == ring->count)
2547 			ntc = 0;
2548 	}
2549 
2550 	ring->next_to_clean = ntc;
2551 	rcu_read_unlock();
2552 
2553 	if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2554 		failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2555 
2556 	if (xdp_status)
2557 		igc_finalize_xdp(adapter, xdp_status);
2558 
2559 	igc_update_rx_stats(q_vector, total_packets, total_bytes);
2560 
2561 	if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2562 		if (failure || ring->next_to_clean == ring->next_to_use)
2563 			xsk_set_rx_need_wakeup(ring->xsk_pool);
2564 		else
2565 			xsk_clear_rx_need_wakeup(ring->xsk_pool);
2566 		return total_packets;
2567 	}
2568 
2569 	return failure ? budget : total_packets;
2570 }
2571 
2572 static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2573 				unsigned int packets, unsigned int bytes)
2574 {
2575 	struct igc_ring *ring = q_vector->tx.ring;
2576 
2577 	u64_stats_update_begin(&ring->tx_syncp);
2578 	ring->tx_stats.bytes += bytes;
2579 	ring->tx_stats.packets += packets;
2580 	u64_stats_update_end(&ring->tx_syncp);
2581 
2582 	q_vector->tx.total_bytes += bytes;
2583 	q_vector->tx.total_packets += packets;
2584 }
2585 
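/* Zero-copy Tx: pull ready descriptors from the XSK pool and post them to
 * the hardware ring, bounded by the number of unused descriptors.  The Tx
 * queue lock is taken internally.
 */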
2586 static void igc_xdp_xmit_zc(struct igc_ring *ring)
2587 {
2588 	struct xsk_buff_pool *pool = ring->xsk_pool;
2589 	struct netdev_queue *nq = txring_txq(ring);
2590 	union igc_adv_tx_desc *tx_desc = NULL;
2591 	int cpu = smp_processor_id();
2592 	u16 ntu = ring->next_to_use;
2593 	struct xdp_desc xdp_desc;
2594 	u16 budget;
2595 
2596 	if (!netif_carrier_ok(ring->netdev))
2597 		return;
2598 
2599 	__netif_tx_lock(nq, cpu);
2600 
2601 	budget = igc_desc_unused(ring);
2602 
2603 	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2604 		u32 cmd_type, olinfo_status;
2605 		struct igc_tx_buffer *bi;
2606 		dma_addr_t dma;
2607 
2608 		cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2609 			   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2610 			   xdp_desc.len;
2611 		olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2612 
2613 		dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2614 		xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2615 
2616 		tx_desc = IGC_TX_DESC(ring, ntu);
2617 		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2618 		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2619 		tx_desc->read.buffer_addr = cpu_to_le64(dma);
2620 
2621 		bi = &ring->tx_buffer_info[ntu];
2622 		bi->type = IGC_TX_BUFFER_TYPE_XSK;
2623 		bi->protocol = 0;
2624 		bi->bytecount = xdp_desc.len;
2625 		bi->gso_segs = 1;
2626 		bi->time_stamp = jiffies;
2627 		bi->next_to_watch = tx_desc;
2628 
2629 		netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2630 
2631 		ntu++;
2632 		if (ntu == ring->count)
2633 			ntu = 0;
2634 	}
2635 
2636 	ring->next_to_use = ntu;
2637 	if (tx_desc) {
2638 		igc_flush_tx_descriptors(ring);
2639 		xsk_tx_release(pool);
2640 	}
2641 
2642 	__netif_tx_unlock(nq);
2643 }
2644 
2645 /**
2646  * igc_clean_tx_irq - Reclaim resources after transmit completes
2647  * @q_vector: pointer to q_vector containing needed info
2648  * @napi_budget: Used to determine if we are in netpoll
2649  *
2650  * returns true if ring is completely cleaned
2651  */
2652 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2653 {
2654 	struct igc_adapter *adapter = q_vector->adapter;
2655 	unsigned int total_bytes = 0, total_packets = 0;
2656 	unsigned int budget = q_vector->tx.work_limit;
2657 	struct igc_ring *tx_ring = q_vector->tx.ring;
2658 	unsigned int i = tx_ring->next_to_clean;
2659 	struct igc_tx_buffer *tx_buffer;
2660 	union igc_adv_tx_desc *tx_desc;
2661 	u32 xsk_frames = 0;
2662 
2663 	if (test_bit(__IGC_DOWN, &adapter->state))
2664 		return true;
2665 
2666 	tx_buffer = &tx_ring->tx_buffer_info[i];
2667 	tx_desc = IGC_TX_DESC(tx_ring, i);
2668 	i -= tx_ring->count;
2669 
2670 	do {
2671 		union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2672 
2673 		/* if next_to_watch is not set then there is no work pending */
2674 		if (!eop_desc)
2675 			break;
2676 
2677 		/* prevent any other reads prior to eop_desc */
2678 		smp_rmb();
2679 
2680 		/* if DD is not set pending work has not been completed */
2681 		if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2682 			break;
2683 
2684 		/* clear next_to_watch to prevent false hangs */
2685 		tx_buffer->next_to_watch = NULL;
2686 
2687 		/* update the statistics for this packet */
2688 		total_bytes += tx_buffer->bytecount;
2689 		total_packets += tx_buffer->gso_segs;
2690 
2691 		switch (tx_buffer->type) {
2692 		case IGC_TX_BUFFER_TYPE_XSK:
2693 			xsk_frames++;
2694 			break;
2695 		case IGC_TX_BUFFER_TYPE_XDP:
2696 			xdp_return_frame(tx_buffer->xdpf);
2697 			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2698 			break;
2699 		case IGC_TX_BUFFER_TYPE_SKB:
2700 			napi_consume_skb(tx_buffer->skb, napi_budget);
2701 			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2702 			break;
2703 		default:
2704 			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2705 			break;
2706 		}
2707 
2708 		/* clear last DMA location and unmap remaining buffers */
2709 		while (tx_desc != eop_desc) {
2710 			tx_buffer++;
2711 			tx_desc++;
2712 			i++;
2713 			if (unlikely(!i)) {
2714 				i -= tx_ring->count;
2715 				tx_buffer = tx_ring->tx_buffer_info;
2716 				tx_desc = IGC_TX_DESC(tx_ring, 0);
2717 			}
2718 
2719 			/* unmap any remaining paged data */
2720 			if (dma_unmap_len(tx_buffer, len))
2721 				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2722 		}
2723 
2724 		/* move us one more past the eop_desc for start of next pkt */
2725 		tx_buffer++;
2726 		tx_desc++;
2727 		i++;
2728 		if (unlikely(!i)) {
2729 			i -= tx_ring->count;
2730 			tx_buffer = tx_ring->tx_buffer_info;
2731 			tx_desc = IGC_TX_DESC(tx_ring, 0);
2732 		}
2733 
2734 		/* issue prefetch for next Tx descriptor */
2735 		prefetch(tx_desc);
2736 
2737 		/* update budget accounting */
2738 		budget--;
2739 	} while (likely(budget));
2740 
2741 	netdev_tx_completed_queue(txring_txq(tx_ring),
2742 				  total_packets, total_bytes);
2743 
2744 	i += tx_ring->count;
2745 	tx_ring->next_to_clean = i;
2746 
2747 	igc_update_tx_stats(q_vector, total_packets, total_bytes);
2748 
2749 	if (tx_ring->xsk_pool) {
2750 		if (xsk_frames)
2751 			xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2752 		if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
2753 			xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
2754 		igc_xdp_xmit_zc(tx_ring);
2755 	}
2756 
2757 	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
2758 		struct igc_hw *hw = &adapter->hw;
2759 
2760 		/* Detect a transmit hang in hardware; this serializes the
2761 		 * check with the clearing of time_stamp and movement of i.
2762 		 */
2763 		clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2764 		if (tx_buffer->next_to_watch &&
2765 		    time_after(jiffies, tx_buffer->time_stamp +
2766 		    (adapter->tx_timeout_factor * HZ)) &&
2767 		    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
2768 			/* detected Tx unit hang */
2769 			netdev_err(tx_ring->netdev,
2770 				   "Detected Tx Unit Hang\n"
2771 				   "  Tx Queue             <%d>\n"
2772 				   "  TDH                  <%x>\n"
2773 				   "  TDT                  <%x>\n"
2774 				   "  next_to_use          <%x>\n"
2775 				   "  next_to_clean        <%x>\n"
2776 				   "buffer_info[next_to_clean]\n"
2777 				   "  time_stamp           <%lx>\n"
2778 				   "  next_to_watch        <%p>\n"
2779 				   "  jiffies              <%lx>\n"
2780 				   "  desc.status          <%x>\n",
2781 				   tx_ring->queue_index,
2782 				   rd32(IGC_TDH(tx_ring->reg_idx)),
2783 				   readl(tx_ring->tail),
2784 				   tx_ring->next_to_use,
2785 				   tx_ring->next_to_clean,
2786 				   tx_buffer->time_stamp,
2787 				   tx_buffer->next_to_watch,
2788 				   jiffies,
2789 				   tx_buffer->next_to_watch->wb.status);
2790 			netif_stop_subqueue(tx_ring->netdev,
2791 					    tx_ring->queue_index);
2792 
2793 			/* we are about to reset, no point in enabling stuff */
2794 			return true;
2795 		}
2796 	}
2797 
2798 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
2799 	if (unlikely(total_packets &&
2800 		     netif_carrier_ok(tx_ring->netdev) &&
2801 		     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
2802 		/* Make sure that anybody stopping the queue after this
2803 		 * sees the new next_to_clean.
2804 		 */
2805 		smp_mb();
2806 		if (__netif_subqueue_stopped(tx_ring->netdev,
2807 					     tx_ring->queue_index) &&
2808 		    !(test_bit(__IGC_DOWN, &adapter->state))) {
2809 			netif_wake_subqueue(tx_ring->netdev,
2810 					    tx_ring->queue_index);
2811 
2812 			u64_stats_update_begin(&tx_ring->tx_syncp);
2813 			tx_ring->tx_stats.restart_queue++;
2814 			u64_stats_update_end(&tx_ring->tx_syncp);
2815 		}
2816 	}
2817 
2818 	return !!budget;
2819 }
2820 
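/* Scan the RAL/RAH register pairs for an enabled entry matching the given
 * address and filter type.  Returns the entry index, or -1 if no match is
 * found.
 */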
2821 static int igc_find_mac_filter(struct igc_adapter *adapter,
2822 			       enum igc_mac_filter_type type, const u8 *addr)
2823 {
2824 	struct igc_hw *hw = &adapter->hw;
2825 	int max_entries = hw->mac.rar_entry_count;
2826 	u32 ral, rah;
2827 	int i;
2828 
2829 	for (i = 0; i < max_entries; i++) {
2830 		ral = rd32(IGC_RAL(i));
2831 		rah = rd32(IGC_RAH(i));
2832 
2833 		if (!(rah & IGC_RAH_AV))
2834 			continue;
2835 		if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
2836 			continue;
2837 		if ((rah & IGC_RAH_RAH_MASK) !=
2838 		    le16_to_cpup((__le16 *)(addr + 4)))
2839 			continue;
2840 		if (ral != le32_to_cpup((__le32 *)(addr)))
2841 			continue;
2842 
2843 		return i;
2844 	}
2845 
2846 	return -1;
2847 }
2848 
2849 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
2850 {
2851 	struct igc_hw *hw = &adapter->hw;
2852 	int max_entries = hw->mac.rar_entry_count;
2853 	u32 rah;
2854 	int i;
2855 
2856 	for (i = 0; i < max_entries; i++) {
2857 		rah = rd32(IGC_RAH(i));
2858 
2859 		if (!(rah & IGC_RAH_AV))
2860 			return i;
2861 	}
2862 
2863 	return -1;
2864 }
2865 
2866 /**
2867  * igc_add_mac_filter() - Add MAC address filter
2868  * @adapter: Pointer to adapter where the filter should be added
2869  * @type: MAC address filter type (source or destination)
2870  * @addr: MAC address
2871  * @queue: If non-negative, queue assignment feature is enabled and frames
2872  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
2873  *         assignment is disabled.
2874  *
2875  * Return: 0 in case of success, negative errno code otherwise.
2876  */
2877 static int igc_add_mac_filter(struct igc_adapter *adapter,
2878 			      enum igc_mac_filter_type type, const u8 *addr,
2879 			      int queue)
2880 {
2881 	struct net_device *dev = adapter->netdev;
2882 	int index;
2883 
2884 	index = igc_find_mac_filter(adapter, type, addr);
2885 	if (index >= 0)
2886 		goto update_filter;
2887 
2888 	index = igc_get_avail_mac_filter_slot(adapter);
2889 	if (index < 0)
2890 		return -ENOSPC;
2891 
2892 	netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
2893 		   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2894 		   addr, queue);
2895 
2896 update_filter:
2897 	igc_set_mac_filter_hw(adapter, index, type, addr, queue);
2898 	return 0;
2899 }
2900 
2901 /**
2902  * igc_del_mac_filter() - Delete MAC address filter
2903  * @adapter: Pointer to adapter where the filter should be deleted from
2904  * @type: MAC address filter type (source or destination)
2905  * @addr: MAC address
2906  */
2907 static void igc_del_mac_filter(struct igc_adapter *adapter,
2908 			       enum igc_mac_filter_type type, const u8 *addr)
2909 {
2910 	struct net_device *dev = adapter->netdev;
2911 	int index;
2912 
2913 	index = igc_find_mac_filter(adapter, type, addr);
2914 	if (index < 0)
2915 		return;
2916 
2917 	if (index == 0) {
2918 		/* If this is the default filter, we don't actually delete it.
2919 		 * We just reset to its default value i.e. disable queue
2920 		 * assignment.
2921 		 */
2922 		netdev_dbg(dev, "Disable default MAC filter queue assignment");
2923 
2924 		igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
2925 	} else {
2926 		netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
2927 			   index,
2928 			   type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
2929 			   addr);
2930 
2931 		igc_clear_mac_filter_hw(adapter, index);
2932 	}
2933 }
2934 
2935 /**
2936  * igc_add_vlan_prio_filter() - Add VLAN priority filter
2937  * @adapter: Pointer to adapter where the filter should be added
2938  * @prio: VLAN priority value
2939  * @queue: Queue number which matching frames are assigned to
2940  *
2941  * Return: 0 in case of success, negative errno code otherwise.
2942  */
2943 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
2944 				    int queue)
2945 {
2946 	struct net_device *dev = adapter->netdev;
2947 	struct igc_hw *hw = &adapter->hw;
2948 	u32 vlanpqf;
2949 
2950 	vlanpqf = rd32(IGC_VLANPQF);
2951 
2952 	if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
2953 		netdev_dbg(dev, "VLAN priority filter already in use\n");
2954 		return -EEXIST;
2955 	}
2956 
2957 	vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
2958 	vlanpqf |= IGC_VLANPQF_VALID(prio);
2959 
2960 	wr32(IGC_VLANPQF, vlanpqf);
2961 
2962 	netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
2963 		   prio, queue);
2964 	return 0;
2965 }
2966 
2967 /**
2968  * igc_del_vlan_prio_filter() - Delete VLAN priority filter
2969  * @adapter: Pointer to adapter where the filter should be deleted from
2970  * @prio: VLAN priority value
2971  */
2972 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
2973 {
2974 	struct igc_hw *hw = &adapter->hw;
2975 	u32 vlanpqf;
2976 
2977 	vlanpqf = rd32(IGC_VLANPQF);
2978 
2979 	vlanpqf &= ~IGC_VLANPQF_VALID(prio);
2980 	vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
2981 
2982 	wr32(IGC_VLANPQF, vlanpqf);
2983 
2984 	netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
2985 		   prio);
2986 }
2987 
2988 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
2989 {
2990 	struct igc_hw *hw = &adapter->hw;
2991 	int i;
2992 
2993 	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
2994 		u32 etqf = rd32(IGC_ETQF(i));
2995 
2996 		if (!(etqf & IGC_ETQF_FILTER_ENABLE))
2997 			return i;
2998 	}
2999 
3000 	return -1;
3001 }
3002 
3003 /**
3004  * igc_add_etype_filter() - Add ethertype filter
3005  * @adapter: Pointer to adapter where the filter should be added
3006  * @etype: Ethertype value
3007  * @queue: If non-negative, queue assignment feature is enabled and frames
3008  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3009  *         assignment is disabled.
3010  *
3011  * Return: 0 in case of success, negative errno code otherwise.
3012  */
3013 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3014 				int queue)
3015 {
3016 	struct igc_hw *hw = &adapter->hw;
3017 	int index;
3018 	u32 etqf;
3019 
3020 	index = igc_get_avail_etype_filter_slot(adapter);
3021 	if (index < 0)
3022 		return -ENOSPC;
3023 
3024 	etqf = rd32(IGC_ETQF(index));
3025 
3026 	etqf &= ~IGC_ETQF_ETYPE_MASK;
3027 	etqf |= etype;
3028 
3029 	if (queue >= 0) {
3030 		etqf &= ~IGC_ETQF_QUEUE_MASK;
3031 		etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3032 		etqf |= IGC_ETQF_QUEUE_ENABLE;
3033 	}
3034 
3035 	etqf |= IGC_ETQF_FILTER_ENABLE;
3036 
3037 	wr32(IGC_ETQF(index), etqf);
3038 
3039 	netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3040 		   etype, queue);
3041 	return 0;
3042 }
3043 
3044 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3045 {
3046 	struct igc_hw *hw = &adapter->hw;
3047 	int i;
3048 
3049 	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3050 		u32 etqf = rd32(IGC_ETQF(i));
3051 
3052 		if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3053 			return i;
3054 	}
3055 
3056 	return -1;
3057 }
3058 
3059 /**
3060  * igc_del_etype_filter() - Delete ethertype filter
3061  * @adapter: Pointer to adapter where the filter should be deleted from
3062  * @etype: Ethertype value
3063  */
3064 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3065 {
3066 	struct igc_hw *hw = &adapter->hw;
3067 	int index;
3068 
3069 	index = igc_find_etype_filter(adapter, etype);
3070 	if (index < 0)
3071 		return;
3072 
3073 	wr32(IGC_ETQF(index), 0);
3074 
3075 	netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3076 		   etype);
3077 }
3078 
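/* Program every hardware filter (ethertype, source/destination MAC and
 * VLAN priority) that the NFC rule's match flags call for.  Returns 0 on
 * success or the first error reported by the underlying filter helpers.
 */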
3079 static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3080 			       const struct igc_nfc_rule *rule)
3081 {
3082 	int err;
3083 
3084 	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3085 		err = igc_add_etype_filter(adapter, rule->filter.etype,
3086 					   rule->action);
3087 		if (err)
3088 			return err;
3089 	}
3090 
3091 	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3092 		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3093 					 rule->filter.src_addr, rule->action);
3094 		if (err)
3095 			return err;
3096 	}
3097 
3098 	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3099 		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3100 					 rule->filter.dst_addr, rule->action);
3101 		if (err)
3102 			return err;
3103 	}
3104 
3105 	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3106 		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3107 			   VLAN_PRIO_SHIFT;
3108 
3109 		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3110 		if (err)
3111 			return err;
3112 	}
3113 
3114 	return 0;
3115 }
3116 
3117 static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3118 				 const struct igc_nfc_rule *rule)
3119 {
3120 	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3121 		igc_del_etype_filter(adapter, rule->filter.etype);
3122 
3123 	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3124 		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3125 			   VLAN_PRIO_SHIFT;
3126 
3127 		igc_del_vlan_prio_filter(adapter, prio);
3128 	}
3129 
3130 	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3131 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3132 				   rule->filter.src_addr);
3133 
3134 	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3135 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3136 				   rule->filter.dst_addr);
3137 }
3138 
3139 /**
3140  * igc_get_nfc_rule() - Get NFC rule
3141  * @adapter: Pointer to adapter
3142  * @location: Rule location
3143  *
3144  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3145  *
3146  * Return: Pointer to NFC rule at @location. If not found, NULL.
3147  */
3148 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3149 				      u32 location)
3150 {
3151 	struct igc_nfc_rule *rule;
3152 
3153 	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3154 		if (rule->location == location)
3155 			return rule;
3156 		if (rule->location > location)
3157 			break;
3158 	}
3159 
3160 	return NULL;
3161 }
3162 
3163 /**
3164  * igc_del_nfc_rule() - Delete NFC rule
3165  * @adapter: Pointer to adapter
3166  * @rule: Pointer to rule to be deleted
3167  *
3168  * Disable NFC rule in hardware and delete it from adapter.
3169  *
3170  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3171  */
3172 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3173 {
3174 	igc_disable_nfc_rule(adapter, rule);
3175 
3176 	list_del(&rule->list);
3177 	adapter->nfc_rule_count--;
3178 
3179 	kfree(rule);
3180 }
3181 
3182 static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3183 {
3184 	struct igc_nfc_rule *rule, *tmp;
3185 
3186 	mutex_lock(&adapter->nfc_rule_lock);
3187 
3188 	list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3189 		igc_del_nfc_rule(adapter, rule);
3190 
3191 	mutex_unlock(&adapter->nfc_rule_lock);
3192 }
3193 
3194 /**
3195  * igc_add_nfc_rule() - Add NFC rule
3196  * @adapter: Pointer to adapter
3197  * @rule: Pointer to rule to be added
3198  *
3199  * Enable NFC rule in hardware and add it to adapter.
3200  *
3201  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3202  *
3203  * Return: 0 on success, negative errno on failure.
3204  */
3205 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3206 {
3207 	struct igc_nfc_rule *pred, *cur;
3208 	int err;
3209 
3210 	err = igc_enable_nfc_rule(adapter, rule);
3211 	if (err)
3212 		return err;
3213 
3214 	pred = NULL;
3215 	list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3216 		if (cur->location >= rule->location)
3217 			break;
3218 		pred = cur;
3219 	}
3220 
3221 	list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3222 	adapter->nfc_rule_count++;
3223 	return 0;
3224 }
3225 
3226 static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3227 {
3228 	struct igc_nfc_rule *rule;
3229 
3230 	mutex_lock(&adapter->nfc_rule_lock);
3231 
3232 	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3233 		igc_enable_nfc_rule(adapter, rule);
3234 
3235 	mutex_unlock(&adapter->nfc_rule_lock);
3236 }
3237 
3238 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3239 {
3240 	struct igc_adapter *adapter = netdev_priv(netdev);
3241 
3242 	return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3243 }
3244 
3245 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3246 {
3247 	struct igc_adapter *adapter = netdev_priv(netdev);
3248 
3249 	igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3250 	return 0;
3251 }
3252 
3253 /**
3254  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3255  * @netdev: network interface device structure
3256  *
3257  * The set_rx_mode entry point is called whenever the unicast or multicast
3258  * address lists or the network interface flags are updated.  This routine is
3259  * responsible for configuring the hardware for proper unicast, multicast,
3260  * promiscuous mode, and all-multi behavior.
3261  */
3262 static void igc_set_rx_mode(struct net_device *netdev)
3263 {
3264 	struct igc_adapter *adapter = netdev_priv(netdev);
3265 	struct igc_hw *hw = &adapter->hw;
3266 	u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3267 	int count;
3268 
3269 	/* Check for Promiscuous and All Multicast modes */
3270 	if (netdev->flags & IFF_PROMISC) {
3271 		rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3272 	} else {
3273 		if (netdev->flags & IFF_ALLMULTI) {
3274 			rctl |= IGC_RCTL_MPE;
3275 		} else {
3276 			/* Write addresses to the MTA; if the attempt fails
3277 			 * then we should just turn on promiscuous mode so
3278 			 * that we can at least receive multicast traffic
3279 			 */
3280 			count = igc_write_mc_addr_list(netdev);
3281 			if (count < 0)
3282 				rctl |= IGC_RCTL_MPE;
3283 		}
3284 	}
3285 
3286 	/* Write addresses to available RAR registers; if there is not
3287 	 * sufficient space to store all the addresses then enable
3288 	 * unicast promiscuous mode
3289 	 */
3290 	if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3291 		rctl |= IGC_RCTL_UPE;
3292 
3293 	/* update state of unicast and multicast */
3294 	rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3295 	wr32(IGC_RCTL, rctl);
3296 
3297 #if (PAGE_SIZE < 8192)
3298 	if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3299 		rlpml = IGC_MAX_FRAME_BUILD_SKB;
3300 #endif
3301 	wr32(IGC_RLPML, rlpml);
3302 }
3303 
3304 /**
3305  * igc_configure - configure the hardware for RX and TX
3306  * @adapter: private board structure
3307  */
3308 static void igc_configure(struct igc_adapter *adapter)
3309 {
3310 	struct net_device *netdev = adapter->netdev;
3311 	int i = 0;
3312 
3313 	igc_get_hw_control(adapter);
3314 	igc_set_rx_mode(netdev);
3315 
3316 	igc_restore_vlan(adapter);
3317 
3318 	igc_setup_tctl(adapter);
3319 	igc_setup_mrqc(adapter);
3320 	igc_setup_rctl(adapter);
3321 
3322 	igc_set_default_mac_filter(adapter);
3323 	igc_restore_nfc_rules(adapter);
3324 
3325 	igc_configure_tx(adapter);
3326 	igc_configure_rx(adapter);
3327 
3328 	igc_rx_fifo_flush_base(&adapter->hw);
3329 
3330 	/* call igc_desc_unused which always leaves
3331 	 * at least 1 descriptor unused to make sure
3332 	 * next_to_use != next_to_clean
3333 	 */
3334 	for (i = 0; i < adapter->num_rx_queues; i++) {
3335 		struct igc_ring *ring = adapter->rx_ring[i];
3336 
3337 		if (ring->xsk_pool)
3338 			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3339 		else
3340 			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3341 	}
3342 }
3343 
3344 /**
3345  * igc_write_ivar - configure ivar for given MSI-X vector
3346  * @hw: pointer to the HW structure
3347  * @msix_vector: vector number we are allocating to a given ring
3348  * @index: row index of IVAR register to write within IVAR table
3349  * @offset: column offset in IVAR, should be a multiple of 8
3350  *
3351  * The IVAR table consists of 2 columns,
3352  * each containing a cause allocation for an Rx and Tx ring, and a
3353  * variable number of rows depending on the number of queues supported.
3354  */
3355 static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3356 			   int index, int offset)
3357 {
3358 	u32 ivar = array_rd32(IGC_IVAR0, index);
3359 
3360 	/* clear any bits that are currently set */
3361 	ivar &= ~((u32)0xFF << offset);
3362 
3363 	/* write vector and valid bit */
3364 	ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3365 
3366 	array_wr32(IGC_IVAR0, index, ivar);
3367 }
3368 
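/* Associate a q_vector's Rx/Tx rings with an MSI-X vector by programming
 * the corresponding IVAR entries, and record the vector's EIMS bit so it
 * can be enabled later.
 */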
3369 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3370 {
3371 	struct igc_adapter *adapter = q_vector->adapter;
3372 	struct igc_hw *hw = &adapter->hw;
3373 	int rx_queue = IGC_N0_QUEUE;
3374 	int tx_queue = IGC_N0_QUEUE;
3375 
3376 	if (q_vector->rx.ring)
3377 		rx_queue = q_vector->rx.ring->reg_idx;
3378 	if (q_vector->tx.ring)
3379 		tx_queue = q_vector->tx.ring->reg_idx;
3380 
3381 	switch (hw->mac.type) {
3382 	case igc_i225:
3383 		if (rx_queue > IGC_N0_QUEUE)
3384 			igc_write_ivar(hw, msix_vector,
3385 				       rx_queue >> 1,
3386 				       (rx_queue & 0x1) << 4);
3387 		if (tx_queue > IGC_N0_QUEUE)
3388 			igc_write_ivar(hw, msix_vector,
3389 				       tx_queue >> 1,
3390 				       ((tx_queue & 0x1) << 4) + 8);
3391 		q_vector->eims_value = BIT(msix_vector);
3392 		break;
3393 	default:
3394 		WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3395 		break;
3396 	}
3397 
3398 	/* add q_vector eims value to global eims_enable_mask */
3399 	adapter->eims_enable_mask |= q_vector->eims_value;
3400 
3401 	/* configure q_vector to set itr on first interrupt */
3402 	q_vector->set_itr = 1;
3403 }
3404 
3405 /**
3406  * igc_configure_msix - Configure MSI-X hardware
3407  * @adapter: Pointer to adapter structure
3408  *
3409  * igc_configure_msix sets up the hardware to properly
3410  * generate MSI-X interrupts.
3411  */
3412 static void igc_configure_msix(struct igc_adapter *adapter)
3413 {
3414 	struct igc_hw *hw = &adapter->hw;
3415 	int i, vector = 0;
3416 	u32 tmp;
3417 
3418 	adapter->eims_enable_mask = 0;
3419 
3420 	/* set vector for other causes, i.e. link changes */
3421 	switch (hw->mac.type) {
3422 	case igc_i225:
3423 		/* Turn on MSI-X capability first, or our settings
3424 		 * won't stick.  And it will take days to debug.
3425 		 */
3426 		wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3427 		     IGC_GPIE_PBA | IGC_GPIE_EIAME |
3428 		     IGC_GPIE_NSICR);
3429 
3430 		/* enable msix_other interrupt */
3431 		adapter->eims_other = BIT(vector);
3432 		tmp = (vector++ | IGC_IVAR_VALID) << 8;
3433 
3434 		wr32(IGC_IVAR_MISC, tmp);
3435 		break;
3436 	default:
3437 		/* do nothing, since nothing else supports MSI-X */
3438 		break;
3439 	} /* switch (hw->mac.type) */
3440 
3441 	adapter->eims_enable_mask |= adapter->eims_other;
3442 
3443 	for (i = 0; i < adapter->num_q_vectors; i++)
3444 		igc_assign_vector(adapter->q_vector[i], vector++);
3445 
3446 	wrfl();
3447 }
3448 
3449 /**
3450  * igc_irq_enable - Enable default interrupt generation settings
3451  * @adapter: board private structure
3452  */
3453 static void igc_irq_enable(struct igc_adapter *adapter)
3454 {
3455 	struct igc_hw *hw = &adapter->hw;
3456 
3457 	if (adapter->msix_entries) {
3458 		u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3459 		u32 regval = rd32(IGC_EIAC);
3460 
3461 		wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3462 		regval = rd32(IGC_EIAM);
3463 		wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3464 		wr32(IGC_EIMS, adapter->eims_enable_mask);
3465 		wr32(IGC_IMS, ims);
3466 	} else {
3467 		wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3468 		wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3469 	}
3470 }
3471 
3472 /**
3473  * igc_irq_disable - Mask off interrupt generation on the NIC
3474  * @adapter: board private structure
3475  */
3476 static void igc_irq_disable(struct igc_adapter *adapter)
3477 {
3478 	struct igc_hw *hw = &adapter->hw;
3479 
3480 	if (adapter->msix_entries) {
3481 		u32 regval = rd32(IGC_EIAM);
3482 
3483 		wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3484 		wr32(IGC_EIMC, adapter->eims_enable_mask);
3485 		regval = rd32(IGC_EIAC);
3486 		wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3487 	}
3488 
3489 	wr32(IGC_IAM, 0);
3490 	wr32(IGC_IMC, ~0);
3491 	wrfl();
3492 
3493 	if (adapter->msix_entries) {
3494 		int vector = 0, i;
3495 
3496 		synchronize_irq(adapter->msix_entries[vector++].vector);
3497 
3498 		for (i = 0; i < adapter->num_q_vectors; i++)
3499 			synchronize_irq(adapter->msix_entries[vector++].vector);
3500 	} else {
3501 		synchronize_irq(adapter->pdev->irq);
3502 	}
3503 }
3504 
3505 void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3506 			      const u32 max_rss_queues)
3507 {
3508 	/* Determine if we need to pair queues: if rss_queues > half of
3509 	 * max_rss_queues, pair the queues in order to conserve interrupts
3510 	 * due to limited supply.
3511 	 */
3512 	if (adapter->rss_queues > (max_rss_queues / 2))
3513 		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3514 	else
3515 		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3516 }
3517 
3518 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3519 {
3520 	return IGC_MAX_RX_QUEUES;
3521 }
3522 
3523 static void igc_init_queue_configuration(struct igc_adapter *adapter)
3524 {
3525 	u32 max_rss_queues;
3526 
3527 	max_rss_queues = igc_get_max_rss_queues(adapter);
3528 	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3529 
3530 	igc_set_flag_queue_pairs(adapter, max_rss_queues);
3531 }
3532 
3533 /**
3534  * igc_reset_q_vector - Reset config for interrupt vector
3535  * @adapter: board private structure to initialize
3536  * @v_idx: Index of vector to be reset
3537  *
3538  * If NAPI is enabled it will delete any references to the
3539  * NAPI struct. This is preparation for igc_free_q_vector.
3540  */
3541 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
3542 {
3543 	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3544 
3545 	/* if we're coming from igc_set_interrupt_capability, the vectors are
3546 	 * not yet allocated
3547 	 */
3548 	if (!q_vector)
3549 		return;
3550 
3551 	if (q_vector->tx.ring)
3552 		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
3553 
3554 	if (q_vector->rx.ring)
3555 		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
3556 
3557 	netif_napi_del(&q_vector->napi);
3558 }
3559 
3560 /**
3561  * igc_free_q_vector - Free memory allocated for specific interrupt vector
3562  * @adapter: board private structure to initialize
3563  * @v_idx: Index of vector to be freed
3564  *
3565  * This function frees the memory allocated to the q_vector.
3566  */
3567 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
3568 {
3569 	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
3570 
3571 	adapter->q_vector[v_idx] = NULL;
3572 
3573 	/* igc_get_stats64() might access the rings on this vector,
3574 	 * we must wait a grace period before freeing it.
3575 	 */
3576 	if (q_vector)
3577 		kfree_rcu(q_vector, rcu);
3578 }
3579 
3580 /**
3581  * igc_free_q_vectors - Free memory allocated for interrupt vectors
3582  * @adapter: board private structure to initialize
3583  *
3584  * This function frees the memory allocated to the q_vectors.  In addition if
3585  * NAPI is enabled it will delete any references to the NAPI struct prior
3586  * to freeing the q_vector.
3587  */
3588 static void igc_free_q_vectors(struct igc_adapter *adapter)
3589 {
3590 	int v_idx = adapter->num_q_vectors;
3591 
3592 	adapter->num_tx_queues = 0;
3593 	adapter->num_rx_queues = 0;
3594 	adapter->num_q_vectors = 0;
3595 
3596 	while (v_idx--) {
3597 		igc_reset_q_vector(adapter, v_idx);
3598 		igc_free_q_vector(adapter, v_idx);
3599 	}
3600 }
3601 
3602 /**
3603  * igc_update_itr - update the dynamic ITR value based on statistics
3604  * @q_vector: pointer to q_vector
3605  * @ring_container: ring info to update the itr for
3606  *
3607  * Stores a new ITR value based on packets and byte
3608  * counts during the last interrupt.  The advantage of per interrupt
3609  * computation is faster updates and more accurate ITR for the current
3610  * traffic pattern.  Constants in this function were computed
3611  * based on theoretical maximum wire speed and thresholds were set based
3612  * on testing data as well as attempting to minimize response time
3613  * while increasing bulk throughput.
3614  * NOTE: These calculations are only valid when operating in a single-
3615  * queue environment.
3616  */
3617 static void igc_update_itr(struct igc_q_vector *q_vector,
3618 			   struct igc_ring_container *ring_container)
3619 {
3620 	unsigned int packets = ring_container->total_packets;
3621 	unsigned int bytes = ring_container->total_bytes;
3622 	u8 itrval = ring_container->itr;
3623 
3624 	/* no packets, exit with status unchanged */
3625 	if (packets == 0)
3626 		return;
3627 
3628 	switch (itrval) {
3629 	case lowest_latency:
3630 		/* handle TSO and jumbo frames */
3631 		if (bytes / packets > 8000)
3632 			itrval = bulk_latency;
3633 		else if ((packets < 5) && (bytes > 512))
3634 			itrval = low_latency;
3635 		break;
3636 	case low_latency:  /* 50 usec aka 20000 ints/s */
3637 		if (bytes > 10000) {
3638 			/* this if handles the TSO accounting */
3639 			if (bytes / packets > 8000)
3640 				itrval = bulk_latency;
3641 			else if ((packets < 10) || ((bytes / packets) > 1200))
3642 				itrval = bulk_latency;
3643 			else if ((packets > 35))
3644 				itrval = lowest_latency;
3645 		} else if (bytes / packets > 2000) {
3646 			itrval = bulk_latency;
3647 		} else if (packets <= 2 && bytes < 512) {
3648 			itrval = lowest_latency;
3649 		}
3650 		break;
3651 	case bulk_latency: /* 250 usec aka 4000 ints/s */
3652 		if (bytes > 25000) {
3653 			if (packets > 35)
3654 				itrval = low_latency;
3655 		} else if (bytes < 1500) {
3656 			itrval = low_latency;
3657 		}
3658 		break;
3659 	}
3660 
3661 	/* clear work counters since we have the values we need */
3662 	ring_container->total_bytes = 0;
3663 	ring_container->total_packets = 0;
3664 
3665 	/* write updated itr to ring container */
3666 	ring_container->itr = itrval;
3667 }
3668 
3669 static void igc_set_itr(struct igc_q_vector *q_vector)
3670 {
3671 	struct igc_adapter *adapter = q_vector->adapter;
3672 	u32 new_itr = q_vector->itr_val;
3673 	u8 current_itr = 0;
3674 
3675 	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3676 	switch (adapter->link_speed) {
3677 	case SPEED_10:
3678 	case SPEED_100:
3679 		current_itr = 0;
3680 		new_itr = IGC_4K_ITR;
3681 		goto set_itr_now;
3682 	default:
3683 		break;
3684 	}
3685 
3686 	igc_update_itr(q_vector, &q_vector->tx);
3687 	igc_update_itr(q_vector, &q_vector->rx);
3688 
3689 	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
3690 
3691 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3692 	if (current_itr == lowest_latency &&
3693 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3694 	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3695 		current_itr = low_latency;
3696 
3697 	switch (current_itr) {
3698 	/* counts and packets in update_itr are dependent on these numbers */
3699 	case lowest_latency:
3700 		new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
3701 		break;
3702 	case low_latency:
3703 		new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
3704 		break;
3705 	case bulk_latency:
3706 		new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
3707 		break;
3708 	default:
3709 		break;
3710 	}
3711 
3712 set_itr_now:
3713 	if (new_itr != q_vector->itr_val) {
3714 		/* this attempts to bias the interrupt rate towards Bulk
3715 		 * by adding intermediate steps when interrupt rate is
3716 		 * increasing
3717 		 */
3718 		new_itr = new_itr > q_vector->itr_val ?
3719 			  max((new_itr * q_vector->itr_val) /
3720 			  (new_itr + (q_vector->itr_val >> 2)),
3721 			  new_itr) : new_itr;
3722 		/* Don't write the value here; it resets the adapter's
3723 		 * internal timer, and causes us to delay far longer than
3724 		 * we should between interrupts.  Instead, we write the ITR
3725 		 * value at the beginning of the next interrupt so the timing
3726 		 * ends up being correct.
3727 		 */
3728 		q_vector->itr_val = new_itr;
3729 		q_vector->set_itr = 1;
3730 	}
3731 }
3732 
3733 static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
3734 {
3735 	int v_idx = adapter->num_q_vectors;
3736 
3737 	if (adapter->msix_entries) {
3738 		pci_disable_msix(adapter->pdev);
3739 		kfree(adapter->msix_entries);
3740 		adapter->msix_entries = NULL;
3741 	} else if (adapter->flags & IGC_FLAG_HAS_MSI) {
3742 		pci_disable_msi(adapter->pdev);
3743 	}
3744 
3745 	while (v_idx--)
3746 		igc_reset_q_vector(adapter, v_idx);
3747 }
3748 
3749 /**
3750  * igc_set_interrupt_capability - set MSI or MSI-X if supported
3751  * @adapter: Pointer to adapter structure
3752  * @msix: boolean value for MSI-X capability
3753  *
3754  * Attempt to configure interrupts using the best available
3755  * capabilities of the hardware and kernel.
3756  */
3757 static void igc_set_interrupt_capability(struct igc_adapter *adapter,
3758 					 bool msix)
3759 {
3760 	int numvecs, i;
3761 	int err;
3762 
3763 	if (!msix)
3764 		goto msi_only;
3765 	adapter->flags |= IGC_FLAG_HAS_MSIX;
3766 
3767 	/* Number of supported queues. */
3768 	adapter->num_rx_queues = adapter->rss_queues;
3769 
3770 	adapter->num_tx_queues = adapter->rss_queues;
3771 
3772 	/* start with one vector for every Rx queue */
3773 	numvecs = adapter->num_rx_queues;
3774 
3775 	/* if Tx handler is separate add 1 for every Tx queue */
3776 	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
3777 		numvecs += adapter->num_tx_queues;
3778 
3779 	/* store the number of vectors reserved for queues */
3780 	adapter->num_q_vectors = numvecs;
3781 
3782 	/* add 1 vector for link status interrupts */
3783 	numvecs++;
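	/* Example: with rss_queues = 4 and IGC_FLAG_QUEUE_PAIRS set, each
	 * queue vector services one Tx/Rx pair, so 4 queue vectors plus the
	 * link vector give 5 MSI-X entries requested below.
	 */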
3784 
3785 	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
3786 					GFP_KERNEL);
3787 
3788 	if (!adapter->msix_entries)
3789 		return;
3790 
3791 	/* populate entry values */
3792 	for (i = 0; i < numvecs; i++)
3793 		adapter->msix_entries[i].entry = i;
3794 
3795 	err = pci_enable_msix_range(adapter->pdev,
3796 				    adapter->msix_entries,
3797 				    numvecs,
3798 				    numvecs);
3799 	if (err > 0)
3800 		return;
3801 
3802 	kfree(adapter->msix_entries);
3803 	adapter->msix_entries = NULL;
3804 
3805 	igc_reset_interrupt_capability(adapter);
3806 
3807 msi_only:
3808 	adapter->flags &= ~IGC_FLAG_HAS_MSIX;
3809 
3810 	adapter->rss_queues = 1;
3811 	adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3812 	adapter->num_rx_queues = 1;
3813 	adapter->num_tx_queues = 1;
3814 	adapter->num_q_vectors = 1;
3815 	if (!pci_enable_msi(adapter->pdev))
3816 		adapter->flags |= IGC_FLAG_HAS_MSI;
3817 }
3818 
3819 /**
3820  * igc_update_ring_itr - update the dynamic ITR value based on packet size
3821  * @q_vector: pointer to q_vector
3822  *
3823  * Stores a new ITR value based strictly on packet size.  This
3824  * algorithm is less sophisticated than that used in igc_update_itr,
3825  * due to the difficulty of synchronizing statistics across multiple
3826  * receive rings.  The divisors and thresholds used by this function
3827  * were determined based on theoretical maximum wire speed and testing
3828  * data, in order to minimize response time while increasing bulk
3829  * throughput.
3830  * NOTE: This function is called only when operating in a multiqueue
3831  * receive environment.
3832  */
3833 static void igc_update_ring_itr(struct igc_q_vector *q_vector)
3834 {
3835 	struct igc_adapter *adapter = q_vector->adapter;
3836 	int new_val = q_vector->itr_val;
3837 	int avg_wire_size = 0;
3838 	unsigned int packets;
3839 
3840 	/* For non-gigabit speeds, just fix the interrupt rate at 4000
3841 	 * ints/sec (IGC_4K_ITR).
3842 	 */
3843 	switch (adapter->link_speed) {
3844 	case SPEED_10:
3845 	case SPEED_100:
3846 		new_val = IGC_4K_ITR;
3847 		goto set_itr_val;
3848 	default:
3849 		break;
3850 	}
3851 
3852 	packets = q_vector->rx.total_packets;
3853 	if (packets)
3854 		avg_wire_size = q_vector->rx.total_bytes / packets;
3855 
3856 	packets = q_vector->tx.total_packets;
3857 	if (packets)
3858 		avg_wire_size = max_t(u32, avg_wire_size,
3859 				      q_vector->tx.total_bytes / packets);
3860 
3861 	/* if avg_wire_size isn't set no work was done */
3862 	if (!avg_wire_size)
3863 		goto clear_counts;
3864 
3865 	/* Add 24 bytes to size to account for CRC, preamble, and gap */
3866 	avg_wire_size += 24;
3867 
3868 	/* Don't starve jumbo frames */
3869 	avg_wire_size = min(avg_wire_size, 3000);
3870 
3871 	/* Give a little boost to mid-size frames */
3872 	if (avg_wire_size > 300 && avg_wire_size < 1200)
3873 		new_val = avg_wire_size / 3;
3874 	else
3875 		new_val = avg_wire_size / 2;
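	/* For example, an average wire size of 1000 bytes becomes 1024 after
	 * the 24 byte overhead above, falls in the mid-size range and yields
	 * new_val = 1024 / 3 = 341.
	 */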
3876 
3877 	/* conservative mode (itr 3) eliminates the lowest_latency setting */
3878 	if (new_val < IGC_20K_ITR &&
3879 	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3880 	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3881 		new_val = IGC_20K_ITR;
3882 
3883 set_itr_val:
3884 	if (new_val != q_vector->itr_val) {
3885 		q_vector->itr_val = new_val;
3886 		q_vector->set_itr = 1;
3887 	}
3888 clear_counts:
3889 	q_vector->rx.total_bytes = 0;
3890 	q_vector->rx.total_packets = 0;
3891 	q_vector->tx.total_bytes = 0;
3892 	q_vector->tx.total_packets = 0;
3893 }
3894 
3895 static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
3896 {
3897 	struct igc_adapter *adapter = q_vector->adapter;
3898 	struct igc_hw *hw = &adapter->hw;
3899 
3900 	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
3901 	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
3902 		if (adapter->num_q_vectors == 1)
3903 			igc_set_itr(q_vector);
3904 		else
3905 			igc_update_ring_itr(q_vector);
3906 	}
3907 
3908 	if (!test_bit(__IGC_DOWN, &adapter->state)) {
3909 		if (adapter->msix_entries)
3910 			wr32(IGC_EIMS, q_vector->eims_value);
3911 		else
3912 			igc_irq_enable(adapter);
3913 	}
3914 }
3915 
3916 static void igc_add_ring(struct igc_ring *ring,
3917 			 struct igc_ring_container *head)
3918 {
3919 	head->ring = ring;
3920 	head->count++;
3921 }
3922 
3923 /**
3924  * igc_cache_ring_register - Descriptor ring to register mapping
3925  * @adapter: board private structure to initialize
3926  *
3927  * Once we know the feature-set enabled for the device, we'll cache
3928  * the register offset the descriptor ring is assigned to.
3929  */
3930 static void igc_cache_ring_register(struct igc_adapter *adapter)
3931 {
3932 	int i = 0, j = 0;
3933 
3934 	switch (adapter->hw.mac.type) {
3935 	case igc_i225:
3936 	default:
3937 		for (; i < adapter->num_rx_queues; i++)
3938 			adapter->rx_ring[i]->reg_idx = i;
3939 		for (; j < adapter->num_tx_queues; j++)
3940 			adapter->tx_ring[j]->reg_idx = j;
3941 		break;
3942 	}
3943 }
3944 
3945 /**
3946  * igc_poll - NAPI Rx polling callback
3947  * @napi: napi polling structure
3948  * @budget: count of how many packets we should handle
3949  */
3950 static int igc_poll(struct napi_struct *napi, int budget)
3951 {
3952 	struct igc_q_vector *q_vector = container_of(napi,
3953 						     struct igc_q_vector,
3954 						     napi);
3955 	struct igc_ring *rx_ring = q_vector->rx.ring;
3956 	bool clean_complete = true;
3957 	int work_done = 0;
3958 
3959 	if (q_vector->tx.ring)
3960 		clean_complete = igc_clean_tx_irq(q_vector, budget);
3961 
3962 	if (rx_ring) {
3963 		int cleaned = rx_ring->xsk_pool ?
3964 			      igc_clean_rx_irq_zc(q_vector, budget) :
3965 			      igc_clean_rx_irq(q_vector, budget);
3966 
3967 		work_done += cleaned;
3968 		if (cleaned >= budget)
3969 			clean_complete = false;
3970 	}
3971 
3972 	/* If all work not completed, return budget and keep polling */
3973 	if (!clean_complete)
3974 		return budget;
3975 
3976 	/* Exit the polling mode, but don't re-enable interrupts if stack might
3977 	 * poll us due to busy-polling
3978 	 */
3979 	if (likely(napi_complete_done(napi, work_done)))
3980 		igc_ring_irq_enable(q_vector);
3981 
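	/* napi_complete_done() has already run, so never report the full
	 * budget back to the NAPI core from here.
	 */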
3982 	return min(work_done, budget - 1);
3983 }
3984 
3985 /**
3986  * igc_alloc_q_vector - Allocate memory for a single interrupt vector
3987  * @adapter: board private structure to initialize
3988  * @v_count: q_vectors allocated on adapter, used for ring interleaving
3989  * @v_idx: index of vector in adapter struct
3990  * @txr_count: total number of Tx rings to allocate
3991  * @txr_idx: index of first Tx ring to allocate
3992  * @rxr_count: total number of Rx rings to allocate
3993  * @rxr_idx: index of first Rx ring to allocate
3994  *
3995  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
3996  */
3997 static int igc_alloc_q_vector(struct igc_adapter *adapter,
3998 			      unsigned int v_count, unsigned int v_idx,
3999 			      unsigned int txr_count, unsigned int txr_idx,
4000 			      unsigned int rxr_count, unsigned int rxr_idx)
4001 {
4002 	struct igc_q_vector *q_vector;
4003 	struct igc_ring *ring;
4004 	int ring_count;
4005 
4006 	/* igc only supports 1 Tx and/or 1 Rx queue per vector */
4007 	if (txr_count > 1 || rxr_count > 1)
4008 		return -ENOMEM;
4009 
4010 	ring_count = txr_count + rxr_count;
4011 
4012 	/* allocate q_vector and rings */
4013 	q_vector = adapter->q_vector[v_idx];
4014 	if (!q_vector)
4015 		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4016 				   GFP_KERNEL);
4017 	else
4018 		memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4019 	if (!q_vector)
4020 		return -ENOMEM;
4021 
4022 	/* initialize NAPI */
4023 	netif_napi_add(adapter->netdev, &q_vector->napi,
4024 		       igc_poll, 64);
4025 
4026 	/* tie q_vector and adapter together */
4027 	adapter->q_vector[v_idx] = q_vector;
4028 	q_vector->adapter = adapter;
4029 
4030 	/* initialize work limits */
4031 	q_vector->tx.work_limit = adapter->tx_work_limit;
4032 
4033 	/* initialize ITR configuration */
4034 	q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4035 	q_vector->itr_val = IGC_START_ITR;
4036 
4037 	/* initialize pointer to rings */
4038 	ring = q_vector->ring;
4039 
4040 	/* initialize ITR */
4041 	if (rxr_count) {
4042 		/* rx or rx/tx vector */
4043 		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4044 			q_vector->itr_val = adapter->rx_itr_setting;
4045 	} else {
4046 		/* tx only vector */
4047 		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4048 			q_vector->itr_val = adapter->tx_itr_setting;
4049 	}
4050 
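	/* The Tx ring, when present, occupies the first slot of the
	 * q_vector's flexible ring array; the Rx ring follows it (note the
	 * ring++ at the end of the Tx block below).
	 */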
4051 	if (txr_count) {
4052 		/* assign generic ring traits */
4053 		ring->dev = &adapter->pdev->dev;
4054 		ring->netdev = adapter->netdev;
4055 
4056 		/* configure backlink on ring */
4057 		ring->q_vector = q_vector;
4058 
4059 		/* update q_vector Tx values */
4060 		igc_add_ring(ring, &q_vector->tx);
4061 
4062 		/* apply Tx specific ring traits */
4063 		ring->count = adapter->tx_ring_count;
4064 		ring->queue_index = txr_idx;
4065 
4066 		/* assign ring to adapter */
4067 		adapter->tx_ring[txr_idx] = ring;
4068 
4069 		/* push pointer to next ring */
4070 		ring++;
4071 	}
4072 
4073 	if (rxr_count) {
4074 		/* assign generic ring traits */
4075 		ring->dev = &adapter->pdev->dev;
4076 		ring->netdev = adapter->netdev;
4077 
4078 		/* configure backlink on ring */
4079 		ring->q_vector = q_vector;
4080 
4081 		/* update q_vector Rx values */
4082 		igc_add_ring(ring, &q_vector->rx);
4083 
4084 		/* apply Rx specific ring traits */
4085 		ring->count = adapter->rx_ring_count;
4086 		ring->queue_index = rxr_idx;
4087 
4088 		/* assign ring to adapter */
4089 		adapter->rx_ring[rxr_idx] = ring;
4090 	}
4091 
4092 	return 0;
4093 }
4094 
4095 /**
4096  * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4097  * @adapter: board private structure to initialize
4098  *
4099  * We allocate one q_vector per queue interrupt.  If allocation fails we
4100  * return -ENOMEM.
4101  */
4102 static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4103 {
4104 	int rxr_remaining = adapter->num_rx_queues;
4105 	int txr_remaining = adapter->num_tx_queues;
4106 	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4107 	int q_vectors = adapter->num_q_vectors;
4108 	int err;
4109 
4110 	if (q_vectors >= (rxr_remaining + txr_remaining)) {
4111 		for (; rxr_remaining; v_idx++) {
4112 			err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4113 						 0, 0, 1, rxr_idx);
4114 
4115 			if (err)
4116 				goto err_out;
4117 
4118 			/* update counts and index */
4119 			rxr_remaining--;
4120 			rxr_idx++;
4121 		}
4122 	}
4123 
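	/* Distribute the remaining rings across the remaining vectors, e.g.
	 * 4 Tx and 4 Rx queues sharing 4 vectors give rqpv = tqpv = 1, so
	 * each vector services one Tx/Rx pair.
	 */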
4124 	for (; v_idx < q_vectors; v_idx++) {
4125 		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4126 		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4127 
4128 		err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4129 					 tqpv, txr_idx, rqpv, rxr_idx);
4130 
4131 		if (err)
4132 			goto err_out;
4133 
4134 		/* update counts and index */
4135 		rxr_remaining -= rqpv;
4136 		txr_remaining -= tqpv;
4137 		rxr_idx++;
4138 		txr_idx++;
4139 	}
4140 
4141 	return 0;
4142 
4143 err_out:
4144 	adapter->num_tx_queues = 0;
4145 	adapter->num_rx_queues = 0;
4146 	adapter->num_q_vectors = 0;
4147 
4148 	while (v_idx--)
4149 		igc_free_q_vector(adapter, v_idx);
4150 
4151 	return -ENOMEM;
4152 }
4153 
4154 /**
4155  * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4156  * @adapter: Pointer to adapter structure
4157  * @msix: boolean for MSI-X capability
4158  *
4159  * This function initializes the interrupts and allocates all of the queues.
4160  */
4161 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4162 {
4163 	struct net_device *dev = adapter->netdev;
4164 	int err = 0;
4165 
4166 	igc_set_interrupt_capability(adapter, msix);
4167 
4168 	err = igc_alloc_q_vectors(adapter);
4169 	if (err) {
4170 		netdev_err(dev, "Unable to allocate memory for vectors\n");
4171 		goto err_alloc_q_vectors;
4172 	}
4173 
4174 	igc_cache_ring_register(adapter);
4175 
4176 	return 0;
4177 
4178 err_alloc_q_vectors:
4179 	igc_reset_interrupt_capability(adapter);
4180 	return err;
4181 }
4182 
4183 /**
4184  * igc_sw_init - Initialize general software structures (struct igc_adapter)
4185  * @adapter: board private structure to initialize
4186  *
4187  * igc_sw_init initializes the Adapter private data structure.
4188  * Fields are initialized based on PCI device information and
4189  * OS network device settings (MTU size).
4190  */
4191 static int igc_sw_init(struct igc_adapter *adapter)
4192 {
4193 	struct net_device *netdev = adapter->netdev;
4194 	struct pci_dev *pdev = adapter->pdev;
4195 	struct igc_hw *hw = &adapter->hw;
4196 
4197 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4198 
4199 	/* set default ring sizes */
4200 	adapter->tx_ring_count = IGC_DEFAULT_TXD;
4201 	adapter->rx_ring_count = IGC_DEFAULT_RXD;
4202 
4203 	/* set default ITR values */
4204 	adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4205 	adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4206 
4207 	/* set default work limits */
4208 	adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4209 
4210 	/* derive the max frame size from the current MTU */
4211 	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4212 				VLAN_HLEN;
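	/* e.g. the default 1500 byte MTU yields a 1522 byte max_frame_size
	 * (1500 + 14 byte Ethernet header + 4 byte FCS + 4 byte VLAN tag).
	 */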
4213 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4214 
4215 	mutex_init(&adapter->nfc_rule_lock);
4216 	INIT_LIST_HEAD(&adapter->nfc_rule_list);
4217 	adapter->nfc_rule_count = 0;
4218 
4219 	spin_lock_init(&adapter->stats64_lock);
4220 	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
4221 	adapter->flags |= IGC_FLAG_HAS_MSIX;
4222 
4223 	igc_init_queue_configuration(adapter);
4224 
4225 	/* This call may decrease the number of queues */
4226 	if (igc_init_interrupt_scheme(adapter, true)) {
4227 		netdev_err(netdev, "Unable to allocate memory for queues\n");
4228 		return -ENOMEM;
4229 	}
4230 
4231 	/* Explicitly disable IRQ since the NIC can be in any state. */
4232 	igc_irq_disable(adapter);
4233 
4234 	set_bit(__IGC_DOWN, &adapter->state);
4235 
4236 	return 0;
4237 }
4238 
4239 /**
4240  * igc_up - Open the interface and prepare it to handle traffic
4241  * @adapter: board private structure
4242  */
4243 void igc_up(struct igc_adapter *adapter)
4244 {
4245 	struct igc_hw *hw = &adapter->hw;
4246 	int i = 0;
4247 
4248 	/* hardware has been reset, we need to reload some things */
4249 	igc_configure(adapter);
4250 
4251 	clear_bit(__IGC_DOWN, &adapter->state);
4252 
4253 	for (i = 0; i < adapter->num_q_vectors; i++)
4254 		napi_enable(&adapter->q_vector[i]->napi);
4255 
4256 	if (adapter->msix_entries)
4257 		igc_configure_msix(adapter);
4258 	else
4259 		igc_assign_vector(adapter->q_vector[0], 0);
4260 
4261 	/* Clear any pending interrupts. */
4262 	rd32(IGC_ICR);
4263 	igc_irq_enable(adapter);
4264 
4265 	netif_tx_start_all_queues(adapter->netdev);
4266 
4267 	/* start the watchdog. */
4268 	hw->mac.get_link_status = true;
4269 	schedule_work(&adapter->watchdog_task);
4270 }
4271 
4272 /**
4273  * igc_update_stats - Update the board statistics counters
4274  * @adapter: board private structure
4275  */
4276 void igc_update_stats(struct igc_adapter *adapter)
4277 {
4278 	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4279 	struct pci_dev *pdev = adapter->pdev;
4280 	struct igc_hw *hw = &adapter->hw;
4281 	u64 _bytes, _packets;
4282 	u64 bytes, packets;
4283 	unsigned int start;
4284 	u32 mpc;
4285 	int i;
4286 
4287 	/* Prevent stats update while adapter is being reset, or if the pci
4288 	 * connection is down.
4289 	 */
4290 	if (adapter->link_speed == 0)
4291 		return;
4292 	if (pci_channel_offline(pdev))
4293 		return;
4294 
4295 	packets = 0;
4296 	bytes = 0;
4297 
4298 	rcu_read_lock();
4299 	for (i = 0; i < adapter->num_rx_queues; i++) {
4300 		struct igc_ring *ring = adapter->rx_ring[i];
4301 		u32 rqdpc = rd32(IGC_RQDPC(i));
4302 
4303 		if (hw->mac.type >= igc_i225)
4304 			wr32(IGC_RQDPC(i), 0);
4305 
4306 		if (rqdpc) {
4307 			ring->rx_stats.drops += rqdpc;
4308 			net_stats->rx_fifo_errors += rqdpc;
4309 		}
4310 
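		/* Snapshot the per-ring counters; retry if the writer side
		 * updated them while we were reading (the seqcount changed).
		 */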
4311 		do {
4312 			start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
4313 			_bytes = ring->rx_stats.bytes;
4314 			_packets = ring->rx_stats.packets;
4315 		} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
4316 		bytes += _bytes;
4317 		packets += _packets;
4318 	}
4319 
4320 	net_stats->rx_bytes = bytes;
4321 	net_stats->rx_packets = packets;
4322 
4323 	packets = 0;
4324 	bytes = 0;
4325 	for (i = 0; i < adapter->num_tx_queues; i++) {
4326 		struct igc_ring *ring = adapter->tx_ring[i];
4327 
4328 		do {
4329 			start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
4330 			_bytes = ring->tx_stats.bytes;
4331 			_packets = ring->tx_stats.packets;
4332 		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
4333 		bytes += _bytes;
4334 		packets += _packets;
4335 	}
4336 	net_stats->tx_bytes = bytes;
4337 	net_stats->tx_packets = packets;
4338 	rcu_read_unlock();
4339 
4340 	/* read stats registers */
4341 	adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4342 	adapter->stats.gprc += rd32(IGC_GPRC);
4343 	adapter->stats.gorc += rd32(IGC_GORCL);
4344 	rd32(IGC_GORCH); /* clear GORCL */
4345 	adapter->stats.bprc += rd32(IGC_BPRC);
4346 	adapter->stats.mprc += rd32(IGC_MPRC);
4347 	adapter->stats.roc += rd32(IGC_ROC);
4348 
4349 	adapter->stats.prc64 += rd32(IGC_PRC64);
4350 	adapter->stats.prc127 += rd32(IGC_PRC127);
4351 	adapter->stats.prc255 += rd32(IGC_PRC255);
4352 	adapter->stats.prc511 += rd32(IGC_PRC511);
4353 	adapter->stats.prc1023 += rd32(IGC_PRC1023);
4354 	adapter->stats.prc1522 += rd32(IGC_PRC1522);
4355 	adapter->stats.tlpic += rd32(IGC_TLPIC);
4356 	adapter->stats.rlpic += rd32(IGC_RLPIC);
4357 	adapter->stats.hgptc += rd32(IGC_HGPTC);
4358 
4359 	mpc = rd32(IGC_MPC);
4360 	adapter->stats.mpc += mpc;
4361 	net_stats->rx_fifo_errors += mpc;
4362 	adapter->stats.scc += rd32(IGC_SCC);
4363 	adapter->stats.ecol += rd32(IGC_ECOL);
4364 	adapter->stats.mcc += rd32(IGC_MCC);
4365 	adapter->stats.latecol += rd32(IGC_LATECOL);
4366 	adapter->stats.dc += rd32(IGC_DC);
4367 	adapter->stats.rlec += rd32(IGC_RLEC);
4368 	adapter->stats.xonrxc += rd32(IGC_XONRXC);
4369 	adapter->stats.xontxc += rd32(IGC_XONTXC);
4370 	adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4371 	adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4372 	adapter->stats.fcruc += rd32(IGC_FCRUC);
4373 	adapter->stats.gptc += rd32(IGC_GPTC);
4374 	adapter->stats.gotc += rd32(IGC_GOTCL);
4375 	rd32(IGC_GOTCH); /* clear GOTCL */
4376 	adapter->stats.rnbc += rd32(IGC_RNBC);
4377 	adapter->stats.ruc += rd32(IGC_RUC);
4378 	adapter->stats.rfc += rd32(IGC_RFC);
4379 	adapter->stats.rjc += rd32(IGC_RJC);
4380 	adapter->stats.tor += rd32(IGC_TORH);
4381 	adapter->stats.tot += rd32(IGC_TOTH);
4382 	adapter->stats.tpr += rd32(IGC_TPR);
4383 
4384 	adapter->stats.ptc64 += rd32(IGC_PTC64);
4385 	adapter->stats.ptc127 += rd32(IGC_PTC127);
4386 	adapter->stats.ptc255 += rd32(IGC_PTC255);
4387 	adapter->stats.ptc511 += rd32(IGC_PTC511);
4388 	adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4389 	adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4390 
4391 	adapter->stats.mptc += rd32(IGC_MPTC);
4392 	adapter->stats.bptc += rd32(IGC_BPTC);
4393 
4394 	adapter->stats.tpt += rd32(IGC_TPT);
4395 	adapter->stats.colc += rd32(IGC_COLC);
4396 	adapter->stats.colc += rd32(IGC_RERC);
4397 
4398 	adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4399 
4400 	adapter->stats.tsctc += rd32(IGC_TSCTC);
4401 
4402 	adapter->stats.iac += rd32(IGC_IAC);
4403 
4404 	/* Fill out the OS statistics structure */
4405 	net_stats->multicast = adapter->stats.mprc;
4406 	net_stats->collisions = adapter->stats.colc;
4407 
4408 	/* Rx Errors */
4409 
4410 	/* RLEC on some newer hardware can be incorrect so build
4411 	 * our own version based on RUC and ROC
4412 	 */
4413 	net_stats->rx_errors = adapter->stats.rxerrc +
4414 		adapter->stats.crcerrs + adapter->stats.algnerrc +
4415 		adapter->stats.ruc + adapter->stats.roc +
4416 		adapter->stats.cexterr;
4417 	net_stats->rx_length_errors = adapter->stats.ruc +
4418 				      adapter->stats.roc;
4419 	net_stats->rx_crc_errors = adapter->stats.crcerrs;
4420 	net_stats->rx_frame_errors = adapter->stats.algnerrc;
4421 	net_stats->rx_missed_errors = adapter->stats.mpc;
4422 
4423 	/* Tx Errors */
4424 	net_stats->tx_errors = adapter->stats.ecol +
4425 			       adapter->stats.latecol;
4426 	net_stats->tx_aborted_errors = adapter->stats.ecol;
4427 	net_stats->tx_window_errors = adapter->stats.latecol;
4428 	net_stats->tx_carrier_errors = adapter->stats.tncrs;
4429 
4430 	/* Tx Dropped needs to be maintained elsewhere */
4431 
4432 	/* Management Stats */
4433 	adapter->stats.mgptc += rd32(IGC_MGTPTC);
4434 	adapter->stats.mgprc += rd32(IGC_MGTPRC);
4435 	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4436 }
4437 
4438 /**
4439  * igc_down - Close the interface
4440  * @adapter: board private structure
4441  */
4442 void igc_down(struct igc_adapter *adapter)
4443 {
4444 	struct net_device *netdev = adapter->netdev;
4445 	struct igc_hw *hw = &adapter->hw;
4446 	u32 tctl, rctl;
4447 	int i = 0;
4448 
4449 	set_bit(__IGC_DOWN, &adapter->state);
4450 
4451 	igc_ptp_suspend(adapter);
4452 
4453 	/* disable receives in the hardware */
4454 	rctl = rd32(IGC_RCTL);
4455 	wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
4456 	/* flush and sleep below */
4457 
4458 	/* set trans_start so we don't get spurious watchdogs during reset */
4459 	netif_trans_update(netdev);
4460 
4461 	netif_carrier_off(netdev);
4462 	netif_tx_stop_all_queues(netdev);
4463 
4464 	/* disable transmits in the hardware */
4465 	tctl = rd32(IGC_TCTL);
4466 	tctl &= ~IGC_TCTL_EN;
4467 	wr32(IGC_TCTL, tctl);
4468 	/* flush both disables and wait for them to finish */
4469 	wrfl();
4470 	usleep_range(10000, 20000);
4471 
4472 	igc_irq_disable(adapter);
4473 
4474 	adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4475 
4476 	for (i = 0; i < adapter->num_q_vectors; i++) {
4477 		if (adapter->q_vector[i]) {
4478 			napi_synchronize(&adapter->q_vector[i]->napi);
4479 			napi_disable(&adapter->q_vector[i]->napi);
4480 		}
4481 	}
4482 
4483 	del_timer_sync(&adapter->watchdog_timer);
4484 	del_timer_sync(&adapter->phy_info_timer);
4485 
4486 	/* record the stats before reset */
4487 	spin_lock(&adapter->stats64_lock);
4488 	igc_update_stats(adapter);
4489 	spin_unlock(&adapter->stats64_lock);
4490 
4491 	adapter->link_speed = 0;
4492 	adapter->link_duplex = 0;
4493 
4494 	if (!pci_channel_offline(adapter->pdev))
4495 		igc_reset(adapter);
4496 
4497 	/* clear VLAN promisc flag so VFTA will be updated if necessary */
4498 	adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
4499 
4500 	igc_clean_all_tx_rings(adapter);
4501 	igc_clean_all_rx_rings(adapter);
4502 }
4503 
4504 void igc_reinit_locked(struct igc_adapter *adapter)
4505 {
4506 	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4507 		usleep_range(1000, 2000);
4508 	igc_down(adapter);
4509 	igc_up(adapter);
4510 	clear_bit(__IGC_RESETTING, &adapter->state);
4511 }
4512 
4513 static void igc_reset_task(struct work_struct *work)
4514 {
4515 	struct igc_adapter *adapter;
4516 
4517 	adapter = container_of(work, struct igc_adapter, reset_task);
4518 
4519 	rtnl_lock();
4520 	/* If we're already down or resetting, just bail */
4521 	if (test_bit(__IGC_DOWN, &adapter->state) ||
4522 	    test_bit(__IGC_RESETTING, &adapter->state)) {
4523 		rtnl_unlock();
4524 		return;
4525 	}
4526 
4527 	igc_rings_dump(adapter);
4528 	igc_regs_dump(adapter);
4529 	netdev_err(adapter->netdev, "Reset adapter\n");
4530 	igc_reinit_locked(adapter);
4531 	rtnl_unlock();
4532 }
4533 
4534 /**
4535  * igc_change_mtu - Change the Maximum Transfer Unit
4536  * @netdev: network interface device structure
4537  * @new_mtu: new value for maximum frame size
4538  *
4539  * Returns 0 on success, negative on failure
4540  */
4541 static int igc_change_mtu(struct net_device *netdev, int new_mtu)
4542 {
4543 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4544 	struct igc_adapter *adapter = netdev_priv(netdev);
4545 
4546 	if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
4547 		netdev_dbg(netdev, "Jumbo frames not supported with XDP");
4548 		return -EINVAL;
4549 	}
4550 
4551 	/* adjust max frame to be at least the size of a standard frame */
4552 	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
4553 		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
4554 
4555 	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
4556 		usleep_range(1000, 2000);
4557 
4558 	/* igc_down has a dependency on max_frame_size */
4559 	adapter->max_frame_size = max_frame;
4560 
4561 	if (netif_running(netdev))
4562 		igc_down(adapter);
4563 
4564 	netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
4565 	netdev->mtu = new_mtu;
4566 
4567 	if (netif_running(netdev))
4568 		igc_up(adapter);
4569 	else
4570 		igc_reset(adapter);
4571 
4572 	clear_bit(__IGC_RESETTING, &adapter->state);
4573 
4574 	return 0;
4575 }
4576 
4577 /**
4578  * igc_get_stats64 - Get System Network Statistics
4579  * @netdev: network interface device structure
4580  * @stats: rtnl_link_stats64 pointer
4581  *
4582  * Fills the supplied rtnl_link_stats64 structure with the device statistics.
4583  * The statistics are updated here and also from the timer callback.
4584  */
4585 static void igc_get_stats64(struct net_device *netdev,
4586 			    struct rtnl_link_stats64 *stats)
4587 {
4588 	struct igc_adapter *adapter = netdev_priv(netdev);
4589 
4590 	spin_lock(&adapter->stats64_lock);
4591 	if (!test_bit(__IGC_RESETTING, &adapter->state))
4592 		igc_update_stats(adapter);
4593 	memcpy(stats, &adapter->stats64, sizeof(*stats));
4594 	spin_unlock(&adapter->stats64_lock);
4595 }
4596 
4597 static netdev_features_t igc_fix_features(struct net_device *netdev,
4598 					  netdev_features_t features)
4599 {
4600 	/* Since there is no support for separate Rx/Tx vlan accel
4601 	 * enable/disable make sure Tx flag is always in same state as Rx.
4602 	 */
4603 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
4604 		features |= NETIF_F_HW_VLAN_CTAG_TX;
4605 	else
4606 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
4607 
4608 	return features;
4609 }
4610 
4611 static int igc_set_features(struct net_device *netdev,
4612 			    netdev_features_t features)
4613 {
4614 	netdev_features_t changed = netdev->features ^ features;
4615 	struct igc_adapter *adapter = netdev_priv(netdev);
4616 
4617 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
4618 		igc_vlan_mode(netdev, features);
4619 
4620 	/* Only RXALL and NTUPLE changes require the reset handling below */
4621 	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
4622 		return 0;
4623 
4624 	if (!(features & NETIF_F_NTUPLE))
4625 		igc_flush_nfc_rules(adapter);
4626 
4627 	netdev->features = features;
4628 
4629 	if (netif_running(netdev))
4630 		igc_reinit_locked(adapter);
4631 	else
4632 		igc_reset(adapter);
4633 
4634 	return 1;
4635 }
4636 
4637 static netdev_features_t
4638 igc_features_check(struct sk_buff *skb, struct net_device *dev,
4639 		   netdev_features_t features)
4640 {
4641 	unsigned int network_hdr_len, mac_hdr_len;
4642 
4643 	/* Make certain the headers can be described by a context descriptor */
4644 	mac_hdr_len = skb_network_header(skb) - skb->data;
4645 	if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
4646 		return features & ~(NETIF_F_HW_CSUM |
4647 				    NETIF_F_SCTP_CRC |
4648 				    NETIF_F_HW_VLAN_CTAG_TX |
4649 				    NETIF_F_TSO |
4650 				    NETIF_F_TSO6);
4651 
4652 	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
4653 	if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN))
4654 		return features & ~(NETIF_F_HW_CSUM |
4655 				    NETIF_F_SCTP_CRC |
4656 				    NETIF_F_TSO |
4657 				    NETIF_F_TSO6);
4658 
4659 	/* We can only support IPv4 TSO in tunnels if we can mangle the
4660 	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
4661 	 */
4662 	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
4663 		features &= ~NETIF_F_TSO;
4664 
4665 	return features;
4666 }
4667 
4668 static void igc_tsync_interrupt(struct igc_adapter *adapter)
4669 {
4670 	u32 ack, tsauxc, sec, nsec, tsicr;
4671 	struct igc_hw *hw = &adapter->hw;
4672 	struct ptp_clock_event event;
4673 	struct timespec64 ts;
4674 
4675 	tsicr = rd32(IGC_TSICR);
4676 	ack = 0;
4677 
4678 	if (tsicr & IGC_TSICR_SYS_WRAP) {
4679 		event.type = PTP_CLOCK_PPS;
4680 		if (adapter->ptp_caps.pps)
4681 			ptp_clock_event(adapter->ptp_clock, &event);
4682 		ack |= IGC_TSICR_SYS_WRAP;
4683 	}
4684 
4685 	if (tsicr & IGC_TSICR_TXTS) {
4686 		/* retrieve hardware timestamp */
4687 		schedule_work(&adapter->ptp_tx_work);
4688 		ack |= IGC_TSICR_TXTS;
4689 	}
4690 
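	/* Target time 0 fired: advance the compare time by one period to
	 * re-arm the next periodic output edge.
	 */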
4691 	if (tsicr & IGC_TSICR_TT0) {
4692 		spin_lock(&adapter->tmreg_lock);
4693 		ts = timespec64_add(adapter->perout[0].start,
4694 				    adapter->perout[0].period);
4695 		wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
4696 		wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
4697 		tsauxc = rd32(IGC_TSAUXC);
4698 		tsauxc |= IGC_TSAUXC_EN_TT0;
4699 		wr32(IGC_TSAUXC, tsauxc);
4700 		adapter->perout[0].start = ts;
4701 		spin_unlock(&adapter->tmreg_lock);
4702 		ack |= IGC_TSICR_TT0;
4703 	}
4704 
4705 	if (tsicr & IGC_TSICR_TT1) {
4706 		spin_lock(&adapter->tmreg_lock);
4707 		ts = timespec64_add(adapter->perout[1].start,
4708 				    adapter->perout[1].period);
4709 		wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
4710 		wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
4711 		tsauxc = rd32(IGC_TSAUXC);
4712 		tsauxc |= IGC_TSAUXC_EN_TT1;
4713 		wr32(IGC_TSAUXC, tsauxc);
4714 		adapter->perout[1].start = ts;
4715 		spin_unlock(&adapter->tmreg_lock);
4716 		ack |= IGC_TSICR_TT1;
4717 	}
4718 
4719 	if (tsicr & IGC_TSICR_AUTT0) {
4720 		nsec = rd32(IGC_AUXSTMPL0);
4721 		sec  = rd32(IGC_AUXSTMPH0);
4722 		event.type = PTP_CLOCK_EXTTS;
4723 		event.index = 0;
4724 		event.timestamp = sec * NSEC_PER_SEC + nsec;
4725 		ptp_clock_event(adapter->ptp_clock, &event);
4726 		ack |= IGC_TSICR_AUTT0;
4727 	}
4728 
4729 	if (tsicr & IGC_TSICR_AUTT1) {
4730 		nsec = rd32(IGC_AUXSTMPL1);
4731 		sec  = rd32(IGC_AUXSTMPH1);
4732 		event.type = PTP_CLOCK_EXTTS;
4733 		event.index = 1;
4734 		event.timestamp = sec * NSEC_PER_SEC + nsec;
4735 		ptp_clock_event(adapter->ptp_clock, &event);
4736 		ack |= IGC_TSICR_AUTT1;
4737 	}
4738 
4739 	/* acknowledge the interrupts */
4740 	wr32(IGC_TSICR, ack);
4741 }
4742 
4743 /**
4744  * igc_msix_other - msix other interrupt handler
4745  * @irq: interrupt number
4746  * @data: pointer to the adapter private structure
4747  */
4748 static irqreturn_t igc_msix_other(int irq, void *data)
4749 {
4750 	struct igc_adapter *adapter = data;
4751 	struct igc_hw *hw = &adapter->hw;
4752 	u32 icr = rd32(IGC_ICR);
4753 
4754 	/* reading ICR causes bit 31 of EICR to be cleared */
4755 	if (icr & IGC_ICR_DRSTA)
4756 		schedule_work(&adapter->reset_task);
4757 
4758 	if (icr & IGC_ICR_DOUTSYNC) {
4759 		/* HW is reporting DMA is out of sync */
4760 		adapter->stats.doosync++;
4761 	}
4762 
4763 	if (icr & IGC_ICR_LSC) {
4764 		hw->mac.get_link_status = true;
4765 		/* guard against interrupt when we're going down */
4766 		if (!test_bit(__IGC_DOWN, &adapter->state))
4767 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
4768 	}
4769 
4770 	if (icr & IGC_ICR_TS)
4771 		igc_tsync_interrupt(adapter);
4772 
4773 	wr32(IGC_EIMS, adapter->eims_other);
4774 
4775 	return IRQ_HANDLED;
4776 }
4777 
4778 static void igc_write_itr(struct igc_q_vector *q_vector)
4779 {
4780 	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
4781 
4782 	if (!q_vector->set_itr)
4783 		return;
4784 
4785 	if (!itr_val)
4786 		itr_val = IGC_ITR_VAL_MASK;
4787 
4788 	itr_val |= IGC_EITR_CNT_IGNR;
4789 
4790 	writel(itr_val, q_vector->itr_register);
4791 	q_vector->set_itr = 0;
4792 }
4793 
4794 static irqreturn_t igc_msix_ring(int irq, void *data)
4795 {
4796 	struct igc_q_vector *q_vector = data;
4797 
4798 	/* Write the ITR value calculated from the previous interrupt. */
4799 	igc_write_itr(q_vector);
4800 
4801 	napi_schedule(&q_vector->napi);
4802 
4803 	return IRQ_HANDLED;
4804 }
4805 
4806 /**
4807  * igc_request_msix - Initialize MSI-X interrupts
4808  * @adapter: Pointer to adapter structure
4809  *
4810  * igc_request_msix allocates MSI-X vectors and requests interrupts from the
4811  * kernel.
4812  */
4813 static int igc_request_msix(struct igc_adapter *adapter)
4814 {
4815 	int i = 0, err = 0, vector = 0, free_vector = 0;
4816 	struct net_device *netdev = adapter->netdev;
4817 
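	/* Vector 0 services link and other misc causes; vectors 1 through
	 * num_q_vectors are requested for the queue vectors below.
	 */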
4818 	err = request_irq(adapter->msix_entries[vector].vector,
4819 			  &igc_msix_other, 0, netdev->name, adapter);
4820 	if (err)
4821 		goto err_out;
4822 
4823 	for (i = 0; i < adapter->num_q_vectors; i++) {
4824 		struct igc_q_vector *q_vector = adapter->q_vector[i];
4825 
4826 		vector++;
4827 
4828 		q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
4829 
4830 		if (q_vector->rx.ring && q_vector->tx.ring)
4831 			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
4832 				q_vector->rx.ring->queue_index);
4833 		else if (q_vector->tx.ring)
4834 			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
4835 				q_vector->tx.ring->queue_index);
4836 		else if (q_vector->rx.ring)
4837 			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
4838 				q_vector->rx.ring->queue_index);
4839 		else
4840 			sprintf(q_vector->name, "%s-unused", netdev->name);
4841 
4842 		err = request_irq(adapter->msix_entries[vector].vector,
4843 				  igc_msix_ring, 0, q_vector->name,
4844 				  q_vector);
4845 		if (err)
4846 			goto err_free;
4847 	}
4848 
4849 	igc_configure_msix(adapter);
4850 	return 0;
4851 
4852 err_free:
4853 	/* free already assigned IRQs */
4854 	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
4855 
4856 	vector--;
4857 	for (i = 0; i < vector; i++) {
4858 		free_irq(adapter->msix_entries[free_vector++].vector,
4859 			 adapter->q_vector[i]);
4860 	}
4861 err_out:
4862 	return err;
4863 }
4864 
4865 /**
4866  * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
4867  * @adapter: Pointer to adapter structure
4868  *
4869  * This function resets the device so that it has 0 rx queues, tx queues, and
4870  * MSI-X interrupts allocated.
4871  */
4872 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
4873 {
4874 	igc_free_q_vectors(adapter);
4875 	igc_reset_interrupt_capability(adapter);
4876 }
4877 
4878 /* Need to wait a few seconds after link up to get diagnostic information from
4879  * the phy
4880  */
4881 static void igc_update_phy_info(struct timer_list *t)
4882 {
4883 	struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
4884 
4885 	igc_get_phy_info(&adapter->hw);
4886 }
4887 
4888 /**
4889  * igc_has_link - check shared code for link and determine up/down
4890  * @adapter: pointer to driver private info
4891  */
4892 bool igc_has_link(struct igc_adapter *adapter)
4893 {
4894 	struct igc_hw *hw = &adapter->hw;
4895 	bool link_active = false;
4896 
4897 	/* get_link_status is set on LSC (link status) interrupt or
4898 	 * rx sequence error interrupt.  get_link_status will stay
4899 	 * set until igc_check_for_link establishes link
4900 	 * for copper adapters ONLY
4901 	 */
4902 	switch (hw->phy.media_type) {
4903 	case igc_media_type_copper:
4904 		if (!hw->mac.get_link_status)
4905 			return true;
4906 		hw->mac.ops.check_for_link(hw);
4907 		link_active = !hw->mac.get_link_status;
4908 		break;
4909 	default:
4910 	case igc_media_type_unknown:
4911 		break;
4912 	}
4913 
4914 	if (hw->mac.type == igc_i225 &&
4915 	    hw->phy.id == I225_I_PHY_ID) {
4916 		if (!netif_carrier_ok(adapter->netdev)) {
4917 			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4918 		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
4919 			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
4920 			adapter->link_check_timeout = jiffies;
4921 		}
4922 	}
4923 
4924 	return link_active;
4925 }
4926 
4927 /**
4928  * igc_watchdog - Timer Call-back
4929  * @t: timer for the watchdog
4930  */
4931 static void igc_watchdog(struct timer_list *t)
4932 {
4933 	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
4934 	/* Do the rest outside of interrupt context */
4935 	schedule_work(&adapter->watchdog_task);
4936 }
4937 
4938 static void igc_watchdog_task(struct work_struct *work)
4939 {
4940 	struct igc_adapter *adapter = container_of(work,
4941 						   struct igc_adapter,
4942 						   watchdog_task);
4943 	struct net_device *netdev = adapter->netdev;
4944 	struct igc_hw *hw = &adapter->hw;
4945 	struct igc_phy_info *phy = &hw->phy;
4946 	u16 phy_data, retry_count = 20;
4947 	u32 link;
4948 	int i;
4949 
4950 	link = igc_has_link(adapter);
4951 
4952 	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
4953 		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
4954 			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
4955 		else
4956 			link = false;
4957 	}
4958 
4959 	if (link) {
4960 		/* Cancel scheduled suspend requests. */
4961 		pm_runtime_resume(netdev->dev.parent);
4962 
4963 		if (!netif_carrier_ok(netdev)) {
4964 			u32 ctrl;
4965 
4966 			hw->mac.ops.get_speed_and_duplex(hw,
4967 							 &adapter->link_speed,
4968 							 &adapter->link_duplex);
4969 
4970 			ctrl = rd32(IGC_CTRL);
4971 			/* Link status message must follow this format */
4972 			netdev_info(netdev,
4973 				    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
4974 				    adapter->link_speed,
4975 				    adapter->link_duplex == FULL_DUPLEX ?
4976 				    "Full" : "Half",
4977 				    (ctrl & IGC_CTRL_TFCE) &&
4978 				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
4979 				    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
4980 				    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
4981 
4982 			/* disable EEE if enabled */
4983 			if ((adapter->flags & IGC_FLAG_EEE) &&
4984 			    adapter->link_duplex == HALF_DUPLEX) {
4985 				netdev_info(netdev,
4986 					    "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
4987 				adapter->hw.dev_spec._base.eee_enable = false;
4988 				adapter->flags &= ~IGC_FLAG_EEE;
4989 			}
4990 
4991 			/* check if SmartSpeed worked */
4992 			igc_check_downshift(hw);
4993 			if (phy->speed_downgraded)
4994 				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
4995 
4996 			/* adjust timeout factor according to speed/duplex */
4997 			adapter->tx_timeout_factor = 1;
4998 			switch (adapter->link_speed) {
4999 			case SPEED_10:
5000 				adapter->tx_timeout_factor = 14;
5001 				break;
5002 			case SPEED_100:
5003 				/* maybe add some timeout factor ? */
5004 				break;
5005 			}
5006 
5007 			if (adapter->link_speed != SPEED_1000)
5008 				goto no_wait;
5009 
5010 			/* wait for Remote receiver status OK */
5011 retry_read_status:
5012 			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5013 					      &phy_data)) {
5014 				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5015 				    retry_count) {
5016 					msleep(100);
5017 					retry_count--;
5018 					goto retry_read_status;
5019 				} else if (!retry_count) {
5020 					netdev_err(netdev, "exceed max 2 second\n");
5021 				}
5022 			} else {
5023 				netdev_err(netdev, "read 1000Base-T Status Reg\n");
5024 			}
5025 no_wait:
5026 			netif_carrier_on(netdev);
5027 
5028 			/* link state has changed, schedule phy info update */
5029 			if (!test_bit(__IGC_DOWN, &adapter->state))
5030 				mod_timer(&adapter->phy_info_timer,
5031 					  round_jiffies(jiffies + 2 * HZ));
5032 		}
5033 	} else {
5034 		if (netif_carrier_ok(netdev)) {
5035 			adapter->link_speed = 0;
5036 			adapter->link_duplex = 0;
5037 
5038 			/* Link status message must follow this format */
5039 			netdev_info(netdev, "NIC Link is Down\n");
5040 			netif_carrier_off(netdev);
5041 
5042 			/* link state has changed, schedule phy info update */
5043 			if (!test_bit(__IGC_DOWN, &adapter->state))
5044 				mod_timer(&adapter->phy_info_timer,
5045 					  round_jiffies(jiffies + 2 * HZ));
5046 
5047 			/* link is down, time to check for alternate media */
5048 			if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
5049 				if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5050 					schedule_work(&adapter->reset_task);
5051 					/* return immediately */
5052 					return;
5053 				}
5054 			}
5055 			pm_schedule_suspend(netdev->dev.parent,
5056 					    MSEC_PER_SEC * 5);
5057 
5058 		/* also check for alternate media here */
5059 		} else if (!netif_carrier_ok(netdev) &&
5060 			   (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
5061 			if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
5062 				schedule_work(&adapter->reset_task);
5063 				/* return immediately */
5064 				return;
5065 			}
5066 		}
5067 	}
5068 
5069 	spin_lock(&adapter->stats64_lock);
5070 	igc_update_stats(adapter);
5071 	spin_unlock(&adapter->stats64_lock);
5072 
5073 	for (i = 0; i < adapter->num_tx_queues; i++) {
5074 		struct igc_ring *tx_ring = adapter->tx_ring[i];
5075 
5076 		if (!netif_carrier_ok(netdev)) {
5077 			/* We've lost link, so the controller stops DMA,
5078 			 * but we've got queued Tx work that's never going
5079 			 * to get done, so reset controller to flush Tx.
5080 			 * (Do the reset outside of interrupt context).
5081 			 */
5082 			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5083 				adapter->tx_timeout_count++;
5084 				schedule_work(&adapter->reset_task);
5085 				/* return immediately since reset is imminent */
5086 				return;
5087 			}
5088 		}
5089 
5090 		/* Force detection of hung controller every watchdog period */
5091 		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5092 	}
5093 
5094 	/* Cause software interrupt to ensure Rx ring is cleaned */
5095 	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5096 		u32 eics = 0;
5097 
5098 		for (i = 0; i < adapter->num_q_vectors; i++)
5099 			eics |= adapter->q_vector[i]->eims_value;
5100 		wr32(IGC_EICS, eics);
5101 	} else {
5102 		wr32(IGC_ICS, IGC_ICS_RXDMT0);
5103 	}
5104 
5105 	igc_ptp_tx_hang(adapter);
5106 
5107 	/* Reset the timer */
5108 	if (!test_bit(__IGC_DOWN, &adapter->state)) {
5109 		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5110 			mod_timer(&adapter->watchdog_timer,
5111 				  round_jiffies(jiffies +  HZ));
5112 		else
5113 			mod_timer(&adapter->watchdog_timer,
5114 				  round_jiffies(jiffies + 2 * HZ));
5115 	}
5116 }
5117 
5118 /**
5119  * igc_intr_msi - Interrupt Handler
5120  * @irq: interrupt number
5121  * @data: pointer to the adapter private structure
5122  */
5123 static irqreturn_t igc_intr_msi(int irq, void *data)
5124 {
5125 	struct igc_adapter *adapter = data;
5126 	struct igc_q_vector *q_vector = adapter->q_vector[0];
5127 	struct igc_hw *hw = &adapter->hw;
5128 	/* read ICR disables interrupts using IAM */
5129 	u32 icr = rd32(IGC_ICR);
5130 
5131 	igc_write_itr(q_vector);
5132 
5133 	if (icr & IGC_ICR_DRSTA)
5134 		schedule_work(&adapter->reset_task);
5135 
5136 	if (icr & IGC_ICR_DOUTSYNC) {
5137 		/* HW is reporting DMA is out of sync */
5138 		adapter->stats.doosync++;
5139 	}
5140 
5141 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5142 		hw->mac.get_link_status = true;
5143 		if (!test_bit(__IGC_DOWN, &adapter->state))
5144 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5145 	}
5146 
5147 	napi_schedule(&q_vector->napi);
5148 
5149 	return IRQ_HANDLED;
5150 }
5151 
5152 /**
5153  * igc_intr - Legacy Interrupt Handler
5154  * @irq: interrupt number
5155  * @data: pointer to the adapter private structure
5156  */
5157 static irqreturn_t igc_intr(int irq, void *data)
5158 {
5159 	struct igc_adapter *adapter = data;
5160 	struct igc_q_vector *q_vector = adapter->q_vector[0];
5161 	struct igc_hw *hw = &adapter->hw;
5162 	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5163 	 * need for the IMC write
5164 	 */
5165 	u32 icr = rd32(IGC_ICR);
5166 
5167 	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5168 	 * not set, then the adapter didn't send an interrupt
5169 	 */
5170 	if (!(icr & IGC_ICR_INT_ASSERTED))
5171 		return IRQ_NONE;
5172 
5173 	igc_write_itr(q_vector);
5174 
5175 	if (icr & IGC_ICR_DRSTA)
5176 		schedule_work(&adapter->reset_task);
5177 
5178 	if (icr & IGC_ICR_DOUTSYNC) {
5179 		/* HW is reporting DMA is out of sync */
5180 		adapter->stats.doosync++;
5181 	}
5182 
5183 	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5184 		hw->mac.get_link_status = true;
5185 		/* guard against interrupt when we're going down */
5186 		if (!test_bit(__IGC_DOWN, &adapter->state))
5187 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
5188 	}
5189 
5190 	napi_schedule(&q_vector->napi);
5191 
5192 	return IRQ_HANDLED;
5193 }
5194 
5195 static void igc_free_irq(struct igc_adapter *adapter)
5196 {
5197 	if (adapter->msix_entries) {
5198 		int vector = 0, i;
5199 
5200 		free_irq(adapter->msix_entries[vector++].vector, adapter);
5201 
5202 		for (i = 0; i < adapter->num_q_vectors; i++)
5203 			free_irq(adapter->msix_entries[vector++].vector,
5204 				 adapter->q_vector[i]);
5205 	} else {
5206 		free_irq(adapter->pdev->irq, adapter);
5207 	}
5208 }
5209 
5210 /**
5211  * igc_request_irq - initialize interrupts
5212  * @adapter: Pointer to adapter structure
5213  *
5214  * Attempts to configure interrupts using the best available
5215  * capabilities of the hardware and kernel.
5216  */
5217 static int igc_request_irq(struct igc_adapter *adapter)
5218 {
5219 	struct net_device *netdev = adapter->netdev;
5220 	struct pci_dev *pdev = adapter->pdev;
5221 	int err = 0;
5222 
5223 	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5224 		err = igc_request_msix(adapter);
5225 		if (!err)
5226 			goto request_done;
5227 		/* fall back to MSI */
5228 		igc_free_all_tx_resources(adapter);
5229 		igc_free_all_rx_resources(adapter);
5230 
5231 		igc_clear_interrupt_scheme(adapter);
5232 		err = igc_init_interrupt_scheme(adapter, false);
5233 		if (err)
5234 			goto request_done;
5235 		igc_setup_all_tx_resources(adapter);
5236 		igc_setup_all_rx_resources(adapter);
5237 		igc_configure(adapter);
5238 	}
5239 
5240 	igc_assign_vector(adapter->q_vector[0], 0);
5241 
5242 	if (adapter->flags & IGC_FLAG_HAS_MSI) {
5243 		err = request_irq(pdev->irq, &igc_intr_msi, 0,
5244 				  netdev->name, adapter);
5245 		if (!err)
5246 			goto request_done;
5247 
5248 		/* fall back to legacy interrupts */
5249 		igc_reset_interrupt_capability(adapter);
5250 		adapter->flags &= ~IGC_FLAG_HAS_MSI;
5251 	}
5252 
5253 	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5254 			  netdev->name, adapter);
5255 
5256 	if (err)
5257 		netdev_err(netdev, "Error %d getting interrupt\n", err);
5258 
5259 request_done:
5260 	return err;
5261 }
5262 
5263 /**
5264  * __igc_open - Called when a network interface is made active
5265  * @netdev: network interface device structure
5266  * @resuming: boolean indicating if the device is resuming
5267  *
5268  * Returns 0 on success, negative value on failure
5269  *
5270  * The open entry point is called when a network interface is made
5271  * active by the system (IFF_UP).  At this point all resources needed
5272  * for transmit and receive operations are allocated, the interrupt
5273  * handler is registered with the OS, the watchdog timer is started,
5274  * and the stack is notified that the interface is ready.
5275  */
5276 static int __igc_open(struct net_device *netdev, bool resuming)
5277 {
5278 	struct igc_adapter *adapter = netdev_priv(netdev);
5279 	struct pci_dev *pdev = adapter->pdev;
5280 	struct igc_hw *hw = &adapter->hw;
5281 	int err = 0;
5282 	int i = 0;
5283 
5284 	/* disallow open during test */
5285 
5286 	if (test_bit(__IGC_TESTING, &adapter->state)) {
5287 		WARN_ON(resuming);
5288 		return -EBUSY;
5289 	}
5290 
5291 	if (!resuming)
5292 		pm_runtime_get_sync(&pdev->dev);
5293 
5294 	netif_carrier_off(netdev);
5295 
5296 	/* allocate transmit descriptors */
5297 	err = igc_setup_all_tx_resources(adapter);
5298 	if (err)
5299 		goto err_setup_tx;
5300 
5301 	/* allocate receive descriptors */
5302 	err = igc_setup_all_rx_resources(adapter);
5303 	if (err)
5304 		goto err_setup_rx;
5305 
5306 	igc_power_up_link(adapter);
5307 
5308 	igc_configure(adapter);
5309 
5310 	err = igc_request_irq(adapter);
5311 	if (err)
5312 		goto err_req_irq;
5313 
5314 	/* Notify the stack of the actual queue counts. */
5315 	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5316 	if (err)
5317 		goto err_set_queues;
5318 
5319 	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5320 	if (err)
5321 		goto err_set_queues;
5322 
5323 	clear_bit(__IGC_DOWN, &adapter->state);
5324 
5325 	for (i = 0; i < adapter->num_q_vectors; i++)
5326 		napi_enable(&adapter->q_vector[i]->napi);
5327 
5328 	/* Clear any pending interrupts. */
5329 	rd32(IGC_ICR);
5330 	igc_irq_enable(adapter);
5331 
5332 	if (!resuming)
5333 		pm_runtime_put(&pdev->dev);
5334 
5335 	netif_tx_start_all_queues(netdev);
5336 
5337 	/* start the watchdog. */
5338 	hw->mac.get_link_status = true;
5339 	schedule_work(&adapter->watchdog_task);
5340 
5341 	return IGC_SUCCESS;
5342 
5343 err_set_queues:
5344 	igc_free_irq(adapter);
5345 err_req_irq:
5346 	igc_release_hw_control(adapter);
5347 	igc_power_down_phy_copper_base(&adapter->hw);
5348 	igc_free_all_rx_resources(adapter);
5349 err_setup_rx:
5350 	igc_free_all_tx_resources(adapter);
5351 err_setup_tx:
5352 	igc_reset(adapter);
5353 	if (!resuming)
5354 		pm_runtime_put(&pdev->dev);
5355 
5356 	return err;
5357 }
5358 
5359 int igc_open(struct net_device *netdev)
5360 {
5361 	return __igc_open(netdev, false);
5362 }
5363 
5364 /**
5365  * __igc_close - Disables a network interface
5366  * @netdev: network interface device structure
5367  * @suspending: boolean indicating the device is suspending
5368  *
5369  * Returns 0, this is not allowed to fail
5370  *
5371  * The close entry point is called when an interface is de-activated
5372  * by the OS.  The hardware is still under the driver's control, but
5373  * needs to be disabled.  A global MAC reset is issued to stop the
5374  * hardware, and all transmit and receive resources are freed.
5375  */
5376 static int __igc_close(struct net_device *netdev, bool suspending)
5377 {
5378 	struct igc_adapter *adapter = netdev_priv(netdev);
5379 	struct pci_dev *pdev = adapter->pdev;
5380 
5381 	WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5382 
5383 	if (!suspending)
5384 		pm_runtime_get_sync(&pdev->dev);
5385 
5386 	igc_down(adapter);
5387 
5388 	igc_release_hw_control(adapter);
5389 
5390 	igc_free_irq(adapter);
5391 
5392 	igc_free_all_tx_resources(adapter);
5393 	igc_free_all_rx_resources(adapter);
5394 
5395 	if (!suspending)
5396 		pm_runtime_put_sync(&pdev->dev);
5397 
5398 	return 0;
5399 }
5400 
5401 int igc_close(struct net_device *netdev)
5402 {
5403 	if (netif_device_present(netdev) || netdev->dismantle)
5404 		return __igc_close(netdev, false);
5405 	return 0;
5406 }
5407 
5408 /**
5409  * igc_ioctl - Access the hwtstamp interface
5410  * @netdev: network interface device structure
5411  * @ifr: interface request data
5412  * @cmd: ioctl command
5413  **/
5414 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5415 {
5416 	switch (cmd) {
5417 	case SIOCGHWTSTAMP:
5418 		return igc_ptp_get_ts_config(netdev, ifr);
5419 	case SIOCSHWTSTAMP:
5420 		return igc_ptp_set_ts_config(netdev, ifr);
5421 	default:
5422 		return -EOPNOTSUPP;
5423 	}
5424 }
5425 
5426 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
5427 				      bool enable)
5428 {
5429 	struct igc_ring *ring;
5430 	int i;
5431 
5432 	if (queue < 0 || queue >= adapter->num_tx_queues)
5433 		return -EINVAL;
5434 
5435 	ring = adapter->tx_ring[queue];
5436 	ring->launchtime_enable = enable;
5437 
5438 	if (adapter->base_time)
5439 		return 0;
5440 
5441 	adapter->cycle_time = NSEC_PER_SEC;
5442 
5443 	for (i = 0; i < adapter->num_tx_queues; i++) {
5444 		ring = adapter->tx_ring[i];
5445 		ring->start_time = 0;
5446 		ring->end_time = NSEC_PER_SEC;
5447 	}
5448 
5449 	return 0;
5450 }
5451 
5452 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
5453 {
5454 	struct timespec64 b;
5455 
5456 	b = ktime_to_timespec64(base_time);
5457 
5458 	return timespec64_compare(now, &b) > 0;
5459 }
5460 
5461 static bool validate_schedule(struct igc_adapter *adapter,
5462 			      const struct tc_taprio_qopt_offload *qopt)
5463 {
5464 	int queue_uses[IGC_MAX_TX_QUEUES] = { };
5465 	struct timespec64 now;
5466 	size_t n;
5467 
5468 	if (qopt->cycle_time_extension)
5469 		return false;
5470 
5471 	igc_ptp_read(adapter, &now);
5472 
5473 	/* If we program the controller's BASET registers with a time
5474 	 * in the future, it will hold all the packets until that
5475 	 * time, causing a lot of TX Hangs, so to avoid that, we
5476 	 * reject schedules that would start in the future.
5477 	 */
5478 	if (!is_base_time_past(qopt->base_time, &now))
5479 		return false;
5480 
5481 	for (n = 0; n < qopt->num_entries; n++) {
5482 		const struct tc_taprio_sched_entry *e;
5483 		int i;
5484 
5485 		e = &qopt->entries[n];
5486 
5487 		/* i225 only supports "global" frame preemption
5488 		 * settings.
5489 		 */
5490 		if (e->command != TC_TAPRIO_CMD_SET_GATES)
5491 			return false;
5492 
5493 		for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
5494 			if (e->gate_mask & BIT(i))
5495 				queue_uses[i]++;
5496 
5497 			if (queue_uses[i] > 1)
5498 				return false;
5499 		}
5500 	}
5501 
5502 	return true;
5503 }
5504 
5505 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
5506 				     struct tc_etf_qopt_offload *qopt)
5507 {
5508 	struct igc_hw *hw = &adapter->hw;
5509 	int err;
5510 
5511 	if (hw->mac.type != igc_i225)
5512 		return -EOPNOTSUPP;
5513 
5514 	err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
5515 	if (err)
5516 		return err;
5517 
5518 	return igc_tsn_offload_apply(adapter);
5519 }
5520 
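/* Translate the taprio entries into per-ring [start_time, end_time)
 * windows within the cycle; disabling the qdisc simply clears the stored
 * base time.  A full-offload schedule would typically be installed with
 * something along these lines (illustrative only, exact values depend on
 * the setup):
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *       num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
 *       queues 1@0 1@1 1@2 1@3 base-time 1000000000 \
 *       sched-entry S 01 300000 sched-entry S 02 300000 \
 *       sched-entry S 04 400000 flags 0x2
 */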
5521 static int igc_save_qbv_schedule(struct igc_adapter *adapter,
5522 				 struct tc_taprio_qopt_offload *qopt)
5523 {
5524 	u32 start_time = 0, end_time = 0;
5525 	size_t n;
5526 
5527 	if (!qopt->enable) {
5528 		adapter->base_time = 0;
5529 		return 0;
5530 	}
5531 
5532 	if (adapter->base_time)
5533 		return -EALREADY;
5534 
5535 	if (!validate_schedule(adapter, qopt))
5536 		return -EINVAL;
5537 
5538 	adapter->cycle_time = qopt->cycle_time;
5539 	adapter->base_time = qopt->base_time;
5540 
5541 	/* FIXME: be a little smarter about cases when the gate for a
5542 	 * queue stays open for more than one entry.
5543 	 */
5544 	for (n = 0; n < qopt->num_entries; n++) {
5545 		struct tc_taprio_sched_entry *e = &qopt->entries[n];
5546 		int i;
5547 
5548 		end_time += e->interval;
5549 
5550 		for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
5551 			struct igc_ring *ring = adapter->tx_ring[i];
5552 
5553 			if (!(e->gate_mask & BIT(i)))
5554 				continue;
5555 
5556 			ring->start_time = start_time;
5557 			ring->end_time = end_time;
5558 		}
5559 
5560 		start_time += e->interval;
5561 	}
5562 
5563 	return 0;
5564 }
5565 
5566 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
5567 					 struct tc_taprio_qopt_offload *qopt)
5568 {
5569 	struct igc_hw *hw = &adapter->hw;
5570 	int err;
5571 
5572 	if (hw->mac.type != igc_i225)
5573 		return -EOPNOTSUPP;
5574 
5575 	err = igc_save_qbv_schedule(adapter, qopt);
5576 	if (err)
5577 		return err;
5578 
5579 	return igc_tsn_offload_apply(adapter);
5580 }
5581 
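/* .ndo_setup_tc handler: only the taprio (802.1Qbv) and ETF (launchtime)
 * qdisc offloads are supported.
 */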
5582 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
5583 			void *type_data)
5584 {
5585 	struct igc_adapter *adapter = netdev_priv(dev);
5586 
5587 	switch (type) {
5588 	case TC_SETUP_QDISC_TAPRIO:
5589 		return igc_tsn_enable_qbv_scheduling(adapter, type_data);
5590 
5591 	case TC_SETUP_QDISC_ETF:
5592 		return igc_tsn_enable_launchtime(adapter, type_data);
5593 
5594 	default:
5595 		return -EOPNOTSUPP;
5596 	}
5597 }
5598 
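/* .ndo_bpf handler: attach/detach an XDP program or bind an AF_XDP
 * buffer pool to a queue.
 */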
5599 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
5600 {
5601 	struct igc_adapter *adapter = netdev_priv(dev);
5602 
5603 	switch (bpf->command) {
5604 	case XDP_SETUP_PROG:
5605 		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
5606 	case XDP_SETUP_XSK_POOL:
5607 		return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
5608 					  bpf->xsk.queue_id);
5609 	default:
5610 		return -EOPNOTSUPP;
5611 	}
5612 }
5613 
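/* .ndo_xdp_xmit handler: queue redirected XDP frames on the TX ring
 * associated with the current CPU and return how many were accepted.
 */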
5614 static int igc_xdp_xmit(struct net_device *dev, int num_frames,
5615 			struct xdp_frame **frames, u32 flags)
5616 {
5617 	struct igc_adapter *adapter = netdev_priv(dev);
5618 	int cpu = smp_processor_id();
5619 	struct netdev_queue *nq;
5620 	struct igc_ring *ring;
5621 	int i, drops;
5622 
5623 	if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
5624 		return -ENETDOWN;
5625 
5626 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
5627 		return -EINVAL;
5628 
5629 	ring = igc_xdp_get_tx_ring(adapter, cpu);
5630 	nq = txring_txq(ring);
5631 
5632 	__netif_tx_lock(nq, cpu);
5633 
5634 	drops = 0;
5635 	for (i = 0; i < num_frames; i++) {
5636 		int err;
5637 		struct xdp_frame *xdpf = frames[i];
5638 
5639 		err = igc_xdp_init_tx_descriptor(ring, xdpf);
5640 		if (err) {
5641 			xdp_return_frame_rx_napi(xdpf);
5642 			drops++;
5643 		}
5644 	}
5645 
5646 	if (flags & XDP_XMIT_FLUSH)
5647 		igc_flush_tx_descriptors(ring);
5648 
5649 	__netif_tx_unlock(nq);
5650 
5651 	return num_frames - drops;
5652 }
5653 
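/* Fire a software interrupt for the given queue vector (via EICS) so its
 * interrupt handler runs and schedules the NAPI poll.
 */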
5654 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
5655 					struct igc_q_vector *q_vector)
5656 {
5657 	struct igc_hw *hw = &adapter->hw;
5658 	u32 eics = 0;
5659 
5660 	eics |= q_vector->eims_value;
5661 	wr32(IGC_EICS, eics);
5662 }
5663 
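/* AF_XDP wakeup callback: if the queue's NAPI context is not already
 * scheduled, kick it with a software interrupt so pending fill-ring and
 * TX descriptor work gets processed.
 */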
5664 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
5665 {
5666 	struct igc_adapter *adapter = netdev_priv(dev);
5667 	struct igc_q_vector *q_vector;
5668 	struct igc_ring *ring;
5669 
5670 	if (test_bit(__IGC_DOWN, &adapter->state))
5671 		return -ENETDOWN;
5672 
5673 	if (!igc_xdp_is_enabled(adapter))
5674 		return -ENXIO;
5675 
5676 	if (queue_id >= adapter->num_rx_queues)
5677 		return -EINVAL;
5678 
5679 	ring = adapter->rx_ring[queue_id];
5680 
5681 	if (!ring->xsk_pool)
5682 		return -ENXIO;
5683 
5684 	q_vector = adapter->q_vector[queue_id];
5685 	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
5686 		igc_trigger_rxtxq_interrupt(adapter, q_vector);
5687 
5688 	return 0;
5689 }
5690 
5691 static const struct net_device_ops igc_netdev_ops = {
5692 	.ndo_open		= igc_open,
5693 	.ndo_stop		= igc_close,
5694 	.ndo_start_xmit		= igc_xmit_frame,
5695 	.ndo_set_rx_mode	= igc_set_rx_mode,
5696 	.ndo_set_mac_address	= igc_set_mac,
5697 	.ndo_change_mtu		= igc_change_mtu,
5698 	.ndo_get_stats64	= igc_get_stats64,
5699 	.ndo_fix_features	= igc_fix_features,
5700 	.ndo_set_features	= igc_set_features,
5701 	.ndo_features_check	= igc_features_check,
5702 	.ndo_do_ioctl		= igc_ioctl,
5703 	.ndo_setup_tc		= igc_setup_tc,
5704 	.ndo_bpf		= igc_bpf,
5705 	.ndo_xdp_xmit		= igc_xdp_xmit,
5706 	.ndo_xsk_wakeup		= igc_xsk_wakeup,
5707 };
5708 
5709 /* PCIe configuration access */
5710 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
5711 {
5712 	struct igc_adapter *adapter = hw->back;
5713 
5714 	pci_read_config_word(adapter->pdev, reg, value);
5715 }
5716 
5717 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
5718 {
5719 	struct igc_adapter *adapter = hw->back;
5720 
5721 	pci_write_config_word(adapter->pdev, reg, *value);
5722 }
5723 
5724 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
5725 {
5726 	struct igc_adapter *adapter = hw->back;
5727 
5728 	if (!pci_is_pcie(adapter->pdev))
5729 		return -IGC_ERR_CONFIG;
5730 
5731 	pcie_capability_read_word(adapter->pdev, reg, value);
5732 
5733 	return IGC_SUCCESS;
5734 }
5735 
5736 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
5737 {
5738 	struct igc_adapter *adapter = hw->back;
5739 
5740 	if (!pci_is_pcie(adapter->pdev))
5741 		return -IGC_ERR_CONFIG;
5742 
5743 	pcie_capability_write_word(adapter->pdev, reg, *value);
5744 
5745 	return IGC_SUCCESS;
5746 }
5747 
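/* MMIO register read helper.  A read of all 1's usually means the device
 * dropped off the PCIe bus; in that case the hw_addr is cleared and the
 * netdev detached, with a warning if the device is still present.
 */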
5748 u32 igc_rd32(struct igc_hw *hw, u32 reg)
5749 {
5750 	struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
5751 	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
5752 	u32 value = 0;
5753 
5754 	value = readl(&hw_addr[reg]);
5755 
5756 	/* reads should not return all F's */
5757 	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
5758 		struct net_device *netdev = igc->netdev;
5759 
5760 		hw->hw_addr = NULL;
5761 		netif_device_detach(netdev);
5762 		netdev_err(netdev, "PCIe link lost, device now detached\n");
5763 		WARN(pci_device_is_present(igc->pdev),
5764 		     "igc: Failed to read reg 0x%x!\n", reg);
5765 	}
5766 
5767 	return value;
5768 }
5769 
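/* Apply a forced speed/duplex setting from ethtool.  10/100 Mbps may be
 * forced in either duplex; 1000 and 2500 Mbps are full duplex only and
 * are configured through autonegotiation advertisement instead.
 */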
5770 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
5771 {
5772 	struct igc_mac_info *mac = &adapter->hw.mac;
5773 
5774 	mac->autoneg = false;
5775 
5776 	/* Make sure dplx is at most 1 bit and lsb of speed is not set
5777 	 * for the switch() below to work
5778 	 */
5779 	if ((spd & 1) || (dplx & ~1))
5780 		goto err_inval;
5781 
5782 	switch (spd + dplx) {
5783 	case SPEED_10 + DUPLEX_HALF:
5784 		mac->forced_speed_duplex = ADVERTISE_10_HALF;
5785 		break;
5786 	case SPEED_10 + DUPLEX_FULL:
5787 		mac->forced_speed_duplex = ADVERTISE_10_FULL;
5788 		break;
5789 	case SPEED_100 + DUPLEX_HALF:
5790 		mac->forced_speed_duplex = ADVERTISE_100_HALF;
5791 		break;
5792 	case SPEED_100 + DUPLEX_FULL:
5793 		mac->forced_speed_duplex = ADVERTISE_100_FULL;
5794 		break;
5795 	case SPEED_1000 + DUPLEX_FULL:
5796 		mac->autoneg = true;
5797 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5798 		break;
5799 	case SPEED_1000 + DUPLEX_HALF: /* not supported */
5800 		goto err_inval;
5801 	case SPEED_2500 + DUPLEX_FULL:
5802 		mac->autoneg = true;
5803 		adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
5804 		break;
5805 	case SPEED_2500 + DUPLEX_HALF: /* not supported */
5806 	default:
5807 		goto err_inval;
5808 	}
5809 
5810 	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
5811 	adapter->hw.phy.mdix = AUTO_ALL_MODES;
5812 
5813 	return 0;
5814 
5815 err_inval:
5816 	netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n");
5817 	return -EINVAL;
5818 }
5819 
5820 /**
5821  * igc_probe - Device Initialization Routine
5822  * @pdev: PCI device information struct
5823  * @ent: entry in igc_pci_tbl
5824  *
5825  * Returns 0 on success, negative on failure
5826  *
5827  * igc_probe initializes an adapter identified by a pci_dev structure.
5828  * The OS initialization, configuring the adapter private structure,
5829  * and a hardware reset occur.
5830  */
5831 static int igc_probe(struct pci_dev *pdev,
5832 		     const struct pci_device_id *ent)
5833 {
5834 	struct igc_adapter *adapter;
5835 	struct net_device *netdev;
5836 	struct igc_hw *hw;
5837 	const struct igc_info *ei = igc_info_tbl[ent->driver_data];
5838 	int err, pci_using_dac;
5839 
5840 	err = pci_enable_device_mem(pdev);
5841 	if (err)
5842 		return err;
5843 
5844 	pci_using_dac = 0;
5845 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5846 	if (!err) {
5847 		pci_using_dac = 1;
5848 	} else {
5849 		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5850 		if (err) {
5851 			dev_err(&pdev->dev,
5852 				"No usable DMA configuration, aborting\n");
5853 			goto err_dma;
5854 		}
5855 	}
5856 
5857 	err = pci_request_mem_regions(pdev, igc_driver_name);
5858 	if (err)
5859 		goto err_pci_reg;
5860 
5861 	pci_enable_pcie_error_reporting(pdev);
5862 
5863 	pci_set_master(pdev);
5864 
5865 	err = -ENOMEM;
5866 	netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
5867 				   IGC_MAX_TX_QUEUES);
5868 
5869 	if (!netdev)
5870 		goto err_alloc_etherdev;
5871 
5872 	SET_NETDEV_DEV(netdev, &pdev->dev);
5873 
5874 	pci_set_drvdata(pdev, netdev);
5875 	adapter = netdev_priv(netdev);
5876 	adapter->netdev = netdev;
5877 	adapter->pdev = pdev;
5878 	hw = &adapter->hw;
5879 	hw->back = adapter;
5880 	adapter->port_num = hw->bus.func;
5881 	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
5882 
5883 	err = pci_save_state(pdev);
5884 	if (err)
5885 		goto err_ioremap;
5886 
5887 	err = -EIO;
5888 	adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
5889 				   pci_resource_len(pdev, 0));
5890 	if (!adapter->io_addr)
5891 		goto err_ioremap;
5892 
5893 	/* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
5894 	hw->hw_addr = adapter->io_addr;
5895 
5896 	netdev->netdev_ops = &igc_netdev_ops;
5897 	igc_ethtool_set_ops(netdev);
5898 	netdev->watchdog_timeo = 5 * HZ;
5899 
5900 	netdev->mem_start = pci_resource_start(pdev, 0);
5901 	netdev->mem_end = pci_resource_end(pdev, 0);
5902 
5903 	/* PCI config space info */
5904 	hw->vendor_id = pdev->vendor;
5905 	hw->device_id = pdev->device;
5906 	hw->revision_id = pdev->revision;
5907 	hw->subsystem_vendor_id = pdev->subsystem_vendor;
5908 	hw->subsystem_device_id = pdev->subsystem_device;
5909 
5910 	/* Copy the default MAC and PHY function pointers */
5911 	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
5912 	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
5913 
5914 	/* Initialize skew-specific constants */
5915 	err = ei->get_invariants(hw);
5916 	if (err)
5917 		goto err_sw_init;
5918 
5919 	/* Add supported features to the features list */
5920 	netdev->features |= NETIF_F_SG;
5921 	netdev->features |= NETIF_F_TSO;
5922 	netdev->features |= NETIF_F_TSO6;
5923 	netdev->features |= NETIF_F_TSO_ECN;
5924 	netdev->features |= NETIF_F_RXCSUM;
5925 	netdev->features |= NETIF_F_HW_CSUM;
5926 	netdev->features |= NETIF_F_SCTP_CRC;
5927 	netdev->features |= NETIF_F_HW_TC;
5928 
5929 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
5930 				  NETIF_F_GSO_GRE_CSUM | \
5931 				  NETIF_F_GSO_IPXIP4 | \
5932 				  NETIF_F_GSO_IPXIP6 | \
5933 				  NETIF_F_GSO_UDP_TUNNEL | \
5934 				  NETIF_F_GSO_UDP_TUNNEL_CSUM)
5935 
5936 	netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
5937 	netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
5938 
5939 	/* setup the private structure */
5940 	err = igc_sw_init(adapter);
5941 	if (err)
5942 		goto err_sw_init;
5943 
5944 	/* copy netdev features into list of user selectable features */
5945 	netdev->hw_features |= NETIF_F_NTUPLE;
5946 	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
5947 	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
5948 	netdev->hw_features |= netdev->features;
5949 
5950 	if (pci_using_dac)
5951 		netdev->features |= NETIF_F_HIGHDMA;
5952 
5953 	netdev->vlan_features |= netdev->features;
5954 
5955 	/* MTU range: 68 - 9216 */
5956 	netdev->min_mtu = ETH_MIN_MTU;
5957 	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
5958 
5959 	/* before reading the NVM, reset the controller to put the device in a
5960 	 * known good starting state
5961 	 */
5962 	hw->mac.ops.reset_hw(hw);
5963 
5964 	if (igc_get_flash_presence_i225(hw)) {
5965 		if (hw->nvm.ops.validate(hw) < 0) {
5966 			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
5967 			err = -EIO;
5968 			goto err_eeprom;
5969 		}
5970 	}
5971 
5972 	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
5973 		/* copy the MAC address out of the NVM */
5974 		if (hw->mac.ops.read_mac_addr(hw))
5975 			dev_err(&pdev->dev, "NVM Read Error\n");
5976 	}
5977 
5978 	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
5979 
5980 	if (!is_valid_ether_addr(netdev->dev_addr)) {
5981 		dev_err(&pdev->dev, "Invalid MAC Address\n");
5982 		err = -EIO;
5983 		goto err_eeprom;
5984 	}
5985 
5986 	/* configure RXPBSIZE and TXPBSIZE */
5987 	wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
5988 	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
5989 
5990 	timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
5991 	timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
5992 
5993 	INIT_WORK(&adapter->reset_task, igc_reset_task);
5994 	INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
5995 
5996 	/* Initialize link properties that are user-changeable */
5997 	adapter->fc_autoneg = true;
5998 	hw->mac.autoneg = true;
5999 	hw->phy.autoneg_advertised = 0xaf;
6000 
6001 	hw->fc.requested_mode = igc_fc_default;
6002 	hw->fc.current_mode = igc_fc_default;
6003 
6004 	/* Wake-on-LAN is supported by default */
6005 	adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6006 
6007 	/* initialize the wol settings based on the eeprom settings */
6008 	if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6009 		adapter->wol |= IGC_WUFC_MAG;
6010 
6011 	device_set_wakeup_enable(&adapter->pdev->dev,
6012 				 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6013 
6014 	igc_ptp_init(adapter);
6015 
6016 	/* reset the hardware with the new settings */
6017 	igc_reset(adapter);
6018 
6019 	/* let the f/w know that the h/w is now under the control of the
6020 	 * driver.
6021 	 */
6022 	igc_get_hw_control(adapter);
6023 
6024 	strncpy(netdev->name, "eth%d", IFNAMSIZ);
6025 	err = register_netdev(netdev);
6026 	if (err)
6027 		goto err_register;
6028 
6029 	/* carrier off reporting is important to ethtool even BEFORE open */
6030 	netif_carrier_off(netdev);
6031 
6032 	/* keep a copy of the board-specific info for later use */
6033 	adapter->ei = *ei;
6034 
6035 	/* print pcie link status and MAC address */
6036 	pcie_print_link_status(pdev);
6037 	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6038 
6039 	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6040 	/* Disable EEE for internal PHY devices */
6041 	hw->dev_spec._base.eee_enable = false;
6042 	adapter->flags &= ~IGC_FLAG_EEE;
6043 	igc_set_eee_i225(hw, false, false, false);
6044 
6045 	pm_runtime_put_noidle(&pdev->dev);
6046 
6047 	return 0;
6048 
6049 err_register:
6050 	igc_release_hw_control(adapter);
6051 err_eeprom:
6052 	if (!igc_check_reset_block(hw))
6053 		igc_reset_phy(hw);
6054 err_sw_init:
6055 	igc_clear_interrupt_scheme(adapter);
6056 	iounmap(adapter->io_addr);
6057 err_ioremap:
6058 	free_netdev(netdev);
6059 err_alloc_etherdev:
6060 	pci_release_mem_regions(pdev);
6061 err_pci_reg:
6062 err_dma:
6063 	pci_disable_device(pdev);
6064 	return err;
6065 }
6066 
6067 /**
6068  * igc_remove - Device Removal Routine
6069  * @pdev: PCI device information struct
6070  *
6071  * igc_remove is called by the PCI subsystem to alert the driver
6072  * that it should release a PCI device.  This could be caused by a
6073  * Hot-Plug event, or because the driver is going to be removed from
6074  * memory.
6075  */
6076 static void igc_remove(struct pci_dev *pdev)
6077 {
6078 	struct net_device *netdev = pci_get_drvdata(pdev);
6079 	struct igc_adapter *adapter = netdev_priv(netdev);
6080 
6081 	pm_runtime_get_noresume(&pdev->dev);
6082 
6083 	igc_flush_nfc_rules(adapter);
6084 
6085 	igc_ptp_stop(adapter);
6086 
6087 	set_bit(__IGC_DOWN, &adapter->state);
6088 
6089 	del_timer_sync(&adapter->watchdog_timer);
6090 	del_timer_sync(&adapter->phy_info_timer);
6091 
6092 	cancel_work_sync(&adapter->reset_task);
6093 	cancel_work_sync(&adapter->watchdog_task);
6094 
6095 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6096 	 * would have already happened in close and is redundant.
6097 	 */
6098 	igc_release_hw_control(adapter);
6099 	unregister_netdev(netdev);
6100 
6101 	igc_clear_interrupt_scheme(adapter);
6102 	pci_iounmap(pdev, adapter->io_addr);
6103 	pci_release_mem_regions(pdev);
6104 
6105 	free_netdev(netdev);
6106 
6107 	pci_disable_pcie_error_reporting(pdev);
6108 
6109 	pci_disable_device(pdev);
6110 }
6111 
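/* Common suspend/shutdown path: detach the netdev, close it if running,
 * program the wake-up filters (WUC/WUFC) when Wake-on-LAN is requested,
 * release control of the hardware to the firmware and disable the PCI
 * device.  The caller decides how to handle the resulting wake state.
 */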
6112 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6113 			  bool runtime)
6114 {
6115 	struct net_device *netdev = pci_get_drvdata(pdev);
6116 	struct igc_adapter *adapter = netdev_priv(netdev);
6117 	u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6118 	struct igc_hw *hw = &adapter->hw;
6119 	u32 ctrl, rctl, status;
6120 	bool wake;
6121 
6122 	rtnl_lock();
6123 	netif_device_detach(netdev);
6124 
6125 	if (netif_running(netdev))
6126 		__igc_close(netdev, true);
6127 
6128 	igc_ptp_suspend(adapter);
6129 
6130 	igc_clear_interrupt_scheme(adapter);
6131 	rtnl_unlock();
6132 
6133 	status = rd32(IGC_STATUS);
6134 	if (status & IGC_STATUS_LU)
6135 		wufc &= ~IGC_WUFC_LNKC;
6136 
6137 	if (wufc) {
6138 		igc_setup_rctl(adapter);
6139 		igc_set_rx_mode(netdev);
6140 
6141 		/* turn on all-multi mode if wake on multicast is enabled */
6142 		if (wufc & IGC_WUFC_MC) {
6143 			rctl = rd32(IGC_RCTL);
6144 			rctl |= IGC_RCTL_MPE;
6145 			wr32(IGC_RCTL, rctl);
6146 		}
6147 
6148 		ctrl = rd32(IGC_CTRL);
6149 		ctrl |= IGC_CTRL_ADVD3WUC;
6150 		wr32(IGC_CTRL, ctrl);
6151 
6152 		/* Allow time for pending master requests to run */
6153 		igc_disable_pcie_master(hw);
6154 
6155 		wr32(IGC_WUC, IGC_WUC_PME_EN);
6156 		wr32(IGC_WUFC, wufc);
6157 	} else {
6158 		wr32(IGC_WUC, 0);
6159 		wr32(IGC_WUFC, 0);
6160 	}
6161 
6162 	wake = wufc || adapter->en_mng_pt;
6163 	if (!wake)
6164 		igc_power_down_phy_copper_base(&adapter->hw);
6165 	else
6166 		igc_power_up_link(adapter);
6167 
6168 	if (enable_wake)
6169 		*enable_wake = wake;
6170 
6171 	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
6172 	 * would have already happened in close and is redundant.
6173 	 */
6174 	igc_release_hw_control(adapter);
6175 
6176 	pci_disable_device(pdev);
6177 
6178 	return 0;
6179 }
6180 
6181 #ifdef CONFIG_PM
6182 static int __maybe_unused igc_runtime_suspend(struct device *dev)
6183 {
6184 	return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6185 }
6186 
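/* Reconstruct the frame that woke the device from the Wake Up Packet
 * Memory (WUPM) and hand it to the network stack on resume.
 */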
6187 static void igc_deliver_wake_packet(struct net_device *netdev)
6188 {
6189 	struct igc_adapter *adapter = netdev_priv(netdev);
6190 	struct igc_hw *hw = &adapter->hw;
6191 	struct sk_buff *skb;
6192 	u32 wupl;
6193 
6194 	wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6195 
6196 	/* WUPM stores only the first 128 bytes of the wake packet.
6197 	 * Read the packet only if we have the whole thing.
6198 	 */
6199 	if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6200 		return;
6201 
6202 	skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6203 	if (!skb)
6204 		return;
6205 
6206 	skb_put(skb, wupl);
6207 
6208 	/* Ensure reads are 32-bit aligned */
6209 	wupl = roundup(wupl, 4);
6210 
6211 	memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6212 
6213 	skb->protocol = eth_type_trans(skb, netdev);
6214 	netif_rx(skb);
6215 }
6216 
6217 static int __maybe_unused igc_resume(struct device *dev)
6218 {
6219 	struct pci_dev *pdev = to_pci_dev(dev);
6220 	struct net_device *netdev = pci_get_drvdata(pdev);
6221 	struct igc_adapter *adapter = netdev_priv(netdev);
6222 	struct igc_hw *hw = &adapter->hw;
6223 	u32 err, val;
6224 
6225 	pci_set_power_state(pdev, PCI_D0);
6226 	pci_restore_state(pdev);
6227 	pci_save_state(pdev);
6228 
6229 	if (!pci_device_is_present(pdev))
6230 		return -ENODEV;
6231 	err = pci_enable_device_mem(pdev);
6232 	if (err) {
6233 		netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6234 		return err;
6235 	}
6236 	pci_set_master(pdev);
6237 
6238 	pci_enable_wake(pdev, PCI_D3hot, 0);
6239 	pci_enable_wake(pdev, PCI_D3cold, 0);
6240 
6241 	if (igc_init_interrupt_scheme(adapter, true)) {
6242 		netdev_err(netdev, "Unable to allocate memory for queues\n");
6243 		return -ENOMEM;
6244 	}
6245 
6246 	igc_reset(adapter);
6247 
6248 	/* let the f/w know that the h/w is now under the control of the
6249 	 * driver.
6250 	 */
6251 	igc_get_hw_control(adapter);
6252 
6253 	val = rd32(IGC_WUS);
6254 	if (val & WAKE_PKT_WUS)
6255 		igc_deliver_wake_packet(netdev);
6256 
6257 	wr32(IGC_WUS, ~0);
6258 
6259 	rtnl_lock();
6260 	if (!err && netif_running(netdev))
6261 		err = __igc_open(netdev, true);
6262 
6263 	if (!err)
6264 		netif_device_attach(netdev);
6265 	rtnl_unlock();
6266 
6267 	return err;
6268 }
6269 
6270 static int __maybe_unused igc_runtime_resume(struct device *dev)
6271 {
6272 	return igc_resume(dev);
6273 }
6274 
6275 static int __maybe_unused igc_suspend(struct device *dev)
6276 {
6277 	return __igc_shutdown(to_pci_dev(dev), NULL, 0);
6278 }
6279 
6280 static int __maybe_unused igc_runtime_idle(struct device *dev)
6281 {
6282 	struct net_device *netdev = dev_get_drvdata(dev);
6283 	struct igc_adapter *adapter = netdev_priv(netdev);
6284 
6285 	if (!igc_has_link(adapter))
6286 		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6287 
6288 	return -EBUSY;
6289 }
6290 #endif /* CONFIG_PM */
6291 
6292 static void igc_shutdown(struct pci_dev *pdev)
6293 {
6294 	bool wake;
6295 
6296 	__igc_shutdown(pdev, &wake, 0);
6297 
6298 	if (system_state == SYSTEM_POWER_OFF) {
6299 		pci_wake_from_d3(pdev, wake);
6300 		pci_set_power_state(pdev, PCI_D3hot);
6301 	}
6302 }
6303 
6304 /**
6305  *  igc_io_error_detected - called when PCI error is detected
6306  *  @pdev: Pointer to PCI device
6307  *  @state: The current PCI connection state
6308  *
6309  *  This function is called after a PCI bus error affecting
6310  *  this device has been detected.
6311  **/
6312 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
6313 					      pci_channel_state_t state)
6314 {
6315 	struct net_device *netdev = pci_get_drvdata(pdev);
6316 	struct igc_adapter *adapter = netdev_priv(netdev);
6317 
6318 	netif_device_detach(netdev);
6319 
6320 	if (state == pci_channel_io_perm_failure)
6321 		return PCI_ERS_RESULT_DISCONNECT;
6322 
6323 	if (netif_running(netdev))
6324 		igc_down(adapter);
6325 	pci_disable_device(pdev);
6326 
6327 	/* Request a slot reset. */
6328 	return PCI_ERS_RESULT_NEED_RESET;
6329 }
6330 
6331 /**
6332  *  igc_io_slot_reset - called after the PCI bus has been reset.
6333  *  @pdev: Pointer to PCI device
6334  *
6335  *  Restart the card from scratch, as if from a cold-boot. Implementation
6336  *  resembles the first-half of the igc_resume routine.
6337  **/
6338 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
6339 {
6340 	struct net_device *netdev = pci_get_drvdata(pdev);
6341 	struct igc_adapter *adapter = netdev_priv(netdev);
6342 	struct igc_hw *hw = &adapter->hw;
6343 	pci_ers_result_t result;
6344 
6345 	if (pci_enable_device_mem(pdev)) {
6346 		netdev_err(netdev, "Could not re-enable PCI device after reset\n");
6347 		result = PCI_ERS_RESULT_DISCONNECT;
6348 	} else {
6349 		pci_set_master(pdev);
6350 		pci_restore_state(pdev);
6351 		pci_save_state(pdev);
6352 
6353 		pci_enable_wake(pdev, PCI_D3hot, 0);
6354 		pci_enable_wake(pdev, PCI_D3cold, 0);
6355 
6356 		/* In case of PCI error, adapter loses its HW address
6357 		 * so we should re-assign it here.
6358 		 */
6359 		hw->hw_addr = adapter->io_addr;
6360 
6361 		igc_reset(adapter);
6362 		wr32(IGC_WUS, ~0);
6363 		result = PCI_ERS_RESULT_RECOVERED;
6364 	}
6365 
6366 	return result;
6367 }
6368 
6369 /**
6370  *  igc_io_resume - called when traffic can start to flow again.
6371  *  @pdev: Pointer to PCI device
6372  *
6373  *  This callback is called when the error recovery driver tells us that
6374  *  it's OK to resume normal operation. Implementation resembles the
6375  *  second-half of the igc_resume routine.
6376  */
6377 static void igc_io_resume(struct pci_dev *pdev)
6378 {
6379 	struct net_device *netdev = pci_get_drvdata(pdev);
6380 	struct igc_adapter *adapter = netdev_priv(netdev);
6381 
6382 	rtnl_lock();
6383 	if (netif_running(netdev)) {
6384 		if (igc_open(netdev)) {
6385 			netdev_err(netdev, "igc_open failed after reset\n");
			rtnl_unlock();
6386 			return;
6387 		}
6388 	}
6389 
6390 	netif_device_attach(netdev);
6391 
6392 	/* let the f/w know that the h/w is now under the control of the
6393 	 * driver.
6394 	 */
6395 	igc_get_hw_control(adapter);
6396 	rtnl_unlock();
6397 }
6398 
6399 static const struct pci_error_handlers igc_err_handler = {
6400 	.error_detected = igc_io_error_detected,
6401 	.slot_reset = igc_io_slot_reset,
6402 	.resume = igc_io_resume,
6403 };
6404 
6405 #ifdef CONFIG_PM
6406 static const struct dev_pm_ops igc_pm_ops = {
6407 	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
6408 	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
6409 			   igc_runtime_idle)
6410 };
6411 #endif
6412 
6413 static struct pci_driver igc_driver = {
6414 	.name     = igc_driver_name,
6415 	.id_table = igc_pci_tbl,
6416 	.probe    = igc_probe,
6417 	.remove   = igc_remove,
6418 #ifdef CONFIG_PM
6419 	.driver.pm = &igc_pm_ops,
6420 #endif
6421 	.shutdown = igc_shutdown,
6422 	.err_handler = &igc_err_handler,
6423 };
6424 
6425 /**
6426  * igc_reinit_queues - reinitialize the adapter's queues
6427  * @adapter: pointer to adapter structure
6428  */
6429 int igc_reinit_queues(struct igc_adapter *adapter)
6430 {
6431 	struct net_device *netdev = adapter->netdev;
6432 	int err = 0;
6433 
6434 	if (netif_running(netdev))
6435 		igc_close(netdev);
6436 
6437 	igc_reset_interrupt_capability(adapter);
6438 
6439 	if (igc_init_interrupt_scheme(adapter, true)) {
6440 		netdev_err(netdev, "Unable to allocate memory for queues\n");
6441 		return -ENOMEM;
6442 	}
6443 
6444 	if (netif_running(netdev))
6445 		err = igc_open(netdev);
6446 
6447 	return err;
6448 }
6449 
6450 /**
6451  * igc_get_hw_dev - return device
6452  * @hw: pointer to hardware structure
6453  *
6454  * used by hardware layer to print debugging information
6455  */
6456 struct net_device *igc_get_hw_dev(struct igc_hw *hw)
6457 {
6458 	struct igc_adapter *adapter = hw->back;
6459 
6460 	return adapter->netdev;
6461 }
6462 
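/* Stop an RX queue in hardware: clear its enable bit and request a
 * software flush of the pending descriptors.
 */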
6463 static void igc_disable_rx_ring_hw(struct igc_ring *ring)
6464 {
6465 	struct igc_hw *hw = &ring->q_vector->adapter->hw;
6466 	u8 idx = ring->reg_idx;
6467 	u32 rxdctl;
6468 
6469 	rxdctl = rd32(IGC_RXDCTL(idx));
6470 	rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
6471 	rxdctl |= IGC_RXDCTL_SWFLUSH;
6472 	wr32(IGC_RXDCTL(idx), rxdctl);
6473 }
6474 
6475 void igc_disable_rx_ring(struct igc_ring *ring)
6476 {
6477 	igc_disable_rx_ring_hw(ring);
6478 	igc_clean_rx_ring(ring);
6479 }
6480 
6481 void igc_enable_rx_ring(struct igc_ring *ring)
6482 {
6483 	struct igc_adapter *adapter = ring->q_vector->adapter;
6484 
6485 	igc_configure_rx_ring(adapter, ring);
6486 
6487 	if (ring->xsk_pool)
6488 		igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
6489 	else
6490 		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
6491 }
6492 
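/* Stop a TX queue in hardware: clear its enable bit and request a
 * software flush of the pending descriptors.
 */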
6493 static void igc_disable_tx_ring_hw(struct igc_ring *ring)
6494 {
6495 	struct igc_hw *hw = &ring->q_vector->adapter->hw;
6496 	u8 idx = ring->reg_idx;
6497 	u32 txdctl;
6498 
6499 	txdctl = rd32(IGC_TXDCTL(idx));
6500 	txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
6501 	txdctl |= IGC_TXDCTL_SWFLUSH;
6502 	wr32(IGC_TXDCTL(idx), txdctl);
6503 }
6504 
6505 void igc_disable_tx_ring(struct igc_ring *ring)
6506 {
6507 	igc_disable_tx_ring_hw(ring);
6508 	igc_clean_tx_ring(ring);
6509 }
6510 
6511 void igc_enable_tx_ring(struct igc_ring *ring)
6512 {
6513 	struct igc_adapter *adapter = ring->q_vector->adapter;
6514 
6515 	igc_configure_tx_ring(adapter, ring);
6516 }
6517 
6518 /**
6519  * igc_init_module - Driver Registration Routine
6520  *
6521  * igc_init_module is the first routine called when the driver is
6522  * loaded. All it does is register with the PCI subsystem.
6523  */
6524 static int __init igc_init_module(void)
6525 {
6526 	int ret;
6527 
6528 	pr_info("%s\n", igc_driver_string);
6529 	pr_info("%s\n", igc_copyright);
6530 
6531 	ret = pci_register_driver(&igc_driver);
6532 	return ret;
6533 }
6534 
6535 module_init(igc_init_module);
6536 
6537 /**
6538  * igc_exit_module - Driver Exit Cleanup Routine
6539  *
6540  * igc_exit_module is called just before the driver is removed
6541  * from memory.
6542  */
6543 static void __exit igc_exit_module(void)
6544 {
6545 	pci_unregister_driver(&igc_driver);
6546 }
6547 
6548 module_exit(igc_exit_module);
6549 /* igc_main.c */
6550