// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Intel Corporation */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/if_vlan.h>
#include <linux/aer.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ip.h>
#include <linux/pm_runtime.h>
#include <net/pkt_sched.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <linux/pci.h>

#include <net/ipv6.h>

#include "igc.h"
#include "igc_hw.h"
#include "igc_tsn.h"
#include "igc_xdp.h"

#define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"

#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)

/* Bit flags describing the outcome of running an XDP program on a frame */
#define IGC_XDP_PASS		0
#define IGC_XDP_CONSUMED	BIT(0)
#define IGC_XDP_TX		BIT(1)
#define IGC_XDP_REDIRECT	BIT(2)

/* "debug" module parameter; -1 is the initial (unset) value */
static int debug = -1;

MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_LICENSE("GPL v2");
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

char igc_driver_name[] = "igc";
static const char igc_driver_string[] = DRV_SUMMARY;
static const char igc_copyright[] =
	"Copyright(c) 2018 Intel Corporation.";

/* Board-type index -> device info; only board_base is defined here */
static const struct igc_info *igc_info_tbl[] = {
	[board_base] = &igc_base_info,
};

/* PCI device IDs handled by this driver; every entry maps to board_base */
static const struct pci_device_id igc_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igc_pci_tbl);

enum latency_range {
	lowest_latency = 0,
	low_latency = 1,
	bulk_latency = 2,
	latency_invalid = 255
};

/**
 * igc_reset - reset the hardware and restore driver-managed settings
 * @adapter: board private structure
 *
 * Computes the flow control watermarks, resets and re-initializes the MAC
 * through hw->mac.ops, then re-applies the EEE, VLAN ethertype, PTP and TSN
 * configuration.  The PHY is powered down again when the interface is not
 * running.  A failing init_hw() is only logged; the remaining steps still
 * execute.
 */
void igc_reset(struct igc_adapter *adapter)
{
	struct net_device *dev = adapter->netdev;
	struct igc_hw *hw = &adapter->hw;
	struct igc_fc_info *fc = &hw->fc;
	u32 pba, hwm;

	/* Repartition PBA for greater than 9k MTU if required */
	pba = IGC_PBA_34K;

	/* flow control settings
	 * The high water mark must be low enough to fit one full frame
	 * after transmitting the pause frame.  As such we must have enough
	 * space to allow for us to complete our current transmit and then
	 * receive the frame that is in progress from the link partner.
	 * Set it to:
	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
	 */
	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);

	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
	/* low water mark sits one 16-byte step below the high water mark */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	hw->mac.ops.reset_hw(hw);

	if (hw->mac.ops.init_hw(hw))
		netdev_err(dev, "Error on hardware initialization\n");

	/* Re-establish EEE setting */
	igc_set_eee_i225(hw, true, true, true);

	/* interface is down: do not leave the PHY powered after the reset */
	if (!netif_running(adapter->netdev))
		igc_power_down_phy_copper_base(&adapter->hw);

	/* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
	wr32(IGC_VET, ETH_P_8021Q);

	/* Re-enable PTP, where applicable. */
	igc_ptp_reset(adapter);

	/* Re-enable TSN offloading, where applicable. */
	igc_tsn_reset(adapter);

	igc_get_phy_info(hw);
}

/**
 * igc_power_up_link - Power up the phy link
 * @adapter: address of board private structure
 */
static void igc_power_up_link(struct igc_adapter *adapter)
{
	igc_reset_phy(&adapter->hw);

	igc_power_up_phy_copper(&adapter->hw);

	igc_setup_link(&adapter->hw);
}

/**
 * igc_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 */
static void igc_release_hw_control(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 ctrl_ext;

	/* skip the register access when the device is no longer present */
	if (!pci_device_is_present(adapter->pdev))
		return;

	/* Let firmware take over control of h/w */
	ctrl_ext = rd32(IGC_CTRL_EXT);
	wr32(IGC_CTRL_EXT,
	     ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
}

/**
 * igc_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
171 */ 172 static void igc_get_hw_control(struct igc_adapter *adapter) 173 { 174 struct igc_hw *hw = &adapter->hw; 175 u32 ctrl_ext; 176 177 /* Let firmware know the driver has taken over */ 178 ctrl_ext = rd32(IGC_CTRL_EXT); 179 wr32(IGC_CTRL_EXT, 180 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 181 } 182 183 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 184 { 185 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 186 dma_unmap_len(buf, len), DMA_TO_DEVICE); 187 188 dma_unmap_len_set(buf, len, 0); 189 } 190 191 /** 192 * igc_clean_tx_ring - Free Tx Buffers 193 * @tx_ring: ring to be cleaned 194 */ 195 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 196 { 197 u16 i = tx_ring->next_to_clean; 198 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 199 u32 xsk_frames = 0; 200 201 while (i != tx_ring->next_to_use) { 202 union igc_adv_tx_desc *eop_desc, *tx_desc; 203 204 switch (tx_buffer->type) { 205 case IGC_TX_BUFFER_TYPE_XSK: 206 xsk_frames++; 207 break; 208 case IGC_TX_BUFFER_TYPE_XDP: 209 xdp_return_frame(tx_buffer->xdpf); 210 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 211 break; 212 case IGC_TX_BUFFER_TYPE_SKB: 213 dev_kfree_skb_any(tx_buffer->skb); 214 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 215 break; 216 default: 217 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 218 break; 219 } 220 221 /* check for eop_desc to determine the end of the packet */ 222 eop_desc = tx_buffer->next_to_watch; 223 tx_desc = IGC_TX_DESC(tx_ring, i); 224 225 /* unmap remaining buffers */ 226 while (tx_desc != eop_desc) { 227 tx_buffer++; 228 tx_desc++; 229 i++; 230 if (unlikely(i == tx_ring->count)) { 231 i = 0; 232 tx_buffer = tx_ring->tx_buffer_info; 233 tx_desc = IGC_TX_DESC(tx_ring, 0); 234 } 235 236 /* unmap any remaining paged data */ 237 if (dma_unmap_len(tx_buffer, len)) 238 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 239 } 240 241 tx_buffer->next_to_watch = NULL; 242 243 /* move us one more past the eop_desc for 
start of next pkt */ 244 tx_buffer++; 245 i++; 246 if (unlikely(i == tx_ring->count)) { 247 i = 0; 248 tx_buffer = tx_ring->tx_buffer_info; 249 } 250 } 251 252 if (tx_ring->xsk_pool && xsk_frames) 253 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 254 255 /* reset BQL for queue */ 256 netdev_tx_reset_queue(txring_txq(tx_ring)); 257 258 /* reset next_to_use and next_to_clean */ 259 tx_ring->next_to_use = 0; 260 tx_ring->next_to_clean = 0; 261 } 262 263 /** 264 * igc_free_tx_resources - Free Tx Resources per Queue 265 * @tx_ring: Tx descriptor ring for a specific queue 266 * 267 * Free all transmit software resources 268 */ 269 void igc_free_tx_resources(struct igc_ring *tx_ring) 270 { 271 igc_clean_tx_ring(tx_ring); 272 273 vfree(tx_ring->tx_buffer_info); 274 tx_ring->tx_buffer_info = NULL; 275 276 /* if not set, then don't free */ 277 if (!tx_ring->desc) 278 return; 279 280 dma_free_coherent(tx_ring->dev, tx_ring->size, 281 tx_ring->desc, tx_ring->dma); 282 283 tx_ring->desc = NULL; 284 } 285 286 /** 287 * igc_free_all_tx_resources - Free Tx Resources for All Queues 288 * @adapter: board private structure 289 * 290 * Free all transmit software resources 291 */ 292 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 293 { 294 int i; 295 296 for (i = 0; i < adapter->num_tx_queues; i++) 297 igc_free_tx_resources(adapter->tx_ring[i]); 298 } 299 300 /** 301 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 302 * @adapter: board private structure 303 */ 304 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 305 { 306 int i; 307 308 for (i = 0; i < adapter->num_tx_queues; i++) 309 if (adapter->tx_ring[i]) 310 igc_clean_tx_ring(adapter->tx_ring[i]); 311 } 312 313 /** 314 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 315 * @tx_ring: tx descriptor ring (for a specific queue) to setup 316 * 317 * Return 0 on success, negative on failure 318 */ 319 int igc_setup_tx_resources(struct igc_ring *tx_ring) 320 { 321 struct 
net_device *ndev = tx_ring->netdev; 322 struct device *dev = tx_ring->dev; 323 int size = 0; 324 325 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 326 tx_ring->tx_buffer_info = vzalloc(size); 327 if (!tx_ring->tx_buffer_info) 328 goto err; 329 330 /* round up to nearest 4K */ 331 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 332 tx_ring->size = ALIGN(tx_ring->size, 4096); 333 334 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 335 &tx_ring->dma, GFP_KERNEL); 336 337 if (!tx_ring->desc) 338 goto err; 339 340 tx_ring->next_to_use = 0; 341 tx_ring->next_to_clean = 0; 342 343 return 0; 344 345 err: 346 vfree(tx_ring->tx_buffer_info); 347 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 348 return -ENOMEM; 349 } 350 351 /** 352 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 353 * @adapter: board private structure 354 * 355 * Return 0 on success, negative on failure 356 */ 357 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 358 { 359 struct net_device *dev = adapter->netdev; 360 int i, err = 0; 361 362 for (i = 0; i < adapter->num_tx_queues; i++) { 363 err = igc_setup_tx_resources(adapter->tx_ring[i]); 364 if (err) { 365 netdev_err(dev, "Error on Tx queue %u setup\n", i); 366 for (i--; i >= 0; i--) 367 igc_free_tx_resources(adapter->tx_ring[i]); 368 break; 369 } 370 } 371 372 return err; 373 } 374 375 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 376 { 377 u16 i = rx_ring->next_to_clean; 378 379 dev_kfree_skb(rx_ring->skb); 380 rx_ring->skb = NULL; 381 382 /* Free all the Rx ring sk_buffs */ 383 while (i != rx_ring->next_to_alloc) { 384 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 385 386 /* Invalidate cache lines that may have been written to by 387 * device so that we avoid corrupting memory. 
388 */ 389 dma_sync_single_range_for_cpu(rx_ring->dev, 390 buffer_info->dma, 391 buffer_info->page_offset, 392 igc_rx_bufsz(rx_ring), 393 DMA_FROM_DEVICE); 394 395 /* free resources associated with mapping */ 396 dma_unmap_page_attrs(rx_ring->dev, 397 buffer_info->dma, 398 igc_rx_pg_size(rx_ring), 399 DMA_FROM_DEVICE, 400 IGC_RX_DMA_ATTR); 401 __page_frag_cache_drain(buffer_info->page, 402 buffer_info->pagecnt_bias); 403 404 i++; 405 if (i == rx_ring->count) 406 i = 0; 407 } 408 } 409 410 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 411 { 412 struct igc_rx_buffer *bi; 413 u16 i; 414 415 for (i = 0; i < ring->count; i++) { 416 bi = &ring->rx_buffer_info[i]; 417 if (!bi->xdp) 418 continue; 419 420 xsk_buff_free(bi->xdp); 421 bi->xdp = NULL; 422 } 423 } 424 425 /** 426 * igc_clean_rx_ring - Free Rx Buffers per Queue 427 * @ring: ring to free buffers from 428 */ 429 static void igc_clean_rx_ring(struct igc_ring *ring) 430 { 431 if (ring->xsk_pool) 432 igc_clean_rx_ring_xsk_pool(ring); 433 else 434 igc_clean_rx_ring_page_shared(ring); 435 436 clear_ring_uses_large_buffer(ring); 437 438 ring->next_to_alloc = 0; 439 ring->next_to_clean = 0; 440 ring->next_to_use = 0; 441 } 442 443 /** 444 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 445 * @adapter: board private structure 446 */ 447 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 448 { 449 int i; 450 451 for (i = 0; i < adapter->num_rx_queues; i++) 452 if (adapter->rx_ring[i]) 453 igc_clean_rx_ring(adapter->rx_ring[i]); 454 } 455 456 /** 457 * igc_free_rx_resources - Free Rx Resources 458 * @rx_ring: ring to clean the resources from 459 * 460 * Free all receive software resources 461 */ 462 void igc_free_rx_resources(struct igc_ring *rx_ring) 463 { 464 igc_clean_rx_ring(rx_ring); 465 466 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 467 468 vfree(rx_ring->rx_buffer_info); 469 rx_ring->rx_buffer_info = NULL; 470 471 /* if not set, then don't free */ 472 if (!rx_ring->desc) 473 return; 
474 475 dma_free_coherent(rx_ring->dev, rx_ring->size, 476 rx_ring->desc, rx_ring->dma); 477 478 rx_ring->desc = NULL; 479 } 480 481 /** 482 * igc_free_all_rx_resources - Free Rx Resources for All Queues 483 * @adapter: board private structure 484 * 485 * Free all receive software resources 486 */ 487 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 488 { 489 int i; 490 491 for (i = 0; i < adapter->num_rx_queues; i++) 492 igc_free_rx_resources(adapter->rx_ring[i]); 493 } 494 495 /** 496 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 497 * @rx_ring: rx descriptor ring (for a specific queue) to setup 498 * 499 * Returns 0 on success, negative on failure 500 */ 501 int igc_setup_rx_resources(struct igc_ring *rx_ring) 502 { 503 struct net_device *ndev = rx_ring->netdev; 504 struct device *dev = rx_ring->dev; 505 u8 index = rx_ring->queue_index; 506 int size, desc_len, res; 507 508 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 509 rx_ring->q_vector->napi.napi_id); 510 if (res < 0) { 511 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 512 index); 513 return res; 514 } 515 516 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 517 rx_ring->rx_buffer_info = vzalloc(size); 518 if (!rx_ring->rx_buffer_info) 519 goto err; 520 521 desc_len = sizeof(union igc_adv_rx_desc); 522 523 /* Round up to nearest 4K */ 524 rx_ring->size = rx_ring->count * desc_len; 525 rx_ring->size = ALIGN(rx_ring->size, 4096); 526 527 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 528 &rx_ring->dma, GFP_KERNEL); 529 530 if (!rx_ring->desc) 531 goto err; 532 533 rx_ring->next_to_alloc = 0; 534 rx_ring->next_to_clean = 0; 535 rx_ring->next_to_use = 0; 536 537 return 0; 538 539 err: 540 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 541 vfree(rx_ring->rx_buffer_info); 542 rx_ring->rx_buffer_info = NULL; 543 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 544 return -ENOMEM; 545 } 546 547 /** 548 * igc_setup_all_rx_resources - 
wrapper to allocate Rx resources 549 * (Descriptors) for all queues 550 * @adapter: board private structure 551 * 552 * Return 0 on success, negative on failure 553 */ 554 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 555 { 556 struct net_device *dev = adapter->netdev; 557 int i, err = 0; 558 559 for (i = 0; i < adapter->num_rx_queues; i++) { 560 err = igc_setup_rx_resources(adapter->rx_ring[i]); 561 if (err) { 562 netdev_err(dev, "Error on Rx queue %u setup\n", i); 563 for (i--; i >= 0; i--) 564 igc_free_rx_resources(adapter->rx_ring[i]); 565 break; 566 } 567 } 568 569 return err; 570 } 571 572 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 573 struct igc_ring *ring) 574 { 575 if (!igc_xdp_is_enabled(adapter) || 576 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 577 return NULL; 578 579 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 580 } 581 582 /** 583 * igc_configure_rx_ring - Configure a receive ring after Reset 584 * @adapter: board private structure 585 * @ring: receive ring to be configured 586 * 587 * Configure the Rx unit of the MAC after a reset. 
 */
static void igc_configure_rx_ring(struct igc_adapter *adapter,
				  struct igc_ring *ring)
{
	struct igc_hw *hw = &adapter->hw;
	union igc_adv_rx_desc *rx_desc;
	int reg_idx = ring->reg_idx;
	u32 srrctl = 0, rxdctl = 0;
	u64 rdba = ring->dma;
	u32 buf_size;

	/* Drop the previously registered memory model, then register the one
	 * matching the current pool state: XSK buffer pool when a pool is
	 * bound to this ring, shared pages otherwise.
	 */
	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
	if (ring->xsk_pool) {
		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						   MEM_TYPE_XSK_BUFF_POOL,
						   NULL));
		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
	} else {
		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
						   MEM_TYPE_PAGE_SHARED,
						   NULL));
	}

	if (igc_xdp_is_enabled(adapter))
		set_ring_uses_large_buffer(ring);

	/* disable the queue */
	wr32(IGC_RXDCTL(reg_idx), 0);

	/* Set DMA base address registers */
	wr32(IGC_RDBAL(reg_idx),
	     rdba & 0x00000000ffffffffULL);
	wr32(IGC_RDBAH(reg_idx), rdba >> 32);
	wr32(IGC_RDLEN(reg_idx),
	     ring->count * sizeof(union igc_adv_rx_desc));

	/* initialize head and tail */
	ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
	wr32(IGC_RDH(reg_idx), 0);
	writel(0, ring->tail);

	/* reset next-to- use/clean to place SW in sync with hardware */
	ring->next_to_clean = 0;
	ring->next_to_use = 0;

	/* Rx buffer size: pool frame size for XSK, otherwise 3072 or 2048
	 * bytes depending on the large-buffer ring flag
	 */
	if (ring->xsk_pool)
		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
	else if (ring_uses_large_buffer(ring))
		buf_size = IGC_RXBUFFER_3072;
	else
		buf_size = IGC_RXBUFFER_2048;

	srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
	srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT;
	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;

	wr32(IGC_SRRCTL(reg_idx), srrctl);

	/* prefetch / host / write-back thresholds share the RXDCTL register */
	rxdctl |= IGC_RX_PTHRESH;
	rxdctl |= IGC_RX_HTHRESH << 8;
	rxdctl |= IGC_RX_WTHRESH << 16;

	/* initialize rx_buffer_info */
	memset(ring->rx_buffer_info, 0,
	       sizeof(struct igc_rx_buffer) * ring->count);

	/* initialize Rx descriptor 0 */
	rx_desc = IGC_RX_DESC(ring, 0);
	rx_desc->wb.upper.length = 0;

	/* enable receive descriptor fetching */
	rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;

	wr32(IGC_RXDCTL(reg_idx), rxdctl);
}

/**
 * igc_configure_rx - Configure receive Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Rx unit of the MAC after a reset.
 */
static void igc_configure_rx(struct igc_adapter *adapter)
{
	int i;

	/* Setup the HW Rx Head and Tail Descriptor Pointers and
	 * the Base and Length of the Rx Descriptor Ring
	 */
	for (i = 0; i < adapter->num_rx_queues; i++)
		igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
}

/**
 * igc_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.
 */
static void igc_configure_tx_ring(struct igc_adapter *adapter,
				  struct igc_ring *ring)
{
	struct igc_hw *hw = &adapter->hw;
	int reg_idx = ring->reg_idx;
	u64 tdba = ring->dma;
	u32 txdctl = 0;

	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);

	/* disable the queue */
	wr32(IGC_TXDCTL(reg_idx), 0);
	/* NOTE(review): wrfl() presumably flushes the posted write before
	 * the 10 ms wait - confirm against its definition
	 */
	wrfl();
	mdelay(10);

	/* ring length, then the low and high halves of the DMA base address */
	wr32(IGC_TDLEN(reg_idx),
	     ring->count * sizeof(union igc_adv_tx_desc));
	wr32(IGC_TDBAL(reg_idx),
	     tdba & 0x00000000ffffffffULL);
	wr32(IGC_TDBAH(reg_idx), tdba >> 32);

	/* zero head and tail so the ring starts out empty */
	ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
	wr32(IGC_TDH(reg_idx), 0);
	writel(0, ring->tail);

	txdctl |= IGC_TX_PTHRESH;
	txdctl |= IGC_TX_HTHRESH << 8;
	txdctl |= IGC_TX_WTHRESH << 16;

	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
	wr32(IGC_TXDCTL(reg_idx), txdctl);
}

/**
 * igc_configure_tx - Configure transmit Unit after Reset
 * @adapter: board private structure
 *
 * Configure the Tx unit of the MAC after a reset.
 */
static void igc_configure_tx(struct igc_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
}

/**
 * igc_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 *
 * Programs the RSS key, (re)builds the indirection table when the number of
 * RSS queues changed, adjusts the Rx checksum control and enables RSS.
 */
static void igc_setup_mrqc(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 j, num_rx_queues;
	u32 mrqc, rxcsum;
	u32 rss_key[10];

	/* one 32-bit key register per rss_key[] word */
	netdev_rss_key_fill(rss_key, sizeof(rss_key));
	for (j = 0; j < 10; j++)
		wr32(IGC_RSSRK(j), rss_key[j]);

	num_rx_queues = adapter->rss_queues;

	/* rebuild the table only when it was built for another queue count;
	 * entries are spread evenly over the queues
	 */
	if (adapter->rss_indir_tbl_init != num_rx_queues) {
		for (j = 0; j < IGC_RETA_SIZE; j++)
			adapter->rss_indir_tbl[j] =
			(j * num_rx_queues) / IGC_RETA_SIZE;
		adapter->rss_indir_tbl_init = num_rx_queues;
	}
	igc_write_rss_indir_tbl(adapter);

	/* Disable raw packet checksumming so that RSS hash is placed in
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(IGC_RXCSUM);
	rxcsum |= IGC_RXCSUM_PCSD;

	/* Enable Receive Checksum Offload for SCTP */
	rxcsum |= IGC_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(IGC_RXCSUM, rxcsum);

	/* Generate RSS hash based on packet types, TCP/UDP
	 * port numbers and/or IPv4/v6 src and dst addresses
	 */
	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6 |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;

	/* UDP hashing is opt-in through the adapter flags */
	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;

	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;

	wr32(IGC_MRQC, mrqc);
}

/**
 * igc_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 */
static void igc_setup_rctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(IGC_RCTL);

	/* clear the multicast offset and loopback mode bits */
	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);

	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);

	/* enable stripping of CRC. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= IGC_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);

	/* enable LPE to allow for reception of jumbo frames */
	rctl |= IGC_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(IGC_RXDCTL(0), 0);

	/* This is useful for sniffing bad packets. */
	if (adapter->netdev->features & NETIF_F_RXALL) {
		/* UPE and MPE will be handled by normal PROMISC logic
		 * in set_rx_mode
		 */
		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
	}

	wr32(IGC_RCTL, rctl);
}

/**
 * igc_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 */
static void igc_setup_tctl(struct igc_adapter *adapter)
{
	struct igc_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which could be enabled by default */
	wr32(IGC_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(IGC_TCTL);
	/* replace the collision threshold field with the driver's value */
	tctl &= ~IGC_TCTL_CT;
	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);

	/* Enable transmits */
	tctl |= IGC_TCTL_EN;

	wr32(IGC_TCTL, tctl);
}

/**
 * igc_set_mac_filter_hw() - Set MAC address filter in hardware
 * @adapter: Pointer to adapter where the filter should be set
 * @index: Filter index
 * @type: MAC address filter type (source or destination)
 * @addr: MAC address
 * @queue: If non-negative, queue assignment feature is enabled and frames
 *         matching the filter are enqueued onto 'queue'. Otherwise, queue
 *         assignment is disabled.
873 */ 874 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 875 enum igc_mac_filter_type type, 876 const u8 *addr, int queue) 877 { 878 struct net_device *dev = adapter->netdev; 879 struct igc_hw *hw = &adapter->hw; 880 u32 ral, rah; 881 882 if (WARN_ON(index >= hw->mac.rar_entry_count)) 883 return; 884 885 ral = le32_to_cpup((__le32 *)(addr)); 886 rah = le16_to_cpup((__le16 *)(addr + 4)); 887 888 if (type == IGC_MAC_FILTER_TYPE_SRC) { 889 rah &= ~IGC_RAH_ASEL_MASK; 890 rah |= IGC_RAH_ASEL_SRC_ADDR; 891 } 892 893 if (queue >= 0) { 894 rah &= ~IGC_RAH_QSEL_MASK; 895 rah |= (queue << IGC_RAH_QSEL_SHIFT); 896 rah |= IGC_RAH_QSEL_ENABLE; 897 } 898 899 rah |= IGC_RAH_AV; 900 901 wr32(IGC_RAL(index), ral); 902 wr32(IGC_RAH(index), rah); 903 904 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 905 } 906 907 /** 908 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 909 * @adapter: Pointer to adapter where the filter should be cleared 910 * @index: Filter index 911 */ 912 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 913 { 914 struct net_device *dev = adapter->netdev; 915 struct igc_hw *hw = &adapter->hw; 916 917 if (WARN_ON(index >= hw->mac.rar_entry_count)) 918 return; 919 920 wr32(IGC_RAL(index), 0); 921 wr32(IGC_RAH(index), 0); 922 923 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 924 } 925 926 /* Set default MAC address for the PF in the first RAR entry */ 927 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 928 { 929 struct net_device *dev = adapter->netdev; 930 u8 *addr = adapter->hw.mac.addr; 931 932 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 933 934 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 935 } 936 937 /** 938 * igc_set_mac - Change the Ethernet Address of the NIC 939 * @netdev: network interface device structure 940 * @p: pointer to an address structure 941 * 942 * Returns 0 on 
success, negative on failure 943 */ 944 static int igc_set_mac(struct net_device *netdev, void *p) 945 { 946 struct igc_adapter *adapter = netdev_priv(netdev); 947 struct igc_hw *hw = &adapter->hw; 948 struct sockaddr *addr = p; 949 950 if (!is_valid_ether_addr(addr->sa_data)) 951 return -EADDRNOTAVAIL; 952 953 eth_hw_addr_set(netdev, addr->sa_data); 954 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 955 956 /* set the correct pool for the new PF MAC address in entry 0 */ 957 igc_set_default_mac_filter(adapter); 958 959 return 0; 960 } 961 962 /** 963 * igc_write_mc_addr_list - write multicast addresses to MTA 964 * @netdev: network interface device structure 965 * 966 * Writes multicast address list to the MTA hash table. 967 * Returns: -ENOMEM on failure 968 * 0 on no addresses written 969 * X on writing X addresses to MTA 970 **/ 971 static int igc_write_mc_addr_list(struct net_device *netdev) 972 { 973 struct igc_adapter *adapter = netdev_priv(netdev); 974 struct igc_hw *hw = &adapter->hw; 975 struct netdev_hw_addr *ha; 976 u8 *mta_list; 977 int i; 978 979 if (netdev_mc_empty(netdev)) { 980 /* nothing to program, so clear mc list */ 981 igc_update_mc_addr_list(hw, NULL, 0); 982 return 0; 983 } 984 985 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 986 if (!mta_list) 987 return -ENOMEM; 988 989 /* The shared function expects a packed array of only addresses. 
*/ 990 i = 0; 991 netdev_for_each_mc_addr(ha, netdev) 992 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 993 994 igc_update_mc_addr_list(hw, mta_list, i); 995 kfree(mta_list); 996 997 return netdev_mc_count(netdev); 998 } 999 1000 static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime) 1001 { 1002 ktime_t cycle_time = adapter->cycle_time; 1003 ktime_t base_time = adapter->base_time; 1004 u32 launchtime; 1005 1006 /* FIXME: when using ETF together with taprio, we may have a 1007 * case where 'delta' is larger than the cycle_time, this may 1008 * cause problems if we don't read the current value of 1009 * IGC_BASET, as the value writen into the launchtime 1010 * descriptor field may be misinterpreted. 1011 */ 1012 div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime); 1013 1014 return cpu_to_le32(launchtime); 1015 } 1016 1017 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, 1018 struct igc_tx_buffer *first, 1019 u32 vlan_macip_lens, u32 type_tucmd, 1020 u32 mss_l4len_idx) 1021 { 1022 struct igc_adv_tx_context_desc *context_desc; 1023 u16 i = tx_ring->next_to_use; 1024 1025 context_desc = IGC_TX_CTXTDESC(tx_ring, i); 1026 1027 i++; 1028 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1029 1030 /* set bits to identify this as an advanced context descriptor */ 1031 type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; 1032 1033 /* For i225, context index must be unique per ring. */ 1034 if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 1035 mss_l4len_idx |= tx_ring->reg_idx << 4; 1036 1037 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 1038 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 1039 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 1040 1041 /* We assume there is always a valid Tx time available. Invalid times 1042 * should have been handled by the upper layers. 
1043 */ 1044 if (tx_ring->launchtime_enable) { 1045 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1046 ktime_t txtime = first->skb->tstamp; 1047 1048 skb_txtime_consumed(first->skb); 1049 context_desc->launch_time = igc_tx_launchtime(adapter, 1050 txtime); 1051 } else { 1052 context_desc->launch_time = 0; 1053 } 1054 } 1055 1056 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) 1057 { 1058 struct sk_buff *skb = first->skb; 1059 u32 vlan_macip_lens = 0; 1060 u32 type_tucmd = 0; 1061 1062 if (skb->ip_summed != CHECKSUM_PARTIAL) { 1063 csum_failed: 1064 if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) && 1065 !tx_ring->launchtime_enable) 1066 return; 1067 goto no_csum; 1068 } 1069 1070 switch (skb->csum_offset) { 1071 case offsetof(struct tcphdr, check): 1072 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1073 fallthrough; 1074 case offsetof(struct udphdr, check): 1075 break; 1076 case offsetof(struct sctphdr, checksum): 1077 /* validate that this is actually an SCTP request */ 1078 if (skb_csum_is_sctp(skb)) { 1079 type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP; 1080 break; 1081 } 1082 fallthrough; 1083 default: 1084 skb_checksum_help(skb); 1085 goto csum_failed; 1086 } 1087 1088 /* update TX checksum flag */ 1089 first->tx_flags |= IGC_TX_FLAGS_CSUM; 1090 vlan_macip_lens = skb_checksum_start_offset(skb) - 1091 skb_network_offset(skb); 1092 no_csum: 1093 vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT; 1094 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1095 1096 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); 1097 } 1098 1099 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 1100 { 1101 struct net_device *netdev = tx_ring->netdev; 1102 1103 netif_stop_subqueue(netdev, tx_ring->queue_index); 1104 1105 /* memory barriier comment */ 1106 smp_mb(); 1107 1108 /* We need to check again in a case another CPU has just 1109 * made room available. 
1110 */ 1111 if (igc_desc_unused(tx_ring) < size) 1112 return -EBUSY; 1113 1114 /* A reprieve! */ 1115 netif_wake_subqueue(netdev, tx_ring->queue_index); 1116 1117 u64_stats_update_begin(&tx_ring->tx_syncp2); 1118 tx_ring->tx_stats.restart_queue2++; 1119 u64_stats_update_end(&tx_ring->tx_syncp2); 1120 1121 return 0; 1122 } 1123 1124 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 1125 { 1126 if (igc_desc_unused(tx_ring) >= size) 1127 return 0; 1128 return __igc_maybe_stop_tx(tx_ring, size); 1129 } 1130 1131 #define IGC_SET_FLAG(_input, _flag, _result) \ 1132 (((_flag) <= (_result)) ? \ 1133 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1134 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1135 1136 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1137 { 1138 /* set type for advanced descriptor with frame checksum insertion */ 1139 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1140 IGC_ADVTXD_DCMD_DEXT | 1141 IGC_ADVTXD_DCMD_IFCS; 1142 1143 /* set HW vlan bit if vlan is present */ 1144 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1145 IGC_ADVTXD_DCMD_VLE); 1146 1147 /* set segmentation bits for TSO */ 1148 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1149 (IGC_ADVTXD_DCMD_TSE)); 1150 1151 /* set timestamp bit if present */ 1152 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1153 (IGC_ADVTXD_MAC_TSTAMP)); 1154 1155 /* insert frame checksum */ 1156 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1157 1158 return cmd_type; 1159 } 1160 1161 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1162 union igc_adv_tx_desc *tx_desc, 1163 u32 tx_flags, unsigned int paylen) 1164 { 1165 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1166 1167 /* insert L4 checksum */ 1168 olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * 1169 ((IGC_TXD_POPTS_TXSM << 8) / 1170 IGC_TX_FLAGS_CSUM); 1171 1172 /* insert IPv4 checksum */ 1173 olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * 1174 
(((IGC_TXD_POPTS_IXSM << 8)) / 1175 IGC_TX_FLAGS_IPV4); 1176 1177 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1178 } 1179 1180 static int igc_tx_map(struct igc_ring *tx_ring, 1181 struct igc_tx_buffer *first, 1182 const u8 hdr_len) 1183 { 1184 struct sk_buff *skb = first->skb; 1185 struct igc_tx_buffer *tx_buffer; 1186 union igc_adv_tx_desc *tx_desc; 1187 u32 tx_flags = first->tx_flags; 1188 skb_frag_t *frag; 1189 u16 i = tx_ring->next_to_use; 1190 unsigned int data_len, size; 1191 dma_addr_t dma; 1192 u32 cmd_type; 1193 1194 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1195 tx_desc = IGC_TX_DESC(tx_ring, i); 1196 1197 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1198 1199 size = skb_headlen(skb); 1200 data_len = skb->data_len; 1201 1202 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1203 1204 tx_buffer = first; 1205 1206 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1207 if (dma_mapping_error(tx_ring->dev, dma)) 1208 goto dma_error; 1209 1210 /* record length, and DMA address */ 1211 dma_unmap_len_set(tx_buffer, len, size); 1212 dma_unmap_addr_set(tx_buffer, dma, dma); 1213 1214 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1215 1216 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1217 tx_desc->read.cmd_type_len = 1218 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1219 1220 i++; 1221 tx_desc++; 1222 if (i == tx_ring->count) { 1223 tx_desc = IGC_TX_DESC(tx_ring, 0); 1224 i = 0; 1225 } 1226 tx_desc->read.olinfo_status = 0; 1227 1228 dma += IGC_MAX_DATA_PER_TXD; 1229 size -= IGC_MAX_DATA_PER_TXD; 1230 1231 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1232 } 1233 1234 if (likely(!data_len)) 1235 break; 1236 1237 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1238 1239 i++; 1240 tx_desc++; 1241 if (i == tx_ring->count) { 1242 tx_desc = IGC_TX_DESC(tx_ring, 0); 1243 i = 0; 1244 } 1245 tx_desc->read.olinfo_status = 0; 1246 1247 size = skb_frag_size(frag); 1248 data_len -= size; 1249 1250 dma = 
skb_frag_dma_map(tx_ring->dev, frag, 0, 1251 size, DMA_TO_DEVICE); 1252 1253 tx_buffer = &tx_ring->tx_buffer_info[i]; 1254 } 1255 1256 /* write last descriptor with RS and EOP bits */ 1257 cmd_type |= size | IGC_TXD_DCMD; 1258 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1259 1260 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1261 1262 /* set the timestamp */ 1263 first->time_stamp = jiffies; 1264 1265 skb_tx_timestamp(skb); 1266 1267 /* Force memory writes to complete before letting h/w know there 1268 * are new descriptors to fetch. (Only applicable for weak-ordered 1269 * memory model archs, such as IA-64). 1270 * 1271 * We also need this memory barrier to make certain all of the 1272 * status bits have been updated before next_to_watch is written. 1273 */ 1274 wmb(); 1275 1276 /* set next_to_watch value indicating a packet is present */ 1277 first->next_to_watch = tx_desc; 1278 1279 i++; 1280 if (i == tx_ring->count) 1281 i = 0; 1282 1283 tx_ring->next_to_use = i; 1284 1285 /* Make sure there is space in the ring for the next send. 
*/ 1286 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1287 1288 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1289 writel(i, tx_ring->tail); 1290 } 1291 1292 return 0; 1293 dma_error: 1294 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1295 tx_buffer = &tx_ring->tx_buffer_info[i]; 1296 1297 /* clear dma mappings for failed tx_buffer_info map */ 1298 while (tx_buffer != first) { 1299 if (dma_unmap_len(tx_buffer, len)) 1300 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1301 1302 if (i-- == 0) 1303 i += tx_ring->count; 1304 tx_buffer = &tx_ring->tx_buffer_info[i]; 1305 } 1306 1307 if (dma_unmap_len(tx_buffer, len)) 1308 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1309 1310 dev_kfree_skb_any(tx_buffer->skb); 1311 tx_buffer->skb = NULL; 1312 1313 tx_ring->next_to_use = i; 1314 1315 return -1; 1316 } 1317 1318 static int igc_tso(struct igc_ring *tx_ring, 1319 struct igc_tx_buffer *first, 1320 u8 *hdr_len) 1321 { 1322 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1323 struct sk_buff *skb = first->skb; 1324 union { 1325 struct iphdr *v4; 1326 struct ipv6hdr *v6; 1327 unsigned char *hdr; 1328 } ip; 1329 union { 1330 struct tcphdr *tcp; 1331 struct udphdr *udp; 1332 unsigned char *hdr; 1333 } l4; 1334 u32 paylen, l4_offset; 1335 int err; 1336 1337 if (skb->ip_summed != CHECKSUM_PARTIAL) 1338 return 0; 1339 1340 if (!skb_is_gso(skb)) 1341 return 0; 1342 1343 err = skb_cow_head(skb, 0); 1344 if (err < 0) 1345 return err; 1346 1347 ip.hdr = skb_network_header(skb); 1348 l4.hdr = skb_checksum_start(skb); 1349 1350 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1351 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1352 1353 /* initialize outer IP header fields */ 1354 if (ip.v4->version == 4) { 1355 unsigned char *csum_start = skb_checksum_start(skb); 1356 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1357 1358 /* IP header will have to cancel out any data that 1359 * is not a part of the outer IP header 1360 */ 1361 ip.v4->check = 
csum_fold(csum_partial(trans_start, 1362 csum_start - trans_start, 1363 0)); 1364 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 1365 1366 ip.v4->tot_len = 0; 1367 first->tx_flags |= IGC_TX_FLAGS_TSO | 1368 IGC_TX_FLAGS_CSUM | 1369 IGC_TX_FLAGS_IPV4; 1370 } else { 1371 ip.v6->payload_len = 0; 1372 first->tx_flags |= IGC_TX_FLAGS_TSO | 1373 IGC_TX_FLAGS_CSUM; 1374 } 1375 1376 /* determine offset of inner transport header */ 1377 l4_offset = l4.hdr - skb->data; 1378 1379 /* remove payload length from inner checksum */ 1380 paylen = skb->len - l4_offset; 1381 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1382 /* compute length of segmentation header */ 1383 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1384 csum_replace_by_diff(&l4.tcp->check, 1385 (__force __wsum)htonl(paylen)); 1386 } else { 1387 /* compute length of segmentation header */ 1388 *hdr_len = sizeof(*l4.udp) + l4_offset; 1389 csum_replace_by_diff(&l4.udp->check, 1390 (__force __wsum)htonl(paylen)); 1391 } 1392 1393 /* update gso size and bytecount with header size */ 1394 first->gso_segs = skb_shinfo(skb)->gso_segs; 1395 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1396 1397 /* MSS L4LEN IDX */ 1398 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1399 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1400 1401 /* VLAN MACLEN IPLEN */ 1402 vlan_macip_lens = l4.hdr - ip.hdr; 1403 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1404 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1405 1406 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, 1407 type_tucmd, mss_l4len_idx); 1408 1409 return 1; 1410 } 1411 1412 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1413 struct igc_ring *tx_ring) 1414 { 1415 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1416 __be16 protocol = vlan_get_protocol(skb); 1417 struct igc_tx_buffer *first; 1418 u32 tx_flags = 0; 1419 unsigned short f; 1420 u8 hdr_len = 0; 1421 int tso = 0; 1422 1423 /* need: 1 
descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1424 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1425 * + 2 desc gap to keep tail from touching head, 1426 * + 1 desc for context descriptor, 1427 * otherwise try next time 1428 */ 1429 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1430 count += TXD_USE_COUNT(skb_frag_size( 1431 &skb_shinfo(skb)->frags[f])); 1432 1433 if (igc_maybe_stop_tx(tx_ring, count + 3)) { 1434 /* this is a hard error */ 1435 return NETDEV_TX_BUSY; 1436 } 1437 1438 /* record the location of the first descriptor for this packet */ 1439 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1440 first->type = IGC_TX_BUFFER_TYPE_SKB; 1441 first->skb = skb; 1442 first->bytecount = skb->len; 1443 first->gso_segs = 1; 1444 1445 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1446 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1447 1448 /* FIXME: add support for retrieving timestamps from 1449 * the other timer registers before skipping the 1450 * timestamping request. 
1451 */ 1452 if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && 1453 !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, 1454 &adapter->state)) { 1455 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1456 tx_flags |= IGC_TX_FLAGS_TSTAMP; 1457 1458 adapter->ptp_tx_skb = skb_get(skb); 1459 adapter->ptp_tx_start = jiffies; 1460 } else { 1461 adapter->tx_hwtstamp_skipped++; 1462 } 1463 } 1464 1465 if (skb_vlan_tag_present(skb)) { 1466 tx_flags |= IGC_TX_FLAGS_VLAN; 1467 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1468 } 1469 1470 /* record initial flags and protocol */ 1471 first->tx_flags = tx_flags; 1472 first->protocol = protocol; 1473 1474 tso = igc_tso(tx_ring, first, &hdr_len); 1475 if (tso < 0) 1476 goto out_drop; 1477 else if (!tso) 1478 igc_tx_csum(tx_ring, first); 1479 1480 igc_tx_map(tx_ring, first, hdr_len); 1481 1482 return NETDEV_TX_OK; 1483 1484 out_drop: 1485 dev_kfree_skb_any(first->skb); 1486 first->skb = NULL; 1487 1488 return NETDEV_TX_OK; 1489 } 1490 1491 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1492 struct sk_buff *skb) 1493 { 1494 unsigned int r_idx = skb->queue_mapping; 1495 1496 if (r_idx >= adapter->num_tx_queues) 1497 r_idx = r_idx % adapter->num_tx_queues; 1498 1499 return adapter->tx_ring[r_idx]; 1500 } 1501 1502 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1503 struct net_device *netdev) 1504 { 1505 struct igc_adapter *adapter = netdev_priv(netdev); 1506 1507 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 1508 * in order to meet this minimum size requirement. 
1509 */ 1510 if (skb->len < 17) { 1511 if (skb_padto(skb, 17)) 1512 return NETDEV_TX_OK; 1513 skb->len = 17; 1514 } 1515 1516 return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); 1517 } 1518 1519 static void igc_rx_checksum(struct igc_ring *ring, 1520 union igc_adv_rx_desc *rx_desc, 1521 struct sk_buff *skb) 1522 { 1523 skb_checksum_none_assert(skb); 1524 1525 /* Ignore Checksum bit is set */ 1526 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM)) 1527 return; 1528 1529 /* Rx checksum disabled via ethtool */ 1530 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 1531 return; 1532 1533 /* TCP/UDP checksum error bit is set */ 1534 if (igc_test_staterr(rx_desc, 1535 IGC_RXDEXT_STATERR_L4E | 1536 IGC_RXDEXT_STATERR_IPE)) { 1537 /* work around errata with sctp packets where the TCPE aka 1538 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 1539 * packets (aka let the stack check the crc32c) 1540 */ 1541 if (!(skb->len == 60 && 1542 test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 1543 u64_stats_update_begin(&ring->rx_syncp); 1544 ring->rx_stats.csum_err++; 1545 u64_stats_update_end(&ring->rx_syncp); 1546 } 1547 /* let the stack verify checksum errors */ 1548 return; 1549 } 1550 /* It must be a TCP or UDP packet with a valid checksum */ 1551 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS | 1552 IGC_RXD_STAT_UDPCS)) 1553 skb->ip_summed = CHECKSUM_UNNECESSARY; 1554 1555 netdev_dbg(ring->netdev, "cksum success: bits %08X\n", 1556 le32_to_cpu(rx_desc->wb.upper.status_error)); 1557 } 1558 1559 static inline void igc_rx_hash(struct igc_ring *ring, 1560 union igc_adv_rx_desc *rx_desc, 1561 struct sk_buff *skb) 1562 { 1563 if (ring->netdev->features & NETIF_F_RXHASH) 1564 skb_set_hash(skb, 1565 le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), 1566 PKT_HASH_TYPE_L3); 1567 } 1568 1569 static void igc_rx_vlan(struct igc_ring *rx_ring, 1570 union igc_adv_rx_desc *rx_desc, 1571 struct sk_buff *skb) 1572 { 1573 struct net_device *dev = rx_ring->netdev; 1574 
u16 vid; 1575 1576 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 1577 igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { 1578 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && 1579 test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 1580 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 1581 else 1582 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1583 1584 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1585 } 1586 } 1587 1588 /** 1589 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1590 * @rx_ring: rx descriptor ring packet is being transacted on 1591 * @rx_desc: pointer to the EOP Rx descriptor 1592 * @skb: pointer to current skb being populated 1593 * 1594 * This function checks the ring, descriptor, and packet information in order 1595 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1596 * skb. 1597 */ 1598 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1599 union igc_adv_rx_desc *rx_desc, 1600 struct sk_buff *skb) 1601 { 1602 igc_rx_hash(rx_ring, rx_desc, skb); 1603 1604 igc_rx_checksum(rx_ring, rx_desc, skb); 1605 1606 igc_rx_vlan(rx_ring, rx_desc, skb); 1607 1608 skb_record_rx_queue(skb, rx_ring->queue_index); 1609 1610 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1611 } 1612 1613 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1614 { 1615 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1616 struct igc_adapter *adapter = netdev_priv(netdev); 1617 struct igc_hw *hw = &adapter->hw; 1618 u32 ctrl; 1619 1620 ctrl = rd32(IGC_CTRL); 1621 1622 if (enable) { 1623 /* enable VLAN tag insert/strip */ 1624 ctrl |= IGC_CTRL_VME; 1625 } else { 1626 /* disable VLAN tag insert/strip */ 1627 ctrl &= ~IGC_CTRL_VME; 1628 } 1629 wr32(IGC_CTRL, ctrl); 1630 } 1631 1632 static void igc_restore_vlan(struct igc_adapter *adapter) 1633 { 1634 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1635 } 1636 1637 static struct igc_rx_buffer 
*igc_get_rx_buffer(struct igc_ring *rx_ring, 1638 const unsigned int size, 1639 int *rx_buffer_pgcnt) 1640 { 1641 struct igc_rx_buffer *rx_buffer; 1642 1643 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1644 *rx_buffer_pgcnt = 1645 #if (PAGE_SIZE < 8192) 1646 page_count(rx_buffer->page); 1647 #else 1648 0; 1649 #endif 1650 prefetchw(rx_buffer->page); 1651 1652 /* we are reusing so sync this buffer for CPU use */ 1653 dma_sync_single_range_for_cpu(rx_ring->dev, 1654 rx_buffer->dma, 1655 rx_buffer->page_offset, 1656 size, 1657 DMA_FROM_DEVICE); 1658 1659 rx_buffer->pagecnt_bias--; 1660 1661 return rx_buffer; 1662 } 1663 1664 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1665 unsigned int truesize) 1666 { 1667 #if (PAGE_SIZE < 8192) 1668 buffer->page_offset ^= truesize; 1669 #else 1670 buffer->page_offset += truesize; 1671 #endif 1672 } 1673 1674 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1675 unsigned int size) 1676 { 1677 unsigned int truesize; 1678 1679 #if (PAGE_SIZE < 8192) 1680 truesize = igc_rx_pg_size(ring) / 2; 1681 #else 1682 truesize = ring_uses_build_skb(ring) ? 1683 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1684 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1685 SKB_DATA_ALIGN(size); 1686 #endif 1687 return truesize; 1688 } 1689 1690 /** 1691 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1692 * @rx_ring: rx descriptor ring to transact packets on 1693 * @rx_buffer: buffer containing page to add 1694 * @skb: sk_buff to place the data into 1695 * @size: size of buffer to be added 1696 * 1697 * This function will add the data contained in rx_buffer->page to the skb. 1698 */ 1699 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1700 struct igc_rx_buffer *rx_buffer, 1701 struct sk_buff *skb, 1702 unsigned int size) 1703 { 1704 unsigned int truesize; 1705 1706 #if (PAGE_SIZE < 8192) 1707 truesize = igc_rx_pg_size(rx_ring) / 2; 1708 #else 1709 truesize = ring_uses_build_skb(rx_ring) ? 
1710 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1711 SKB_DATA_ALIGN(size); 1712 #endif 1713 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1714 rx_buffer->page_offset, size, truesize); 1715 1716 igc_rx_buffer_flip(rx_buffer, truesize); 1717 } 1718 1719 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1720 struct igc_rx_buffer *rx_buffer, 1721 struct xdp_buff *xdp) 1722 { 1723 unsigned int size = xdp->data_end - xdp->data; 1724 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1725 unsigned int metasize = xdp->data - xdp->data_meta; 1726 struct sk_buff *skb; 1727 1728 /* prefetch first cache line of first page */ 1729 net_prefetch(xdp->data_meta); 1730 1731 /* build an skb around the page buffer */ 1732 skb = napi_build_skb(xdp->data_hard_start, truesize); 1733 if (unlikely(!skb)) 1734 return NULL; 1735 1736 /* update pointers within the skb to store the data */ 1737 skb_reserve(skb, xdp->data - xdp->data_hard_start); 1738 __skb_put(skb, size); 1739 if (metasize) 1740 skb_metadata_set(skb, metasize); 1741 1742 igc_rx_buffer_flip(rx_buffer, truesize); 1743 return skb; 1744 } 1745 1746 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1747 struct igc_rx_buffer *rx_buffer, 1748 struct xdp_buff *xdp, 1749 ktime_t timestamp) 1750 { 1751 unsigned int metasize = xdp->data - xdp->data_meta; 1752 unsigned int size = xdp->data_end - xdp->data; 1753 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1754 void *va = xdp->data; 1755 unsigned int headlen; 1756 struct sk_buff *skb; 1757 1758 /* prefetch first cache line of first page */ 1759 net_prefetch(xdp->data_meta); 1760 1761 /* allocate a skb to store the frags */ 1762 skb = napi_alloc_skb(&rx_ring->q_vector->napi, 1763 IGC_RX_HDR_LEN + metasize); 1764 if (unlikely(!skb)) 1765 return NULL; 1766 1767 if (timestamp) 1768 skb_hwtstamps(skb)->hwtstamp = timestamp; 1769 1770 /* Determine available headroom for copy */ 1771 headlen = size; 1772 if (headlen > 
IGC_RX_HDR_LEN) 1773 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 1774 1775 /* align pull length to size of long to optimize memcpy performance */ 1776 memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta, 1777 ALIGN(headlen + metasize, sizeof(long))); 1778 1779 if (metasize) { 1780 skb_metadata_set(skb, metasize); 1781 __skb_pull(skb, metasize); 1782 } 1783 1784 /* update all of the pointers */ 1785 size -= headlen; 1786 if (size) { 1787 skb_add_rx_frag(skb, 0, rx_buffer->page, 1788 (va + headlen) - page_address(rx_buffer->page), 1789 size, truesize); 1790 igc_rx_buffer_flip(rx_buffer, truesize); 1791 } else { 1792 rx_buffer->pagecnt_bias++; 1793 } 1794 1795 return skb; 1796 } 1797 1798 /** 1799 * igc_reuse_rx_page - page flip buffer and store it back on the ring 1800 * @rx_ring: rx descriptor ring to store buffers on 1801 * @old_buff: donor buffer to have page reused 1802 * 1803 * Synchronizes page for reuse by the adapter 1804 */ 1805 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 1806 struct igc_rx_buffer *old_buff) 1807 { 1808 u16 nta = rx_ring->next_to_alloc; 1809 struct igc_rx_buffer *new_buff; 1810 1811 new_buff = &rx_ring->rx_buffer_info[nta]; 1812 1813 /* update, and store next to alloc */ 1814 nta++; 1815 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 1816 1817 /* Transfer page from old buffer to new buffer. 1818 * Move each member individually to avoid possible store 1819 * forwarding stalls. 
1820 */ 1821 new_buff->dma = old_buff->dma; 1822 new_buff->page = old_buff->page; 1823 new_buff->page_offset = old_buff->page_offset; 1824 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 1825 } 1826 1827 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 1828 int rx_buffer_pgcnt) 1829 { 1830 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 1831 struct page *page = rx_buffer->page; 1832 1833 /* avoid re-using remote and pfmemalloc pages */ 1834 if (!dev_page_is_reusable(page)) 1835 return false; 1836 1837 #if (PAGE_SIZE < 8192) 1838 /* if we are only owner of page we can reuse it */ 1839 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 1840 return false; 1841 #else 1842 #define IGC_LAST_OFFSET \ 1843 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 1844 1845 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 1846 return false; 1847 #endif 1848 1849 /* If we have drained the page fragment pool we need to update 1850 * the pagecnt_bias and page count so that we fully restock the 1851 * number of references the driver holds. 1852 */ 1853 if (unlikely(pagecnt_bias == 1)) { 1854 page_ref_add(page, USHRT_MAX - 1); 1855 rx_buffer->pagecnt_bias = USHRT_MAX; 1856 } 1857 1858 return true; 1859 } 1860 1861 /** 1862 * igc_is_non_eop - process handling of non-EOP buffers 1863 * @rx_ring: Rx ring being processed 1864 * @rx_desc: Rx descriptor for current buffer 1865 * 1866 * This function updates next to clean. If the buffer is an EOP buffer 1867 * this function exits returning false, otherwise it will place the 1868 * sk_buff in the next buffer to be chained and return true indicating 1869 * that this is in fact a non-EOP buffer. 1870 */ 1871 static bool igc_is_non_eop(struct igc_ring *rx_ring, 1872 union igc_adv_rx_desc *rx_desc) 1873 { 1874 u32 ntc = rx_ring->next_to_clean + 1; 1875 1876 /* fetch, update, and store next to clean */ 1877 ntc = (ntc < rx_ring->count) ? 
ntc : 0; 1878 rx_ring->next_to_clean = ntc; 1879 1880 prefetch(IGC_RX_DESC(rx_ring, ntc)); 1881 1882 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 1883 return false; 1884 1885 return true; 1886 } 1887 1888 /** 1889 * igc_cleanup_headers - Correct corrupted or empty headers 1890 * @rx_ring: rx descriptor ring packet is being transacted on 1891 * @rx_desc: pointer to the EOP Rx descriptor 1892 * @skb: pointer to current skb being fixed 1893 * 1894 * Address the case where we are pulling data in on pages only 1895 * and as such no data is present in the skb header. 1896 * 1897 * In addition if skb is not at least 60 bytes we need to pad it so that 1898 * it is large enough to qualify as a valid Ethernet frame. 1899 * 1900 * Returns true if an error was encountered and skb was freed. 1901 */ 1902 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 1903 union igc_adv_rx_desc *rx_desc, 1904 struct sk_buff *skb) 1905 { 1906 /* XDP packets use error pointer so abort at this point */ 1907 if (IS_ERR(skb)) 1908 return true; 1909 1910 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 1911 struct net_device *netdev = rx_ring->netdev; 1912 1913 if (!(netdev->features & NETIF_F_RXALL)) { 1914 dev_kfree_skb_any(skb); 1915 return true; 1916 } 1917 } 1918 1919 /* if eth_skb_pad returns an error the skb was freed */ 1920 if (eth_skb_pad(skb)) 1921 return true; 1922 1923 return false; 1924 } 1925 1926 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 1927 struct igc_rx_buffer *rx_buffer, 1928 int rx_buffer_pgcnt) 1929 { 1930 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 1931 /* hand second half of page back to the ring */ 1932 igc_reuse_rx_page(rx_ring, rx_buffer); 1933 } else { 1934 /* We are not reusing the buffer so unmap it and free 1935 * any references we are holding to it 1936 */ 1937 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 1938 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 1939 IGC_RX_DMA_ATTR); 1940 
__page_frag_cache_drain(rx_buffer->page, 1941 rx_buffer->pagecnt_bias); 1942 } 1943 1944 /* clear contents of rx_buffer */ 1945 rx_buffer->page = NULL; 1946 } 1947 1948 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 1949 { 1950 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 1951 1952 if (ring_uses_build_skb(rx_ring)) 1953 return IGC_SKB_PAD; 1954 if (igc_xdp_is_enabled(adapter)) 1955 return XDP_PACKET_HEADROOM; 1956 1957 return 0; 1958 } 1959 1960 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 1961 struct igc_rx_buffer *bi) 1962 { 1963 struct page *page = bi->page; 1964 dma_addr_t dma; 1965 1966 /* since we are recycling buffers we should seldom need to alloc */ 1967 if (likely(page)) 1968 return true; 1969 1970 /* alloc new page for storage */ 1971 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 1972 if (unlikely(!page)) { 1973 rx_ring->rx_stats.alloc_failed++; 1974 return false; 1975 } 1976 1977 /* map page for use */ 1978 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 1979 igc_rx_pg_size(rx_ring), 1980 DMA_FROM_DEVICE, 1981 IGC_RX_DMA_ATTR); 1982 1983 /* if mapping failed free memory back to system since 1984 * there isn't much point in holding memory we can't use 1985 */ 1986 if (dma_mapping_error(rx_ring->dev, dma)) { 1987 __free_page(page); 1988 1989 rx_ring->rx_stats.alloc_failed++; 1990 return false; 1991 } 1992 1993 bi->dma = dma; 1994 bi->page = page; 1995 bi->page_offset = igc_rx_offset(rx_ring); 1996 page_ref_add(page, USHRT_MAX - 1); 1997 bi->pagecnt_bias = USHRT_MAX; 1998 1999 return true; 2000 } 2001 2002 /** 2003 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 2004 * @rx_ring: rx descriptor ring 2005 * @cleaned_count: number of buffers to clean 2006 */ 2007 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 2008 { 2009 union igc_adv_rx_desc *rx_desc; 2010 u16 i = rx_ring->next_to_use; 2011 struct igc_rx_buffer *bi; 2012 u16 bufsz; 2013 2014 /* nothing to do 
*/ 2015 if (!cleaned_count) 2016 return; 2017 2018 rx_desc = IGC_RX_DESC(rx_ring, i); 2019 bi = &rx_ring->rx_buffer_info[i]; 2020 i -= rx_ring->count; 2021 2022 bufsz = igc_rx_bufsz(rx_ring); 2023 2024 do { 2025 if (!igc_alloc_mapped_page(rx_ring, bi)) 2026 break; 2027 2028 /* sync the buffer for use by the device */ 2029 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2030 bi->page_offset, bufsz, 2031 DMA_FROM_DEVICE); 2032 2033 /* Refresh the desc even if buffer_addrs didn't change 2034 * because each write-back erases this info. 2035 */ 2036 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2037 2038 rx_desc++; 2039 bi++; 2040 i++; 2041 if (unlikely(!i)) { 2042 rx_desc = IGC_RX_DESC(rx_ring, 0); 2043 bi = rx_ring->rx_buffer_info; 2044 i -= rx_ring->count; 2045 } 2046 2047 /* clear the length for the next_to_use descriptor */ 2048 rx_desc->wb.upper.length = 0; 2049 2050 cleaned_count--; 2051 } while (cleaned_count); 2052 2053 i += rx_ring->count; 2054 2055 if (rx_ring->next_to_use != i) { 2056 /* record the next descriptor to use */ 2057 rx_ring->next_to_use = i; 2058 2059 /* update next to alloc since we have filled the ring */ 2060 rx_ring->next_to_alloc = i; 2061 2062 /* Force memory writes to complete before letting h/w 2063 * know there are new descriptors to fetch. (Only 2064 * applicable for weak-ordered memory model archs, 2065 * such as IA-64). 
2066 */ 2067 wmb(); 2068 writel(i, rx_ring->tail); 2069 } 2070 } 2071 2072 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2073 { 2074 union igc_adv_rx_desc *desc; 2075 u16 i = ring->next_to_use; 2076 struct igc_rx_buffer *bi; 2077 dma_addr_t dma; 2078 bool ok = true; 2079 2080 if (!count) 2081 return ok; 2082 2083 desc = IGC_RX_DESC(ring, i); 2084 bi = &ring->rx_buffer_info[i]; 2085 i -= ring->count; 2086 2087 do { 2088 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2089 if (!bi->xdp) { 2090 ok = false; 2091 break; 2092 } 2093 2094 dma = xsk_buff_xdp_get_dma(bi->xdp); 2095 desc->read.pkt_addr = cpu_to_le64(dma); 2096 2097 desc++; 2098 bi++; 2099 i++; 2100 if (unlikely(!i)) { 2101 desc = IGC_RX_DESC(ring, 0); 2102 bi = ring->rx_buffer_info; 2103 i -= ring->count; 2104 } 2105 2106 /* Clear the length for the next_to_use descriptor. */ 2107 desc->wb.upper.length = 0; 2108 2109 count--; 2110 } while (count); 2111 2112 i += ring->count; 2113 2114 if (ring->next_to_use != i) { 2115 ring->next_to_use = i; 2116 2117 /* Force memory writes to complete before letting h/w 2118 * know there are new descriptors to fetch. (Only 2119 * applicable for weak-ordered memory model archs, 2120 * such as IA-64). 
2121 */ 2122 wmb(); 2123 writel(i, ring->tail); 2124 } 2125 2126 return ok; 2127 } 2128 2129 static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, 2130 struct xdp_frame *xdpf, 2131 struct igc_ring *ring) 2132 { 2133 dma_addr_t dma; 2134 2135 dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); 2136 if (dma_mapping_error(ring->dev, dma)) { 2137 netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); 2138 return -ENOMEM; 2139 } 2140 2141 buffer->type = IGC_TX_BUFFER_TYPE_XDP; 2142 buffer->xdpf = xdpf; 2143 buffer->protocol = 0; 2144 buffer->bytecount = xdpf->len; 2145 buffer->gso_segs = 1; 2146 buffer->time_stamp = jiffies; 2147 dma_unmap_len_set(buffer, len, xdpf->len); 2148 dma_unmap_addr_set(buffer, dma, dma); 2149 return 0; 2150 } 2151 2152 /* This function requires __netif_tx_lock is held by the caller. */ 2153 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2154 struct xdp_frame *xdpf) 2155 { 2156 struct igc_tx_buffer *buffer; 2157 union igc_adv_tx_desc *desc; 2158 u32 cmd_type, olinfo_status; 2159 int err; 2160 2161 if (!igc_desc_unused(ring)) 2162 return -EBUSY; 2163 2164 buffer = &ring->tx_buffer_info[ring->next_to_use]; 2165 err = igc_xdp_init_tx_buffer(buffer, xdpf, ring); 2166 if (err) 2167 return err; 2168 2169 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2170 IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | 2171 buffer->bytecount; 2172 olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2173 2174 desc = IGC_TX_DESC(ring, ring->next_to_use); 2175 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2176 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2177 desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); 2178 2179 netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); 2180 2181 buffer->next_to_watch = desc; 2182 2183 ring->next_to_use++; 2184 if (ring->next_to_use == ring->count) 2185 ring->next_to_use = 0; 2186 2187 return 0; 2188 } 2189 2190 static struct igc_ring 
*igc_xdp_get_tx_ring(struct igc_adapter *adapter, 2191 int cpu) 2192 { 2193 int index = cpu; 2194 2195 if (unlikely(index < 0)) 2196 index = 0; 2197 2198 while (index >= adapter->num_tx_queues) 2199 index -= adapter->num_tx_queues; 2200 2201 return adapter->tx_ring[index]; 2202 } 2203 2204 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2205 { 2206 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2207 int cpu = smp_processor_id(); 2208 struct netdev_queue *nq; 2209 struct igc_ring *ring; 2210 int res; 2211 2212 if (unlikely(!xdpf)) 2213 return -EFAULT; 2214 2215 ring = igc_xdp_get_tx_ring(adapter, cpu); 2216 nq = txring_txq(ring); 2217 2218 __netif_tx_lock(nq, cpu); 2219 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2220 __netif_tx_unlock(nq); 2221 return res; 2222 } 2223 2224 /* This function assumes rcu_read_lock() is held by the caller. */ 2225 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2226 struct bpf_prog *prog, 2227 struct xdp_buff *xdp) 2228 { 2229 u32 act = bpf_prog_run_xdp(prog, xdp); 2230 2231 switch (act) { 2232 case XDP_PASS: 2233 return IGC_XDP_PASS; 2234 case XDP_TX: 2235 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2236 goto out_failure; 2237 return IGC_XDP_TX; 2238 case XDP_REDIRECT: 2239 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2240 goto out_failure; 2241 return IGC_XDP_REDIRECT; 2242 break; 2243 default: 2244 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act); 2245 fallthrough; 2246 case XDP_ABORTED: 2247 out_failure: 2248 trace_xdp_exception(adapter->netdev, prog, act); 2249 fallthrough; 2250 case XDP_DROP: 2251 return IGC_XDP_CONSUMED; 2252 } 2253 } 2254 2255 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, 2256 struct xdp_buff *xdp) 2257 { 2258 struct bpf_prog *prog; 2259 int res; 2260 2261 prog = READ_ONCE(adapter->xdp_prog); 2262 if (!prog) { 2263 res = IGC_XDP_PASS; 2264 goto out; 2265 } 2266 2267 res = __igc_xdp_run_prog(adapter, prog, xdp); 2268 2269 
out: 2270 return ERR_PTR(-res); 2271 } 2272 2273 /* This function assumes __netif_tx_lock is held by the caller. */ 2274 static void igc_flush_tx_descriptors(struct igc_ring *ring) 2275 { 2276 /* Once tail pointer is updated, hardware can fetch the descriptors 2277 * any time so we issue a write membar here to ensure all memory 2278 * writes are complete before the tail pointer is updated. 2279 */ 2280 wmb(); 2281 writel(ring->next_to_use, ring->tail); 2282 } 2283 2284 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2285 { 2286 int cpu = smp_processor_id(); 2287 struct netdev_queue *nq; 2288 struct igc_ring *ring; 2289 2290 if (status & IGC_XDP_TX) { 2291 ring = igc_xdp_get_tx_ring(adapter, cpu); 2292 nq = txring_txq(ring); 2293 2294 __netif_tx_lock(nq, cpu); 2295 igc_flush_tx_descriptors(ring); 2296 __netif_tx_unlock(nq); 2297 } 2298 2299 if (status & IGC_XDP_REDIRECT) 2300 xdp_do_flush(); 2301 } 2302 2303 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2304 unsigned int packets, unsigned int bytes) 2305 { 2306 struct igc_ring *ring = q_vector->rx.ring; 2307 2308 u64_stats_update_begin(&ring->rx_syncp); 2309 ring->rx_stats.packets += packets; 2310 ring->rx_stats.bytes += bytes; 2311 u64_stats_update_end(&ring->rx_syncp); 2312 2313 q_vector->rx.total_packets += packets; 2314 q_vector->rx.total_bytes += bytes; 2315 } 2316 2317 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2318 { 2319 unsigned int total_bytes = 0, total_packets = 0; 2320 struct igc_adapter *adapter = q_vector->adapter; 2321 struct igc_ring *rx_ring = q_vector->rx.ring; 2322 struct sk_buff *skb = rx_ring->skb; 2323 u16 cleaned_count = igc_desc_unused(rx_ring); 2324 int xdp_status = 0, rx_buffer_pgcnt; 2325 2326 while (likely(total_packets < budget)) { 2327 union igc_adv_rx_desc *rx_desc; 2328 struct igc_rx_buffer *rx_buffer; 2329 unsigned int size, truesize; 2330 ktime_t timestamp = 0; 2331 struct xdp_buff xdp; 2332 int pkt_offset = 
0; 2333 void *pktbuf; 2334 2335 /* return some buffers to hardware, one at a time is too slow */ 2336 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2337 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2338 cleaned_count = 0; 2339 } 2340 2341 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2342 size = le16_to_cpu(rx_desc->wb.upper.length); 2343 if (!size) 2344 break; 2345 2346 /* This memory barrier is needed to keep us from reading 2347 * any other fields out of the rx_desc until we know the 2348 * descriptor has been written back 2349 */ 2350 dma_rmb(); 2351 2352 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2353 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2354 2355 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2356 2357 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2358 timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, 2359 pktbuf); 2360 pkt_offset = IGC_TS_HDR_LEN; 2361 size -= IGC_TS_HDR_LEN; 2362 } 2363 2364 if (!skb) { 2365 xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); 2366 xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), 2367 igc_rx_offset(rx_ring) + pkt_offset, 2368 size, true); 2369 2370 skb = igc_xdp_run_prog(adapter, &xdp); 2371 } 2372 2373 if (IS_ERR(skb)) { 2374 unsigned int xdp_res = -PTR_ERR(skb); 2375 2376 switch (xdp_res) { 2377 case IGC_XDP_CONSUMED: 2378 rx_buffer->pagecnt_bias++; 2379 break; 2380 case IGC_XDP_TX: 2381 case IGC_XDP_REDIRECT: 2382 igc_rx_buffer_flip(rx_buffer, truesize); 2383 xdp_status |= xdp_res; 2384 break; 2385 } 2386 2387 total_packets++; 2388 total_bytes += size; 2389 } else if (skb) 2390 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2391 else if (ring_uses_build_skb(rx_ring)) 2392 skb = igc_build_skb(rx_ring, rx_buffer, &xdp); 2393 else 2394 skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, 2395 timestamp); 2396 2397 /* exit if we failed to retrieve a buffer */ 2398 if (!skb) { 2399 rx_ring->rx_stats.alloc_failed++; 2400 rx_buffer->pagecnt_bias++; 2401 
break; 2402 } 2403 2404 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2405 cleaned_count++; 2406 2407 /* fetch next buffer in frame if non-eop */ 2408 if (igc_is_non_eop(rx_ring, rx_desc)) 2409 continue; 2410 2411 /* verify the packet layout is correct */ 2412 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2413 skb = NULL; 2414 continue; 2415 } 2416 2417 /* probably a little skewed due to removing CRC */ 2418 total_bytes += skb->len; 2419 2420 /* populate checksum, VLAN, and protocol */ 2421 igc_process_skb_fields(rx_ring, rx_desc, skb); 2422 2423 napi_gro_receive(&q_vector->napi, skb); 2424 2425 /* reset skb pointer */ 2426 skb = NULL; 2427 2428 /* update budget accounting */ 2429 total_packets++; 2430 } 2431 2432 if (xdp_status) 2433 igc_finalize_xdp(adapter, xdp_status); 2434 2435 /* place incomplete frames back on ring for completion */ 2436 rx_ring->skb = skb; 2437 2438 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2439 2440 if (cleaned_count) 2441 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2442 2443 return total_packets; 2444 } 2445 2446 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2447 struct xdp_buff *xdp) 2448 { 2449 unsigned int metasize = xdp->data - xdp->data_meta; 2450 unsigned int datasize = xdp->data_end - xdp->data; 2451 unsigned int totalsize = metasize + datasize; 2452 struct sk_buff *skb; 2453 2454 skb = __napi_alloc_skb(&ring->q_vector->napi, 2455 xdp->data_end - xdp->data_hard_start, 2456 GFP_ATOMIC | __GFP_NOWARN); 2457 if (unlikely(!skb)) 2458 return NULL; 2459 2460 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 2461 memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); 2462 if (metasize) { 2463 skb_metadata_set(skb, metasize); 2464 __skb_pull(skb, metasize); 2465 } 2466 2467 return skb; 2468 } 2469 2470 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2471 union igc_adv_rx_desc *desc, 2472 struct xdp_buff *xdp, 2473 ktime_t timestamp) 2474 { 2475 struct 
igc_ring *ring = q_vector->rx.ring; 2476 struct sk_buff *skb; 2477 2478 skb = igc_construct_skb_zc(ring, xdp); 2479 if (!skb) { 2480 ring->rx_stats.alloc_failed++; 2481 return; 2482 } 2483 2484 if (timestamp) 2485 skb_hwtstamps(skb)->hwtstamp = timestamp; 2486 2487 if (igc_cleanup_headers(ring, desc, skb)) 2488 return; 2489 2490 igc_process_skb_fields(ring, desc, skb); 2491 napi_gro_receive(&q_vector->napi, skb); 2492 } 2493 2494 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2495 { 2496 struct igc_adapter *adapter = q_vector->adapter; 2497 struct igc_ring *ring = q_vector->rx.ring; 2498 u16 cleaned_count = igc_desc_unused(ring); 2499 int total_bytes = 0, total_packets = 0; 2500 u16 ntc = ring->next_to_clean; 2501 struct bpf_prog *prog; 2502 bool failure = false; 2503 int xdp_status = 0; 2504 2505 rcu_read_lock(); 2506 2507 prog = READ_ONCE(adapter->xdp_prog); 2508 2509 while (likely(total_packets < budget)) { 2510 union igc_adv_rx_desc *desc; 2511 struct igc_rx_buffer *bi; 2512 ktime_t timestamp = 0; 2513 unsigned int size; 2514 int res; 2515 2516 desc = IGC_RX_DESC(ring, ntc); 2517 size = le16_to_cpu(desc->wb.upper.length); 2518 if (!size) 2519 break; 2520 2521 /* This memory barrier is needed to keep us from reading 2522 * any other fields out of the rx_desc until we know the 2523 * descriptor has been written back 2524 */ 2525 dma_rmb(); 2526 2527 bi = &ring->rx_buffer_info[ntc]; 2528 2529 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2530 timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, 2531 bi->xdp->data); 2532 2533 bi->xdp->data += IGC_TS_HDR_LEN; 2534 2535 /* HW timestamp has been copied into local variable. Metadata 2536 * length when XDP program is called should be 0. 
2537 */ 2538 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2539 size -= IGC_TS_HDR_LEN; 2540 } 2541 2542 bi->xdp->data_end = bi->xdp->data + size; 2543 xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool); 2544 2545 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2546 switch (res) { 2547 case IGC_XDP_PASS: 2548 igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); 2549 fallthrough; 2550 case IGC_XDP_CONSUMED: 2551 xsk_buff_free(bi->xdp); 2552 break; 2553 case IGC_XDP_TX: 2554 case IGC_XDP_REDIRECT: 2555 xdp_status |= res; 2556 break; 2557 } 2558 2559 bi->xdp = NULL; 2560 total_bytes += size; 2561 total_packets++; 2562 cleaned_count++; 2563 ntc++; 2564 if (ntc == ring->count) 2565 ntc = 0; 2566 } 2567 2568 ring->next_to_clean = ntc; 2569 rcu_read_unlock(); 2570 2571 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2572 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2573 2574 if (xdp_status) 2575 igc_finalize_xdp(adapter, xdp_status); 2576 2577 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2578 2579 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2580 if (failure || ring->next_to_clean == ring->next_to_use) 2581 xsk_set_rx_need_wakeup(ring->xsk_pool); 2582 else 2583 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2584 return total_packets; 2585 } 2586 2587 return failure ? 
budget : total_packets; 2588 } 2589 2590 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2591 unsigned int packets, unsigned int bytes) 2592 { 2593 struct igc_ring *ring = q_vector->tx.ring; 2594 2595 u64_stats_update_begin(&ring->tx_syncp); 2596 ring->tx_stats.bytes += bytes; 2597 ring->tx_stats.packets += packets; 2598 u64_stats_update_end(&ring->tx_syncp); 2599 2600 q_vector->tx.total_bytes += bytes; 2601 q_vector->tx.total_packets += packets; 2602 } 2603 2604 static void igc_xdp_xmit_zc(struct igc_ring *ring) 2605 { 2606 struct xsk_buff_pool *pool = ring->xsk_pool; 2607 struct netdev_queue *nq = txring_txq(ring); 2608 union igc_adv_tx_desc *tx_desc = NULL; 2609 int cpu = smp_processor_id(); 2610 u16 ntu = ring->next_to_use; 2611 struct xdp_desc xdp_desc; 2612 u16 budget; 2613 2614 if (!netif_carrier_ok(ring->netdev)) 2615 return; 2616 2617 __netif_tx_lock(nq, cpu); 2618 2619 budget = igc_desc_unused(ring); 2620 2621 while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { 2622 u32 cmd_type, olinfo_status; 2623 struct igc_tx_buffer *bi; 2624 dma_addr_t dma; 2625 2626 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2627 IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | 2628 xdp_desc.len; 2629 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 2630 2631 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 2632 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 2633 2634 tx_desc = IGC_TX_DESC(ring, ntu); 2635 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2636 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2637 tx_desc->read.buffer_addr = cpu_to_le64(dma); 2638 2639 bi = &ring->tx_buffer_info[ntu]; 2640 bi->type = IGC_TX_BUFFER_TYPE_XSK; 2641 bi->protocol = 0; 2642 bi->bytecount = xdp_desc.len; 2643 bi->gso_segs = 1; 2644 bi->time_stamp = jiffies; 2645 bi->next_to_watch = tx_desc; 2646 2647 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 2648 2649 ntu++; 2650 if (ntu == ring->count) 2651 ntu = 0; 2652 } 2653 2654 
ring->next_to_use = ntu; 2655 if (tx_desc) { 2656 igc_flush_tx_descriptors(ring); 2657 xsk_tx_release(pool); 2658 } 2659 2660 __netif_tx_unlock(nq); 2661 } 2662 2663 /** 2664 * igc_clean_tx_irq - Reclaim resources after transmit completes 2665 * @q_vector: pointer to q_vector containing needed info 2666 * @napi_budget: Used to determine if we are in netpoll 2667 * 2668 * returns true if ring is completely cleaned 2669 */ 2670 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 2671 { 2672 struct igc_adapter *adapter = q_vector->adapter; 2673 unsigned int total_bytes = 0, total_packets = 0; 2674 unsigned int budget = q_vector->tx.work_limit; 2675 struct igc_ring *tx_ring = q_vector->tx.ring; 2676 unsigned int i = tx_ring->next_to_clean; 2677 struct igc_tx_buffer *tx_buffer; 2678 union igc_adv_tx_desc *tx_desc; 2679 u32 xsk_frames = 0; 2680 2681 if (test_bit(__IGC_DOWN, &adapter->state)) 2682 return true; 2683 2684 tx_buffer = &tx_ring->tx_buffer_info[i]; 2685 tx_desc = IGC_TX_DESC(tx_ring, i); 2686 i -= tx_ring->count; 2687 2688 do { 2689 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 2690 2691 /* if next_to_watch is not set then there is no work pending */ 2692 if (!eop_desc) 2693 break; 2694 2695 /* prevent any other reads prior to eop_desc */ 2696 smp_rmb(); 2697 2698 /* if DD is not set pending work has not been completed */ 2699 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 2700 break; 2701 2702 /* clear next_to_watch to prevent false hangs */ 2703 tx_buffer->next_to_watch = NULL; 2704 2705 /* update the statistics for this packet */ 2706 total_bytes += tx_buffer->bytecount; 2707 total_packets += tx_buffer->gso_segs; 2708 2709 switch (tx_buffer->type) { 2710 case IGC_TX_BUFFER_TYPE_XSK: 2711 xsk_frames++; 2712 break; 2713 case IGC_TX_BUFFER_TYPE_XDP: 2714 xdp_return_frame(tx_buffer->xdpf); 2715 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2716 break; 2717 case IGC_TX_BUFFER_TYPE_SKB: 2718 
napi_consume_skb(tx_buffer->skb, napi_budget); 2719 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2720 break; 2721 default: 2722 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 2723 break; 2724 } 2725 2726 /* clear last DMA location and unmap remaining buffers */ 2727 while (tx_desc != eop_desc) { 2728 tx_buffer++; 2729 tx_desc++; 2730 i++; 2731 if (unlikely(!i)) { 2732 i -= tx_ring->count; 2733 tx_buffer = tx_ring->tx_buffer_info; 2734 tx_desc = IGC_TX_DESC(tx_ring, 0); 2735 } 2736 2737 /* unmap any remaining paged data */ 2738 if (dma_unmap_len(tx_buffer, len)) 2739 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2740 } 2741 2742 /* move us one more past the eop_desc for start of next pkt */ 2743 tx_buffer++; 2744 tx_desc++; 2745 i++; 2746 if (unlikely(!i)) { 2747 i -= tx_ring->count; 2748 tx_buffer = tx_ring->tx_buffer_info; 2749 tx_desc = IGC_TX_DESC(tx_ring, 0); 2750 } 2751 2752 /* issue prefetch for next Tx descriptor */ 2753 prefetch(tx_desc); 2754 2755 /* update budget accounting */ 2756 budget--; 2757 } while (likely(budget)); 2758 2759 netdev_tx_completed_queue(txring_txq(tx_ring), 2760 total_packets, total_bytes); 2761 2762 i += tx_ring->count; 2763 tx_ring->next_to_clean = i; 2764 2765 igc_update_tx_stats(q_vector, total_packets, total_bytes); 2766 2767 if (tx_ring->xsk_pool) { 2768 if (xsk_frames) 2769 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 2770 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 2771 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 2772 igc_xdp_xmit_zc(tx_ring); 2773 } 2774 2775 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 2776 struct igc_hw *hw = &adapter->hw; 2777 2778 /* Detect a transmit hang in hardware, this serializes the 2779 * check with the clearing of time_stamp and movement of i 2780 */ 2781 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 2782 if (tx_buffer->next_to_watch && 2783 time_after(jiffies, tx_buffer->time_stamp + 2784 (adapter->tx_timeout_factor * HZ)) && 2785 
!(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { 2786 /* detected Tx unit hang */ 2787 netdev_err(tx_ring->netdev, 2788 "Detected Tx Unit Hang\n" 2789 " Tx Queue <%d>\n" 2790 " TDH <%x>\n" 2791 " TDT <%x>\n" 2792 " next_to_use <%x>\n" 2793 " next_to_clean <%x>\n" 2794 "buffer_info[next_to_clean]\n" 2795 " time_stamp <%lx>\n" 2796 " next_to_watch <%p>\n" 2797 " jiffies <%lx>\n" 2798 " desc.status <%x>\n", 2799 tx_ring->queue_index, 2800 rd32(IGC_TDH(tx_ring->reg_idx)), 2801 readl(tx_ring->tail), 2802 tx_ring->next_to_use, 2803 tx_ring->next_to_clean, 2804 tx_buffer->time_stamp, 2805 tx_buffer->next_to_watch, 2806 jiffies, 2807 tx_buffer->next_to_watch->wb.status); 2808 netif_stop_subqueue(tx_ring->netdev, 2809 tx_ring->queue_index); 2810 2811 /* we are about to reset, no point in enabling stuff */ 2812 return true; 2813 } 2814 } 2815 2816 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 2817 if (unlikely(total_packets && 2818 netif_carrier_ok(tx_ring->netdev) && 2819 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 2820 /* Make sure that anybody stopping the queue after this 2821 * sees the new next_to_clean. 
2822 */ 2823 smp_mb(); 2824 if (__netif_subqueue_stopped(tx_ring->netdev, 2825 tx_ring->queue_index) && 2826 !(test_bit(__IGC_DOWN, &adapter->state))) { 2827 netif_wake_subqueue(tx_ring->netdev, 2828 tx_ring->queue_index); 2829 2830 u64_stats_update_begin(&tx_ring->tx_syncp); 2831 tx_ring->tx_stats.restart_queue++; 2832 u64_stats_update_end(&tx_ring->tx_syncp); 2833 } 2834 } 2835 2836 return !!budget; 2837 } 2838 2839 static int igc_find_mac_filter(struct igc_adapter *adapter, 2840 enum igc_mac_filter_type type, const u8 *addr) 2841 { 2842 struct igc_hw *hw = &adapter->hw; 2843 int max_entries = hw->mac.rar_entry_count; 2844 u32 ral, rah; 2845 int i; 2846 2847 for (i = 0; i < max_entries; i++) { 2848 ral = rd32(IGC_RAL(i)); 2849 rah = rd32(IGC_RAH(i)); 2850 2851 if (!(rah & IGC_RAH_AV)) 2852 continue; 2853 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 2854 continue; 2855 if ((rah & IGC_RAH_RAH_MASK) != 2856 le16_to_cpup((__le16 *)(addr + 4))) 2857 continue; 2858 if (ral != le32_to_cpup((__le32 *)(addr))) 2859 continue; 2860 2861 return i; 2862 } 2863 2864 return -1; 2865 } 2866 2867 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 2868 { 2869 struct igc_hw *hw = &adapter->hw; 2870 int max_entries = hw->mac.rar_entry_count; 2871 u32 rah; 2872 int i; 2873 2874 for (i = 0; i < max_entries; i++) { 2875 rah = rd32(IGC_RAH(i)); 2876 2877 if (!(rah & IGC_RAH_AV)) 2878 return i; 2879 } 2880 2881 return -1; 2882 } 2883 2884 /** 2885 * igc_add_mac_filter() - Add MAC address filter 2886 * @adapter: Pointer to adapter where the filter should be added 2887 * @type: MAC address filter type (source or destination) 2888 * @addr: MAC address 2889 * @queue: If non-negative, queue assignment feature is enabled and frames 2890 * matching the filter are enqueued onto 'queue'. Otherwise, queue 2891 * assignment is disabled. 2892 * 2893 * Return: 0 in case of success, negative errno code otherwise. 
2894 */ 2895 static int igc_add_mac_filter(struct igc_adapter *adapter, 2896 enum igc_mac_filter_type type, const u8 *addr, 2897 int queue) 2898 { 2899 struct net_device *dev = adapter->netdev; 2900 int index; 2901 2902 index = igc_find_mac_filter(adapter, type, addr); 2903 if (index >= 0) 2904 goto update_filter; 2905 2906 index = igc_get_avail_mac_filter_slot(adapter); 2907 if (index < 0) 2908 return -ENOSPC; 2909 2910 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 2911 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 2912 addr, queue); 2913 2914 update_filter: 2915 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 2916 return 0; 2917 } 2918 2919 /** 2920 * igc_del_mac_filter() - Delete MAC address filter 2921 * @adapter: Pointer to adapter where the filter should be deleted from 2922 * @type: MAC address filter type (source or destination) 2923 * @addr: MAC address 2924 */ 2925 static void igc_del_mac_filter(struct igc_adapter *adapter, 2926 enum igc_mac_filter_type type, const u8 *addr) 2927 { 2928 struct net_device *dev = adapter->netdev; 2929 int index; 2930 2931 index = igc_find_mac_filter(adapter, type, addr); 2932 if (index < 0) 2933 return; 2934 2935 if (index == 0) { 2936 /* If this is the default filter, we don't actually delete it. 2937 * We just reset to its default value i.e. disable queue 2938 * assignment. 2939 */ 2940 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 2941 2942 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 2943 } else { 2944 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 2945 index, 2946 type == IGC_MAC_FILTER_TYPE_DST ? 
"dst" : "src", 2947 addr); 2948 2949 igc_clear_mac_filter_hw(adapter, index); 2950 } 2951 } 2952 2953 /** 2954 * igc_add_vlan_prio_filter() - Add VLAN priority filter 2955 * @adapter: Pointer to adapter where the filter should be added 2956 * @prio: VLAN priority value 2957 * @queue: Queue number which matching frames are assigned to 2958 * 2959 * Return: 0 in case of success, negative errno code otherwise. 2960 */ 2961 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 2962 int queue) 2963 { 2964 struct net_device *dev = adapter->netdev; 2965 struct igc_hw *hw = &adapter->hw; 2966 u32 vlanpqf; 2967 2968 vlanpqf = rd32(IGC_VLANPQF); 2969 2970 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 2971 netdev_dbg(dev, "VLAN priority filter already in use\n"); 2972 return -EEXIST; 2973 } 2974 2975 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 2976 vlanpqf |= IGC_VLANPQF_VALID(prio); 2977 2978 wr32(IGC_VLANPQF, vlanpqf); 2979 2980 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 2981 prio, queue); 2982 return 0; 2983 } 2984 2985 /** 2986 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 2987 * @adapter: Pointer to adapter where the filter should be deleted from 2988 * @prio: VLAN priority value 2989 */ 2990 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 2991 { 2992 struct igc_hw *hw = &adapter->hw; 2993 u32 vlanpqf; 2994 2995 vlanpqf = rd32(IGC_VLANPQF); 2996 2997 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 2998 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 2999 3000 wr32(IGC_VLANPQF, vlanpqf); 3001 3002 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 3003 prio); 3004 } 3005 3006 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 3007 { 3008 struct igc_hw *hw = &adapter->hw; 3009 int i; 3010 3011 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3012 u32 etqf = rd32(IGC_ETQF(i)); 3013 3014 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 3015 return i; 3016 } 3017 3018 return -1; 
3019 } 3020 3021 /** 3022 * igc_add_etype_filter() - Add ethertype filter 3023 * @adapter: Pointer to adapter where the filter should be added 3024 * @etype: Ethertype value 3025 * @queue: If non-negative, queue assignment feature is enabled and frames 3026 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3027 * assignment is disabled. 3028 * 3029 * Return: 0 in case of success, negative errno code otherwise. 3030 */ 3031 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3032 int queue) 3033 { 3034 struct igc_hw *hw = &adapter->hw; 3035 int index; 3036 u32 etqf; 3037 3038 index = igc_get_avail_etype_filter_slot(adapter); 3039 if (index < 0) 3040 return -ENOSPC; 3041 3042 etqf = rd32(IGC_ETQF(index)); 3043 3044 etqf &= ~IGC_ETQF_ETYPE_MASK; 3045 etqf |= etype; 3046 3047 if (queue >= 0) { 3048 etqf &= ~IGC_ETQF_QUEUE_MASK; 3049 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3050 etqf |= IGC_ETQF_QUEUE_ENABLE; 3051 } 3052 3053 etqf |= IGC_ETQF_FILTER_ENABLE; 3054 3055 wr32(IGC_ETQF(index), etqf); 3056 3057 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3058 etype, queue); 3059 return 0; 3060 } 3061 3062 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3063 { 3064 struct igc_hw *hw = &adapter->hw; 3065 int i; 3066 3067 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3068 u32 etqf = rd32(IGC_ETQF(i)); 3069 3070 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3071 return i; 3072 } 3073 3074 return -1; 3075 } 3076 3077 /** 3078 * igc_del_etype_filter() - Delete ethertype filter 3079 * @adapter: Pointer to adapter where the filter should be deleted from 3080 * @etype: Ethertype value 3081 */ 3082 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3083 { 3084 struct igc_hw *hw = &adapter->hw; 3085 int index; 3086 3087 index = igc_find_etype_filter(adapter, etype); 3088 if (index < 0) 3089 return; 3090 3091 wr32(IGC_ETQF(index), 0); 3092 3093 netdev_dbg(adapter->netdev, "Delete 
ethertype filter: etype %04x\n", 3094 etype); 3095 } 3096 3097 static int igc_flex_filter_select(struct igc_adapter *adapter, 3098 struct igc_flex_filter *input, 3099 u32 *fhft) 3100 { 3101 struct igc_hw *hw = &adapter->hw; 3102 u8 fhft_index; 3103 u32 fhftsl; 3104 3105 if (input->index >= MAX_FLEX_FILTER) { 3106 dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n"); 3107 return -EINVAL; 3108 } 3109 3110 /* Indirect table select register */ 3111 fhftsl = rd32(IGC_FHFTSL); 3112 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3113 switch (input->index) { 3114 case 0 ... 7: 3115 fhftsl |= 0x00; 3116 break; 3117 case 8 ... 15: 3118 fhftsl |= 0x01; 3119 break; 3120 case 16 ... 23: 3121 fhftsl |= 0x02; 3122 break; 3123 case 24 ... 31: 3124 fhftsl |= 0x03; 3125 break; 3126 } 3127 wr32(IGC_FHFTSL, fhftsl); 3128 3129 /* Normalize index down to host table register */ 3130 fhft_index = input->index % 8; 3131 3132 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3133 IGC_FHFT_EXT(fhft_index - 4); 3134 3135 return 0; 3136 } 3137 3138 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3139 struct igc_flex_filter *input) 3140 { 3141 struct device *dev = &adapter->pdev->dev; 3142 struct igc_hw *hw = &adapter->hw; 3143 u8 *data = input->data; 3144 u8 *mask = input->mask; 3145 u32 queuing; 3146 u32 fhft; 3147 u32 wufc; 3148 int ret; 3149 int i; 3150 3151 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3152 * out early to avoid surprises later. 3153 */ 3154 if (input->length % 8 != 0) { 3155 dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n"); 3156 return -EINVAL; 3157 } 3158 3159 /* Select corresponding flex filter register and get base for host table. */ 3160 ret = igc_flex_filter_select(adapter, input, &fhft); 3161 if (ret) 3162 return ret; 3163 3164 /* When adding a filter globally disable flex filter feature. That is 3165 * recommended within the datasheet. 
3166 */ 3167 wufc = rd32(IGC_WUFC); 3168 wufc &= ~IGC_WUFC_FLEX_HQ; 3169 wr32(IGC_WUFC, wufc); 3170 3171 /* Configure filter */ 3172 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3173 queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK; 3174 queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK; 3175 3176 if (input->immediate_irq) 3177 queuing |= IGC_FHFT_IMM_INT; 3178 3179 if (input->drop) 3180 queuing |= IGC_FHFT_DROP; 3181 3182 wr32(fhft + 0xFC, queuing); 3183 3184 /* Write data (128 byte) and mask (128 bit) */ 3185 for (i = 0; i < 16; ++i) { 3186 const size_t data_idx = i * 8; 3187 const size_t row_idx = i * 16; 3188 u32 dw0 = 3189 (data[data_idx + 0] << 0) | 3190 (data[data_idx + 1] << 8) | 3191 (data[data_idx + 2] << 16) | 3192 (data[data_idx + 3] << 24); 3193 u32 dw1 = 3194 (data[data_idx + 4] << 0) | 3195 (data[data_idx + 5] << 8) | 3196 (data[data_idx + 6] << 16) | 3197 (data[data_idx + 7] << 24); 3198 u32 tmp; 3199 3200 /* Write row: dw0, dw1 and mask */ 3201 wr32(fhft + row_idx, dw0); 3202 wr32(fhft + row_idx + 4, dw1); 3203 3204 /* mask is only valid for MASK(7, 0) */ 3205 tmp = rd32(fhft + row_idx + 8); 3206 tmp &= ~GENMASK(7, 0); 3207 tmp |= mask[i]; 3208 wr32(fhft + row_idx + 8, tmp); 3209 } 3210 3211 /* Enable filter. */ 3212 wufc |= IGC_WUFC_FLEX_HQ; 3213 if (input->index > 8) { 3214 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. 
*/ 3215 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3216 3217 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3218 3219 wr32(IGC_WUFC_EXT, wufc_ext); 3220 } else { 3221 wufc |= (IGC_WUFC_FLX0 << input->index); 3222 } 3223 wr32(IGC_WUFC, wufc); 3224 3225 dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n", 3226 input->index); 3227 3228 return 0; 3229 } 3230 3231 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3232 const void *src, unsigned int offset, 3233 size_t len, const void *mask) 3234 { 3235 int i; 3236 3237 /* data */ 3238 memcpy(&flex->data[offset], src, len); 3239 3240 /* mask */ 3241 for (i = 0; i < len; ++i) { 3242 const unsigned int idx = i + offset; 3243 const u8 *ptr = mask; 3244 3245 if (mask) { 3246 if (ptr[i] & 0xff) 3247 flex->mask[idx / 8] |= BIT(idx % 8); 3248 3249 continue; 3250 } 3251 3252 flex->mask[idx / 8] |= BIT(idx % 8); 3253 } 3254 } 3255 3256 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3257 { 3258 struct igc_hw *hw = &adapter->hw; 3259 u32 wufc, wufc_ext; 3260 int i; 3261 3262 wufc = rd32(IGC_WUFC); 3263 wufc_ext = rd32(IGC_WUFC_EXT); 3264 3265 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3266 if (i < 8) { 3267 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3268 return i; 3269 } else { 3270 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3271 return i; 3272 } 3273 } 3274 3275 return -ENOSPC; 3276 } 3277 3278 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3279 { 3280 struct igc_hw *hw = &adapter->hw; 3281 u32 wufc, wufc_ext; 3282 3283 wufc = rd32(IGC_WUFC); 3284 wufc_ext = rd32(IGC_WUFC_EXT); 3285 3286 if (wufc & IGC_WUFC_FILTER_MASK) 3287 return true; 3288 3289 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3290 return true; 3291 3292 return false; 3293 } 3294 3295 static int igc_add_flex_filter(struct igc_adapter *adapter, 3296 struct igc_nfc_rule *rule) 3297 { 3298 struct igc_flex_filter flex = { }; 3299 struct igc_nfc_filter *filter = &rule->filter; 3300 unsigned int eth_offset, 
user_offset; 3301 int ret, index; 3302 bool vlan; 3303 3304 index = igc_find_avail_flex_filter_slot(adapter); 3305 if (index < 0) 3306 return -ENOSPC; 3307 3308 /* Construct the flex filter: 3309 * -> dest_mac [6] 3310 * -> src_mac [6] 3311 * -> tpid [2] 3312 * -> vlan tci [2] 3313 * -> ether type [2] 3314 * -> user data [8] 3315 * -> = 26 bytes => 32 length 3316 */ 3317 flex.index = index; 3318 flex.length = 32; 3319 flex.rx_queue = rule->action; 3320 3321 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3322 eth_offset = vlan ? 16 : 12; 3323 user_offset = vlan ? 18 : 14; 3324 3325 /* Add destination MAC */ 3326 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3327 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3328 ETH_ALEN, NULL); 3329 3330 /* Add source MAC */ 3331 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3332 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3333 ETH_ALEN, NULL); 3334 3335 /* Add VLAN etype */ 3336 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) 3337 igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12, 3338 sizeof(filter->vlan_etype), 3339 NULL); 3340 3341 /* Add VLAN TCI */ 3342 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3343 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3344 sizeof(filter->vlan_tci), NULL); 3345 3346 /* Add Ether type */ 3347 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3348 __be16 etype = cpu_to_be16(filter->etype); 3349 3350 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3351 sizeof(etype), NULL); 3352 } 3353 3354 /* Add user data */ 3355 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3356 igc_flex_filter_add_field(&flex, &filter->user_data, 3357 user_offset, 3358 sizeof(filter->user_data), 3359 filter->user_mask); 3360 3361 /* Add it down to the hardware and enable it. 
*/ 3362 ret = igc_write_flex_filter_ll(adapter, &flex); 3363 if (ret) 3364 return ret; 3365 3366 filter->flex_index = index; 3367 3368 return 0; 3369 } 3370 3371 static void igc_del_flex_filter(struct igc_adapter *adapter, 3372 u16 reg_index) 3373 { 3374 struct igc_hw *hw = &adapter->hw; 3375 u32 wufc; 3376 3377 /* Just disable the filter. The filter table itself is kept 3378 * intact. Another flex_filter_add() should override the "old" data 3379 * then. 3380 */ 3381 if (reg_index > 8) { 3382 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3383 3384 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3385 wr32(IGC_WUFC_EXT, wufc_ext); 3386 } else { 3387 wufc = rd32(IGC_WUFC); 3388 3389 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3390 wr32(IGC_WUFC, wufc); 3391 } 3392 3393 if (igc_flex_filter_in_use(adapter)) 3394 return; 3395 3396 /* No filters are in use, we may disable flex filters */ 3397 wufc = rd32(IGC_WUFC); 3398 wufc &= ~IGC_WUFC_FLEX_HQ; 3399 wr32(IGC_WUFC, wufc); 3400 } 3401 3402 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3403 struct igc_nfc_rule *rule) 3404 { 3405 int err; 3406 3407 if (rule->flex) { 3408 return igc_add_flex_filter(adapter, rule); 3409 } 3410 3411 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3412 err = igc_add_etype_filter(adapter, rule->filter.etype, 3413 rule->action); 3414 if (err) 3415 return err; 3416 } 3417 3418 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3419 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3420 rule->filter.src_addr, rule->action); 3421 if (err) 3422 return err; 3423 } 3424 3425 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3426 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3427 rule->filter.dst_addr, rule->action); 3428 if (err) 3429 return err; 3430 } 3431 3432 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3433 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> 3434 VLAN_PRIO_SHIFT; 3435 3436 err = 
igc_add_vlan_prio_filter(adapter, prio, rule->action); 3437 if (err) 3438 return err; 3439 } 3440 3441 return 0; 3442 } 3443 3444 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3445 const struct igc_nfc_rule *rule) 3446 { 3447 if (rule->flex) { 3448 igc_del_flex_filter(adapter, rule->filter.flex_index); 3449 return; 3450 } 3451 3452 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3453 igc_del_etype_filter(adapter, rule->filter.etype); 3454 3455 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3456 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> 3457 VLAN_PRIO_SHIFT; 3458 3459 igc_del_vlan_prio_filter(adapter, prio); 3460 } 3461 3462 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3463 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3464 rule->filter.src_addr); 3465 3466 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3467 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3468 rule->filter.dst_addr); 3469 } 3470 3471 /** 3472 * igc_get_nfc_rule() - Get NFC rule 3473 * @adapter: Pointer to adapter 3474 * @location: Rule location 3475 * 3476 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3477 * 3478 * Return: Pointer to NFC rule at @location. If not found, NULL. 3479 */ 3480 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3481 u32 location) 3482 { 3483 struct igc_nfc_rule *rule; 3484 3485 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3486 if (rule->location == location) 3487 return rule; 3488 if (rule->location > location) 3489 break; 3490 } 3491 3492 return NULL; 3493 } 3494 3495 /** 3496 * igc_del_nfc_rule() - Delete NFC rule 3497 * @adapter: Pointer to adapter 3498 * @rule: Pointer to rule to be deleted 3499 * 3500 * Disable NFC rule in hardware and delete it from adapter. 3501 * 3502 * Context: Expects adapter->nfc_rule_lock to be held by caller. 
3503 */ 3504 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3505 { 3506 igc_disable_nfc_rule(adapter, rule); 3507 3508 list_del(&rule->list); 3509 adapter->nfc_rule_count--; 3510 3511 kfree(rule); 3512 } 3513 3514 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3515 { 3516 struct igc_nfc_rule *rule, *tmp; 3517 3518 mutex_lock(&adapter->nfc_rule_lock); 3519 3520 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3521 igc_del_nfc_rule(adapter, rule); 3522 3523 mutex_unlock(&adapter->nfc_rule_lock); 3524 } 3525 3526 /** 3527 * igc_add_nfc_rule() - Add NFC rule 3528 * @adapter: Pointer to adapter 3529 * @rule: Pointer to rule to be added 3530 * 3531 * Enable NFC rule in hardware and add it to adapter. 3532 * 3533 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3534 * 3535 * Return: 0 on success, negative errno on failure. 3536 */ 3537 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3538 { 3539 struct igc_nfc_rule *pred, *cur; 3540 int err; 3541 3542 err = igc_enable_nfc_rule(adapter, rule); 3543 if (err) 3544 return err; 3545 3546 pred = NULL; 3547 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 3548 if (cur->location >= rule->location) 3549 break; 3550 pred = cur; 3551 } 3552 3553 list_add(&rule->list, pred ? 
&pred->list : &adapter->nfc_rule_list); 3554 adapter->nfc_rule_count++; 3555 return 0; 3556 } 3557 3558 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 3559 { 3560 struct igc_nfc_rule *rule; 3561 3562 mutex_lock(&adapter->nfc_rule_lock); 3563 3564 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 3565 igc_enable_nfc_rule(adapter, rule); 3566 3567 mutex_unlock(&adapter->nfc_rule_lock); 3568 } 3569 3570 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 3571 { 3572 struct igc_adapter *adapter = netdev_priv(netdev); 3573 3574 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 3575 } 3576 3577 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 3578 { 3579 struct igc_adapter *adapter = netdev_priv(netdev); 3580 3581 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 3582 return 0; 3583 } 3584 3585 /** 3586 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3587 * @netdev: network interface device structure 3588 * 3589 * The set_rx_mode entry point is called whenever the unicast or multicast 3590 * address lists or the network interface flags are updated. This routine is 3591 * responsible for configuring the hardware for proper unicast, multicast, 3592 * promiscuous mode, and all-multi behavior. 
3593 */ 3594 static void igc_set_rx_mode(struct net_device *netdev) 3595 { 3596 struct igc_adapter *adapter = netdev_priv(netdev); 3597 struct igc_hw *hw = &adapter->hw; 3598 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 3599 int count; 3600 3601 /* Check for Promiscuous and All Multicast modes */ 3602 if (netdev->flags & IFF_PROMISC) { 3603 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 3604 } else { 3605 if (netdev->flags & IFF_ALLMULTI) { 3606 rctl |= IGC_RCTL_MPE; 3607 } else { 3608 /* Write addresses to the MTA, if the attempt fails 3609 * then we should just turn on promiscuous mode so 3610 * that we can at least receive multicast traffic 3611 */ 3612 count = igc_write_mc_addr_list(netdev); 3613 if (count < 0) 3614 rctl |= IGC_RCTL_MPE; 3615 } 3616 } 3617 3618 /* Write addresses to available RAR registers, if there is not 3619 * sufficient space to store all the addresses then enable 3620 * unicast promiscuous mode 3621 */ 3622 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 3623 rctl |= IGC_RCTL_UPE; 3624 3625 /* update state of unicast and multicast */ 3626 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 3627 wr32(IGC_RCTL, rctl); 3628 3629 #if (PAGE_SIZE < 8192) 3630 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 3631 rlpml = IGC_MAX_FRAME_BUILD_SKB; 3632 #endif 3633 wr32(IGC_RLPML, rlpml); 3634 } 3635 3636 /** 3637 * igc_configure - configure the hardware for RX and TX 3638 * @adapter: private board structure 3639 */ 3640 static void igc_configure(struct igc_adapter *adapter) 3641 { 3642 struct net_device *netdev = adapter->netdev; 3643 int i = 0; 3644 3645 igc_get_hw_control(adapter); 3646 igc_set_rx_mode(netdev); 3647 3648 igc_restore_vlan(adapter); 3649 3650 igc_setup_tctl(adapter); 3651 igc_setup_mrqc(adapter); 3652 igc_setup_rctl(adapter); 3653 3654 igc_set_default_mac_filter(adapter); 3655 igc_restore_nfc_rules(adapter); 3656 3657 igc_configure_tx(adapter); 3658 igc_configure_rx(adapter); 3659 3660 igc_rx_fifo_flush_base(&adapter->hw); 
3661 3662 /* call igc_desc_unused which always leaves 3663 * at least 1 descriptor unused to make sure 3664 * next_to_use != next_to_clean 3665 */ 3666 for (i = 0; i < adapter->num_rx_queues; i++) { 3667 struct igc_ring *ring = adapter->rx_ring[i]; 3668 3669 if (ring->xsk_pool) 3670 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 3671 else 3672 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 3673 } 3674 } 3675 3676 /** 3677 * igc_write_ivar - configure ivar for given MSI-X vector 3678 * @hw: pointer to the HW structure 3679 * @msix_vector: vector number we are allocating to a given ring 3680 * @index: row index of IVAR register to write within IVAR table 3681 * @offset: column offset of in IVAR, should be multiple of 8 3682 * 3683 * The IVAR table consists of 2 columns, 3684 * each containing an cause allocation for an Rx and Tx ring, and a 3685 * variable number of rows depending on the number of queues supported. 3686 */ 3687 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 3688 int index, int offset) 3689 { 3690 u32 ivar = array_rd32(IGC_IVAR0, index); 3691 3692 /* clear any bits that are currently set */ 3693 ivar &= ~((u32)0xFF << offset); 3694 3695 /* write vector and valid bit */ 3696 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 3697 3698 array_wr32(IGC_IVAR0, index, ivar); 3699 } 3700 3701 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 3702 { 3703 struct igc_adapter *adapter = q_vector->adapter; 3704 struct igc_hw *hw = &adapter->hw; 3705 int rx_queue = IGC_N0_QUEUE; 3706 int tx_queue = IGC_N0_QUEUE; 3707 3708 if (q_vector->rx.ring) 3709 rx_queue = q_vector->rx.ring->reg_idx; 3710 if (q_vector->tx.ring) 3711 tx_queue = q_vector->tx.ring->reg_idx; 3712 3713 switch (hw->mac.type) { 3714 case igc_i225: 3715 if (rx_queue > IGC_N0_QUEUE) 3716 igc_write_ivar(hw, msix_vector, 3717 rx_queue >> 1, 3718 (rx_queue & 0x1) << 4); 3719 if (tx_queue > IGC_N0_QUEUE) 3720 igc_write_ivar(hw, msix_vector, 3721 
tx_queue >> 1, 3722 ((tx_queue & 0x1) << 4) + 8); 3723 q_vector->eims_value = BIT(msix_vector); 3724 break; 3725 default: 3726 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 3727 break; 3728 } 3729 3730 /* add q_vector eims value to global eims_enable_mask */ 3731 adapter->eims_enable_mask |= q_vector->eims_value; 3732 3733 /* configure q_vector to set itr on first interrupt */ 3734 q_vector->set_itr = 1; 3735 } 3736 3737 /** 3738 * igc_configure_msix - Configure MSI-X hardware 3739 * @adapter: Pointer to adapter structure 3740 * 3741 * igc_configure_msix sets up the hardware to properly 3742 * generate MSI-X interrupts. 3743 */ 3744 static void igc_configure_msix(struct igc_adapter *adapter) 3745 { 3746 struct igc_hw *hw = &adapter->hw; 3747 int i, vector = 0; 3748 u32 tmp; 3749 3750 adapter->eims_enable_mask = 0; 3751 3752 /* set vector for other causes, i.e. link changes */ 3753 switch (hw->mac.type) { 3754 case igc_i225: 3755 /* Turn on MSI-X capability first, or our settings 3756 * won't stick. And it will take days to debug. 
3757 */ 3758 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 3759 IGC_GPIE_PBA | IGC_GPIE_EIAME | 3760 IGC_GPIE_NSICR); 3761 3762 /* enable msix_other interrupt */ 3763 adapter->eims_other = BIT(vector); 3764 tmp = (vector++ | IGC_IVAR_VALID) << 8; 3765 3766 wr32(IGC_IVAR_MISC, tmp); 3767 break; 3768 default: 3769 /* do nothing, since nothing else supports MSI-X */ 3770 break; 3771 } /* switch (hw->mac.type) */ 3772 3773 adapter->eims_enable_mask |= adapter->eims_other; 3774 3775 for (i = 0; i < adapter->num_q_vectors; i++) 3776 igc_assign_vector(adapter->q_vector[i], vector++); 3777 3778 wrfl(); 3779 } 3780 3781 /** 3782 * igc_irq_enable - Enable default interrupt generation settings 3783 * @adapter: board private structure 3784 */ 3785 static void igc_irq_enable(struct igc_adapter *adapter) 3786 { 3787 struct igc_hw *hw = &adapter->hw; 3788 3789 if (adapter->msix_entries) { 3790 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 3791 u32 regval = rd32(IGC_EIAC); 3792 3793 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 3794 regval = rd32(IGC_EIAM); 3795 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 3796 wr32(IGC_EIMS, adapter->eims_enable_mask); 3797 wr32(IGC_IMS, ims); 3798 } else { 3799 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 3800 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 3801 } 3802 } 3803 3804 /** 3805 * igc_irq_disable - Mask off interrupt generation on the NIC 3806 * @adapter: board private structure 3807 */ 3808 static void igc_irq_disable(struct igc_adapter *adapter) 3809 { 3810 struct igc_hw *hw = &adapter->hw; 3811 3812 if (adapter->msix_entries) { 3813 u32 regval = rd32(IGC_EIAM); 3814 3815 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 3816 wr32(IGC_EIMC, adapter->eims_enable_mask); 3817 regval = rd32(IGC_EIAC); 3818 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 3819 } 3820 3821 wr32(IGC_IAM, 0); 3822 wr32(IGC_IMC, ~0); 3823 wrfl(); 3824 3825 if (adapter->msix_entries) { 3826 int vector = 0, i; 3827 3828 
synchronize_irq(adapter->msix_entries[vector++].vector); 3829 3830 for (i = 0; i < adapter->num_q_vectors; i++) 3831 synchronize_irq(adapter->msix_entries[vector++].vector); 3832 } else { 3833 synchronize_irq(adapter->pdev->irq); 3834 } 3835 } 3836 3837 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 3838 const u32 max_rss_queues) 3839 { 3840 /* Determine if we need to pair queues. */ 3841 /* If rss_queues > half of max_rss_queues, pair the queues in 3842 * order to conserve interrupts due to limited supply. 3843 */ 3844 if (adapter->rss_queues > (max_rss_queues / 2)) 3845 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 3846 else 3847 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 3848 } 3849 3850 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 3851 { 3852 return IGC_MAX_RX_QUEUES; 3853 } 3854 3855 static void igc_init_queue_configuration(struct igc_adapter *adapter) 3856 { 3857 u32 max_rss_queues; 3858 3859 max_rss_queues = igc_get_max_rss_queues(adapter); 3860 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 3861 3862 igc_set_flag_queue_pairs(adapter, max_rss_queues); 3863 } 3864 3865 /** 3866 * igc_reset_q_vector - Reset config for interrupt vector 3867 * @adapter: board private structure to initialize 3868 * @v_idx: Index of vector to be reset 3869 * 3870 * If NAPI is enabled it will delete any references to the 3871 * NAPI struct. This is preparation for igc_free_q_vector. 
3872 */ 3873 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 3874 { 3875 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 3876 3877 /* if we're coming from igc_set_interrupt_capability, the vectors are 3878 * not yet allocated 3879 */ 3880 if (!q_vector) 3881 return; 3882 3883 if (q_vector->tx.ring) 3884 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 3885 3886 if (q_vector->rx.ring) 3887 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 3888 3889 netif_napi_del(&q_vector->napi); 3890 } 3891 3892 /** 3893 * igc_free_q_vector - Free memory allocated for specific interrupt vector 3894 * @adapter: board private structure to initialize 3895 * @v_idx: Index of vector to be freed 3896 * 3897 * This function frees the memory allocated to the q_vector. 3898 */ 3899 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 3900 { 3901 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 3902 3903 adapter->q_vector[v_idx] = NULL; 3904 3905 /* igc_get_stats64() might access the rings on this vector, 3906 * we must wait a grace period before freeing it. 3907 */ 3908 if (q_vector) 3909 kfree_rcu(q_vector, rcu); 3910 } 3911 3912 /** 3913 * igc_free_q_vectors - Free memory allocated for interrupt vectors 3914 * @adapter: board private structure to initialize 3915 * 3916 * This function frees the memory allocated to the q_vectors. In addition if 3917 * NAPI is enabled it will delete any references to the NAPI struct prior 3918 * to freeing the q_vector. 
3919 */ 3920 static void igc_free_q_vectors(struct igc_adapter *adapter) 3921 { 3922 int v_idx = adapter->num_q_vectors; 3923 3924 adapter->num_tx_queues = 0; 3925 adapter->num_rx_queues = 0; 3926 adapter->num_q_vectors = 0; 3927 3928 while (v_idx--) { 3929 igc_reset_q_vector(adapter, v_idx); 3930 igc_free_q_vector(adapter, v_idx); 3931 } 3932 } 3933 3934 /** 3935 * igc_update_itr - update the dynamic ITR value based on statistics 3936 * @q_vector: pointer to q_vector 3937 * @ring_container: ring info to update the itr for 3938 * 3939 * Stores a new ITR value based on packets and byte 3940 * counts during the last interrupt. The advantage of per interrupt 3941 * computation is faster updates and more accurate ITR for the current 3942 * traffic pattern. Constants in this function were computed 3943 * based on theoretical maximum wire speed and thresholds were set based 3944 * on testing data as well as attempting to minimize response time 3945 * while increasing bulk throughput. 3946 * NOTE: These calculations are only valid when operating in a single- 3947 * queue environment. 
3948 */ 3949 static void igc_update_itr(struct igc_q_vector *q_vector, 3950 struct igc_ring_container *ring_container) 3951 { 3952 unsigned int packets = ring_container->total_packets; 3953 unsigned int bytes = ring_container->total_bytes; 3954 u8 itrval = ring_container->itr; 3955 3956 /* no packets, exit with status unchanged */ 3957 if (packets == 0) 3958 return; 3959 3960 switch (itrval) { 3961 case lowest_latency: 3962 /* handle TSO and jumbo frames */ 3963 if (bytes / packets > 8000) 3964 itrval = bulk_latency; 3965 else if ((packets < 5) && (bytes > 512)) 3966 itrval = low_latency; 3967 break; 3968 case low_latency: /* 50 usec aka 20000 ints/s */ 3969 if (bytes > 10000) { 3970 /* this if handles the TSO accounting */ 3971 if (bytes / packets > 8000) 3972 itrval = bulk_latency; 3973 else if ((packets < 10) || ((bytes / packets) > 1200)) 3974 itrval = bulk_latency; 3975 else if ((packets > 35)) 3976 itrval = lowest_latency; 3977 } else if (bytes / packets > 2000) { 3978 itrval = bulk_latency; 3979 } else if (packets <= 2 && bytes < 512) { 3980 itrval = lowest_latency; 3981 } 3982 break; 3983 case bulk_latency: /* 250 usec aka 4000 ints/s */ 3984 if (bytes > 25000) { 3985 if (packets > 35) 3986 itrval = low_latency; 3987 } else if (bytes < 1500) { 3988 itrval = low_latency; 3989 } 3990 break; 3991 } 3992 3993 /* clear work counters since we have the values we need */ 3994 ring_container->total_bytes = 0; 3995 ring_container->total_packets = 0; 3996 3997 /* write updated itr to ring container */ 3998 ring_container->itr = itrval; 3999 } 4000 4001 static void igc_set_itr(struct igc_q_vector *q_vector) 4002 { 4003 struct igc_adapter *adapter = q_vector->adapter; 4004 u32 new_itr = q_vector->itr_val; 4005 u8 current_itr = 0; 4006 4007 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 4008 switch (adapter->link_speed) { 4009 case SPEED_10: 4010 case SPEED_100: 4011 current_itr = 0; 4012 new_itr = IGC_4K_ITR; 4013 goto set_itr_now; 4014 default: 4015 
break; 4016 } 4017 4018 igc_update_itr(q_vector, &q_vector->tx); 4019 igc_update_itr(q_vector, &q_vector->rx); 4020 4021 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4022 4023 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4024 if (current_itr == lowest_latency && 4025 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4026 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4027 current_itr = low_latency; 4028 4029 switch (current_itr) { 4030 /* counts and packets in update_itr are dependent on these numbers */ 4031 case lowest_latency: 4032 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4033 break; 4034 case low_latency: 4035 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4036 break; 4037 case bulk_latency: 4038 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4039 break; 4040 default: 4041 break; 4042 } 4043 4044 set_itr_now: 4045 if (new_itr != q_vector->itr_val) { 4046 /* this attempts to bias the interrupt rate towards Bulk 4047 * by adding intermediate steps when interrupt rate is 4048 * increasing 4049 */ 4050 new_itr = new_itr > q_vector->itr_val ? 4051 max((new_itr * q_vector->itr_val) / 4052 (new_itr + (q_vector->itr_val >> 2)), 4053 new_itr) : new_itr; 4054 /* Don't write the value here; it resets the adapter's 4055 * internal timer, and causes us to delay far longer than 4056 * we should between interrupts. Instead, we write the ITR 4057 * value at the beginning of the next interrupt so the timing 4058 * ends up being correct. 
4059 */ 4060 q_vector->itr_val = new_itr; 4061 q_vector->set_itr = 1; 4062 } 4063 } 4064 4065 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4066 { 4067 int v_idx = adapter->num_q_vectors; 4068 4069 if (adapter->msix_entries) { 4070 pci_disable_msix(adapter->pdev); 4071 kfree(adapter->msix_entries); 4072 adapter->msix_entries = NULL; 4073 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4074 pci_disable_msi(adapter->pdev); 4075 } 4076 4077 while (v_idx--) 4078 igc_reset_q_vector(adapter, v_idx); 4079 } 4080 4081 /** 4082 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4083 * @adapter: Pointer to adapter structure 4084 * @msix: boolean value for MSI-X capability 4085 * 4086 * Attempt to configure interrupts using the best available 4087 * capabilities of the hardware and kernel. 4088 */ 4089 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4090 bool msix) 4091 { 4092 int numvecs, i; 4093 int err; 4094 4095 if (!msix) 4096 goto msi_only; 4097 adapter->flags |= IGC_FLAG_HAS_MSIX; 4098 4099 /* Number of supported queues. 
*/ 4100 adapter->num_rx_queues = adapter->rss_queues; 4101 4102 adapter->num_tx_queues = adapter->rss_queues; 4103 4104 /* start with one vector for every Rx queue */ 4105 numvecs = adapter->num_rx_queues; 4106 4107 /* if Tx handler is separate add 1 for every Tx queue */ 4108 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4109 numvecs += adapter->num_tx_queues; 4110 4111 /* store the number of vectors reserved for queues */ 4112 adapter->num_q_vectors = numvecs; 4113 4114 /* add 1 vector for link status interrupts */ 4115 numvecs++; 4116 4117 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4118 GFP_KERNEL); 4119 4120 if (!adapter->msix_entries) 4121 return; 4122 4123 /* populate entry values */ 4124 for (i = 0; i < numvecs; i++) 4125 adapter->msix_entries[i].entry = i; 4126 4127 err = pci_enable_msix_range(adapter->pdev, 4128 adapter->msix_entries, 4129 numvecs, 4130 numvecs); 4131 if (err > 0) 4132 return; 4133 4134 kfree(adapter->msix_entries); 4135 adapter->msix_entries = NULL; 4136 4137 igc_reset_interrupt_capability(adapter); 4138 4139 msi_only: 4140 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4141 4142 adapter->rss_queues = 1; 4143 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4144 adapter->num_rx_queues = 1; 4145 adapter->num_tx_queues = 1; 4146 adapter->num_q_vectors = 1; 4147 if (!pci_enable_msi(adapter->pdev)) 4148 adapter->flags |= IGC_FLAG_HAS_MSI; 4149 } 4150 4151 /** 4152 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4153 * @q_vector: pointer to q_vector 4154 * 4155 * Stores a new ITR value based on strictly on packet size. This 4156 * algorithm is less sophisticated than that used in igc_update_itr, 4157 * due to the difficulty of synchronizing statistics across multiple 4158 * receive rings. The divisors and thresholds used by this function 4159 * were determined based on theoretical maximum wire speed and testing 4160 * data, in order to minimize response time while increasing bulk 4161 * throughput. 
4162 * NOTE: This function is called only when operating in a multiqueue 4163 * receive environment. 4164 */ 4165 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4166 { 4167 struct igc_adapter *adapter = q_vector->adapter; 4168 int new_val = q_vector->itr_val; 4169 int avg_wire_size = 0; 4170 unsigned int packets; 4171 4172 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4173 * ints/sec - ITR timer value of 120 ticks. 4174 */ 4175 switch (adapter->link_speed) { 4176 case SPEED_10: 4177 case SPEED_100: 4178 new_val = IGC_4K_ITR; 4179 goto set_itr_val; 4180 default: 4181 break; 4182 } 4183 4184 packets = q_vector->rx.total_packets; 4185 if (packets) 4186 avg_wire_size = q_vector->rx.total_bytes / packets; 4187 4188 packets = q_vector->tx.total_packets; 4189 if (packets) 4190 avg_wire_size = max_t(u32, avg_wire_size, 4191 q_vector->tx.total_bytes / packets); 4192 4193 /* if avg_wire_size isn't set no work was done */ 4194 if (!avg_wire_size) 4195 goto clear_counts; 4196 4197 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4198 avg_wire_size += 24; 4199 4200 /* Don't starve jumbo frames */ 4201 avg_wire_size = min(avg_wire_size, 3000); 4202 4203 /* Give a little boost to mid-size frames */ 4204 if (avg_wire_size > 300 && avg_wire_size < 1200) 4205 new_val = avg_wire_size / 3; 4206 else 4207 new_val = avg_wire_size / 2; 4208 4209 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4210 if (new_val < IGC_20K_ITR && 4211 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4212 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4213 new_val = IGC_20K_ITR; 4214 4215 set_itr_val: 4216 if (new_val != q_vector->itr_val) { 4217 q_vector->itr_val = new_val; 4218 q_vector->set_itr = 1; 4219 } 4220 clear_counts: 4221 q_vector->rx.total_bytes = 0; 4222 q_vector->rx.total_packets = 0; 4223 q_vector->tx.total_bytes = 0; 4224 q_vector->tx.total_packets = 0; 4225 } 4226 4227 static void igc_ring_irq_enable(struct 
igc_q_vector *q_vector) 4228 { 4229 struct igc_adapter *adapter = q_vector->adapter; 4230 struct igc_hw *hw = &adapter->hw; 4231 4232 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4233 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4234 if (adapter->num_q_vectors == 1) 4235 igc_set_itr(q_vector); 4236 else 4237 igc_update_ring_itr(q_vector); 4238 } 4239 4240 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4241 if (adapter->msix_entries) 4242 wr32(IGC_EIMS, q_vector->eims_value); 4243 else 4244 igc_irq_enable(adapter); 4245 } 4246 } 4247 4248 static void igc_add_ring(struct igc_ring *ring, 4249 struct igc_ring_container *head) 4250 { 4251 head->ring = ring; 4252 head->count++; 4253 } 4254 4255 /** 4256 * igc_cache_ring_register - Descriptor ring to register mapping 4257 * @adapter: board private structure to initialize 4258 * 4259 * Once we know the feature-set enabled for the device, we'll cache 4260 * the register offset the descriptor ring is assigned to. 4261 */ 4262 static void igc_cache_ring_register(struct igc_adapter *adapter) 4263 { 4264 int i = 0, j = 0; 4265 4266 switch (adapter->hw.mac.type) { 4267 case igc_i225: 4268 default: 4269 for (; i < adapter->num_rx_queues; i++) 4270 adapter->rx_ring[i]->reg_idx = i; 4271 for (; j < adapter->num_tx_queues; j++) 4272 adapter->tx_ring[j]->reg_idx = j; 4273 break; 4274 } 4275 } 4276 4277 /** 4278 * igc_poll - NAPI Rx polling callback 4279 * @napi: napi polling structure 4280 * @budget: count of how many packets we should handle 4281 */ 4282 static int igc_poll(struct napi_struct *napi, int budget) 4283 { 4284 struct igc_q_vector *q_vector = container_of(napi, 4285 struct igc_q_vector, 4286 napi); 4287 struct igc_ring *rx_ring = q_vector->rx.ring; 4288 bool clean_complete = true; 4289 int work_done = 0; 4290 4291 if (q_vector->tx.ring) 4292 clean_complete = igc_clean_tx_irq(q_vector, budget); 4293 4294 if (rx_ring) { 4295 int cleaned = rx_ring->xsk_pool ? 
4296 igc_clean_rx_irq_zc(q_vector, budget) : 4297 igc_clean_rx_irq(q_vector, budget); 4298 4299 work_done += cleaned; 4300 if (cleaned >= budget) 4301 clean_complete = false; 4302 } 4303 4304 /* If all work not completed, return budget and keep polling */ 4305 if (!clean_complete) 4306 return budget; 4307 4308 /* Exit the polling mode, but don't re-enable interrupts if stack might 4309 * poll us due to busy-polling 4310 */ 4311 if (likely(napi_complete_done(napi, work_done))) 4312 igc_ring_irq_enable(q_vector); 4313 4314 return min(work_done, budget - 1); 4315 } 4316 4317 /** 4318 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4319 * @adapter: board private structure to initialize 4320 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4321 * @v_idx: index of vector in adapter struct 4322 * @txr_count: total number of Tx rings to allocate 4323 * @txr_idx: index of first Tx ring to allocate 4324 * @rxr_count: total number of Rx rings to allocate 4325 * @rxr_idx: index of first Rx ring to allocate 4326 * 4327 * We allocate one q_vector. If allocation fails we return -ENOMEM. 
4328 */ 4329 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4330 unsigned int v_count, unsigned int v_idx, 4331 unsigned int txr_count, unsigned int txr_idx, 4332 unsigned int rxr_count, unsigned int rxr_idx) 4333 { 4334 struct igc_q_vector *q_vector; 4335 struct igc_ring *ring; 4336 int ring_count; 4337 4338 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4339 if (txr_count > 1 || rxr_count > 1) 4340 return -ENOMEM; 4341 4342 ring_count = txr_count + rxr_count; 4343 4344 /* allocate q_vector and rings */ 4345 q_vector = adapter->q_vector[v_idx]; 4346 if (!q_vector) 4347 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4348 GFP_KERNEL); 4349 else 4350 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4351 if (!q_vector) 4352 return -ENOMEM; 4353 4354 /* initialize NAPI */ 4355 netif_napi_add(adapter->netdev, &q_vector->napi, 4356 igc_poll, 64); 4357 4358 /* tie q_vector and adapter together */ 4359 adapter->q_vector[v_idx] = q_vector; 4360 q_vector->adapter = adapter; 4361 4362 /* initialize work limits */ 4363 q_vector->tx.work_limit = adapter->tx_work_limit; 4364 4365 /* initialize ITR configuration */ 4366 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4367 q_vector->itr_val = IGC_START_ITR; 4368 4369 /* initialize pointer to rings */ 4370 ring = q_vector->ring; 4371 4372 /* initialize ITR */ 4373 if (rxr_count) { 4374 /* rx or rx/tx vector */ 4375 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4376 q_vector->itr_val = adapter->rx_itr_setting; 4377 } else { 4378 /* tx only vector */ 4379 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4380 q_vector->itr_val = adapter->tx_itr_setting; 4381 } 4382 4383 if (txr_count) { 4384 /* assign generic ring traits */ 4385 ring->dev = &adapter->pdev->dev; 4386 ring->netdev = adapter->netdev; 4387 4388 /* configure backlink on ring */ 4389 ring->q_vector = q_vector; 4390 4391 /* update q_vector Tx values */ 4392 igc_add_ring(ring, &q_vector->tx); 
4393 4394 /* apply Tx specific ring traits */ 4395 ring->count = adapter->tx_ring_count; 4396 ring->queue_index = txr_idx; 4397 4398 /* assign ring to adapter */ 4399 adapter->tx_ring[txr_idx] = ring; 4400 4401 /* push pointer to next ring */ 4402 ring++; 4403 } 4404 4405 if (rxr_count) { 4406 /* assign generic ring traits */ 4407 ring->dev = &adapter->pdev->dev; 4408 ring->netdev = adapter->netdev; 4409 4410 /* configure backlink on ring */ 4411 ring->q_vector = q_vector; 4412 4413 /* update q_vector Rx values */ 4414 igc_add_ring(ring, &q_vector->rx); 4415 4416 /* apply Rx specific ring traits */ 4417 ring->count = adapter->rx_ring_count; 4418 ring->queue_index = rxr_idx; 4419 4420 /* assign ring to adapter */ 4421 adapter->rx_ring[rxr_idx] = ring; 4422 } 4423 4424 return 0; 4425 } 4426 4427 /** 4428 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4429 * @adapter: board private structure to initialize 4430 * 4431 * We allocate one q_vector per queue interrupt. If allocation fails we 4432 * return -ENOMEM. 
4433 */ 4434 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4435 { 4436 int rxr_remaining = adapter->num_rx_queues; 4437 int txr_remaining = adapter->num_tx_queues; 4438 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4439 int q_vectors = adapter->num_q_vectors; 4440 int err; 4441 4442 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4443 for (; rxr_remaining; v_idx++) { 4444 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4445 0, 0, 1, rxr_idx); 4446 4447 if (err) 4448 goto err_out; 4449 4450 /* update counts and index */ 4451 rxr_remaining--; 4452 rxr_idx++; 4453 } 4454 } 4455 4456 for (; v_idx < q_vectors; v_idx++) { 4457 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4458 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4459 4460 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4461 tqpv, txr_idx, rqpv, rxr_idx); 4462 4463 if (err) 4464 goto err_out; 4465 4466 /* update counts and index */ 4467 rxr_remaining -= rqpv; 4468 txr_remaining -= tqpv; 4469 rxr_idx++; 4470 txr_idx++; 4471 } 4472 4473 return 0; 4474 4475 err_out: 4476 adapter->num_tx_queues = 0; 4477 adapter->num_rx_queues = 0; 4478 adapter->num_q_vectors = 0; 4479 4480 while (v_idx--) 4481 igc_free_q_vector(adapter, v_idx); 4482 4483 return -ENOMEM; 4484 } 4485 4486 /** 4487 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4488 * @adapter: Pointer to adapter structure 4489 * @msix: boolean for MSI-X capability 4490 * 4491 * This function initializes the interrupts and allocates all of the queues. 
4492 */ 4493 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4494 { 4495 struct net_device *dev = adapter->netdev; 4496 int err = 0; 4497 4498 igc_set_interrupt_capability(adapter, msix); 4499 4500 err = igc_alloc_q_vectors(adapter); 4501 if (err) { 4502 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4503 goto err_alloc_q_vectors; 4504 } 4505 4506 igc_cache_ring_register(adapter); 4507 4508 return 0; 4509 4510 err_alloc_q_vectors: 4511 igc_reset_interrupt_capability(adapter); 4512 return err; 4513 } 4514 4515 /** 4516 * igc_sw_init - Initialize general software structures (struct igc_adapter) 4517 * @adapter: board private structure to initialize 4518 * 4519 * igc_sw_init initializes the Adapter private data structure. 4520 * Fields are initialized based on PCI device information and 4521 * OS network device settings (MTU size). 4522 */ 4523 static int igc_sw_init(struct igc_adapter *adapter) 4524 { 4525 struct net_device *netdev = adapter->netdev; 4526 struct pci_dev *pdev = adapter->pdev; 4527 struct igc_hw *hw = &adapter->hw; 4528 4529 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 4530 4531 /* set default ring sizes */ 4532 adapter->tx_ring_count = IGC_DEFAULT_TXD; 4533 adapter->rx_ring_count = IGC_DEFAULT_RXD; 4534 4535 /* set default ITR values */ 4536 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 4537 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 4538 4539 /* set default work limits */ 4540 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 4541 4542 /* adjust max frame to be at least the size of a standard frame */ 4543 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 4544 VLAN_HLEN; 4545 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 4546 4547 mutex_init(&adapter->nfc_rule_lock); 4548 INIT_LIST_HEAD(&adapter->nfc_rule_list); 4549 adapter->nfc_rule_count = 0; 4550 4551 spin_lock_init(&adapter->stats64_lock); 4552 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 4553 
adapter->flags |= IGC_FLAG_HAS_MSIX; 4554 4555 igc_init_queue_configuration(adapter); 4556 4557 /* This call may decrease the number of queues */ 4558 if (igc_init_interrupt_scheme(adapter, true)) { 4559 netdev_err(netdev, "Unable to allocate memory for queues\n"); 4560 return -ENOMEM; 4561 } 4562 4563 /* Explicitly disable IRQ since the NIC can be in any state. */ 4564 igc_irq_disable(adapter); 4565 4566 set_bit(__IGC_DOWN, &adapter->state); 4567 4568 return 0; 4569 } 4570 4571 /** 4572 * igc_up - Open the interface and prepare it to handle traffic 4573 * @adapter: board private structure 4574 */ 4575 void igc_up(struct igc_adapter *adapter) 4576 { 4577 struct igc_hw *hw = &adapter->hw; 4578 int i = 0; 4579 4580 /* hardware has been reset, we need to reload some things */ 4581 igc_configure(adapter); 4582 4583 clear_bit(__IGC_DOWN, &adapter->state); 4584 4585 for (i = 0; i < adapter->num_q_vectors; i++) 4586 napi_enable(&adapter->q_vector[i]->napi); 4587 4588 if (adapter->msix_entries) 4589 igc_configure_msix(adapter); 4590 else 4591 igc_assign_vector(adapter->q_vector[0], 0); 4592 4593 /* Clear any pending interrupts. */ 4594 rd32(IGC_ICR); 4595 igc_irq_enable(adapter); 4596 4597 netif_tx_start_all_queues(adapter->netdev); 4598 4599 /* start the watchdog. */ 4600 hw->mac.get_link_status = true; 4601 schedule_work(&adapter->watchdog_task); 4602 } 4603 4604 /** 4605 * igc_update_stats - Update the board statistics counters 4606 * @adapter: board private structure 4607 */ 4608 void igc_update_stats(struct igc_adapter *adapter) 4609 { 4610 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 4611 struct pci_dev *pdev = adapter->pdev; 4612 struct igc_hw *hw = &adapter->hw; 4613 u64 _bytes, _packets; 4614 u64 bytes, packets; 4615 unsigned int start; 4616 u32 mpc; 4617 int i; 4618 4619 /* Prevent stats update while adapter is being reset, or if the pci 4620 * connection is down. 
4621 */ 4622 if (adapter->link_speed == 0) 4623 return; 4624 if (pci_channel_offline(pdev)) 4625 return; 4626 4627 packets = 0; 4628 bytes = 0; 4629 4630 rcu_read_lock(); 4631 for (i = 0; i < adapter->num_rx_queues; i++) { 4632 struct igc_ring *ring = adapter->rx_ring[i]; 4633 u32 rqdpc = rd32(IGC_RQDPC(i)); 4634 4635 if (hw->mac.type >= igc_i225) 4636 wr32(IGC_RQDPC(i), 0); 4637 4638 if (rqdpc) { 4639 ring->rx_stats.drops += rqdpc; 4640 net_stats->rx_fifo_errors += rqdpc; 4641 } 4642 4643 do { 4644 start = u64_stats_fetch_begin_irq(&ring->rx_syncp); 4645 _bytes = ring->rx_stats.bytes; 4646 _packets = ring->rx_stats.packets; 4647 } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); 4648 bytes += _bytes; 4649 packets += _packets; 4650 } 4651 4652 net_stats->rx_bytes = bytes; 4653 net_stats->rx_packets = packets; 4654 4655 packets = 0; 4656 bytes = 0; 4657 for (i = 0; i < adapter->num_tx_queues; i++) { 4658 struct igc_ring *ring = adapter->tx_ring[i]; 4659 4660 do { 4661 start = u64_stats_fetch_begin_irq(&ring->tx_syncp); 4662 _bytes = ring->tx_stats.bytes; 4663 _packets = ring->tx_stats.packets; 4664 } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); 4665 bytes += _bytes; 4666 packets += _packets; 4667 } 4668 net_stats->tx_bytes = bytes; 4669 net_stats->tx_packets = packets; 4670 rcu_read_unlock(); 4671 4672 /* read stats registers */ 4673 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 4674 adapter->stats.gprc += rd32(IGC_GPRC); 4675 adapter->stats.gorc += rd32(IGC_GORCL); 4676 rd32(IGC_GORCH); /* clear GORCL */ 4677 adapter->stats.bprc += rd32(IGC_BPRC); 4678 adapter->stats.mprc += rd32(IGC_MPRC); 4679 adapter->stats.roc += rd32(IGC_ROC); 4680 4681 adapter->stats.prc64 += rd32(IGC_PRC64); 4682 adapter->stats.prc127 += rd32(IGC_PRC127); 4683 adapter->stats.prc255 += rd32(IGC_PRC255); 4684 adapter->stats.prc511 += rd32(IGC_PRC511); 4685 adapter->stats.prc1023 += rd32(IGC_PRC1023); 4686 adapter->stats.prc1522 += rd32(IGC_PRC1522); 4687 
adapter->stats.tlpic += rd32(IGC_TLPIC); 4688 adapter->stats.rlpic += rd32(IGC_RLPIC); 4689 adapter->stats.hgptc += rd32(IGC_HGPTC); 4690 4691 mpc = rd32(IGC_MPC); 4692 adapter->stats.mpc += mpc; 4693 net_stats->rx_fifo_errors += mpc; 4694 adapter->stats.scc += rd32(IGC_SCC); 4695 adapter->stats.ecol += rd32(IGC_ECOL); 4696 adapter->stats.mcc += rd32(IGC_MCC); 4697 adapter->stats.latecol += rd32(IGC_LATECOL); 4698 adapter->stats.dc += rd32(IGC_DC); 4699 adapter->stats.rlec += rd32(IGC_RLEC); 4700 adapter->stats.xonrxc += rd32(IGC_XONRXC); 4701 adapter->stats.xontxc += rd32(IGC_XONTXC); 4702 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 4703 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 4704 adapter->stats.fcruc += rd32(IGC_FCRUC); 4705 adapter->stats.gptc += rd32(IGC_GPTC); 4706 adapter->stats.gotc += rd32(IGC_GOTCL); 4707 rd32(IGC_GOTCH); /* clear GOTCL */ 4708 adapter->stats.rnbc += rd32(IGC_RNBC); 4709 adapter->stats.ruc += rd32(IGC_RUC); 4710 adapter->stats.rfc += rd32(IGC_RFC); 4711 adapter->stats.rjc += rd32(IGC_RJC); 4712 adapter->stats.tor += rd32(IGC_TORH); 4713 adapter->stats.tot += rd32(IGC_TOTH); 4714 adapter->stats.tpr += rd32(IGC_TPR); 4715 4716 adapter->stats.ptc64 += rd32(IGC_PTC64); 4717 adapter->stats.ptc127 += rd32(IGC_PTC127); 4718 adapter->stats.ptc255 += rd32(IGC_PTC255); 4719 adapter->stats.ptc511 += rd32(IGC_PTC511); 4720 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 4721 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 4722 4723 adapter->stats.mptc += rd32(IGC_MPTC); 4724 adapter->stats.bptc += rd32(IGC_BPTC); 4725 4726 adapter->stats.tpt += rd32(IGC_TPT); 4727 adapter->stats.colc += rd32(IGC_COLC); 4728 adapter->stats.colc += rd32(IGC_RERC); 4729 4730 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 4731 4732 adapter->stats.tsctc += rd32(IGC_TSCTC); 4733 4734 adapter->stats.iac += rd32(IGC_IAC); 4735 4736 /* Fill out the OS statistics structure */ 4737 net_stats->multicast = adapter->stats.mprc; 4738 net_stats->collisions = adapter->stats.colc; 
4739 4740 /* Rx Errors */ 4741 4742 /* RLEC on some newer hardware can be incorrect so build 4743 * our own version based on RUC and ROC 4744 */ 4745 net_stats->rx_errors = adapter->stats.rxerrc + 4746 adapter->stats.crcerrs + adapter->stats.algnerrc + 4747 adapter->stats.ruc + adapter->stats.roc + 4748 adapter->stats.cexterr; 4749 net_stats->rx_length_errors = adapter->stats.ruc + 4750 adapter->stats.roc; 4751 net_stats->rx_crc_errors = adapter->stats.crcerrs; 4752 net_stats->rx_frame_errors = adapter->stats.algnerrc; 4753 net_stats->rx_missed_errors = adapter->stats.mpc; 4754 4755 /* Tx Errors */ 4756 net_stats->tx_errors = adapter->stats.ecol + 4757 adapter->stats.latecol; 4758 net_stats->tx_aborted_errors = adapter->stats.ecol; 4759 net_stats->tx_window_errors = adapter->stats.latecol; 4760 net_stats->tx_carrier_errors = adapter->stats.tncrs; 4761 4762 /* Tx Dropped needs to be maintained elsewhere */ 4763 4764 /* Management Stats */ 4765 adapter->stats.mgptc += rd32(IGC_MGTPTC); 4766 adapter->stats.mgprc += rd32(IGC_MGTPRC); 4767 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 4768 } 4769 4770 /** 4771 * igc_down - Close the interface 4772 * @adapter: board private structure 4773 */ 4774 void igc_down(struct igc_adapter *adapter) 4775 { 4776 struct net_device *netdev = adapter->netdev; 4777 struct igc_hw *hw = &adapter->hw; 4778 u32 tctl, rctl; 4779 int i = 0; 4780 4781 set_bit(__IGC_DOWN, &adapter->state); 4782 4783 igc_ptp_suspend(adapter); 4784 4785 if (pci_device_is_present(adapter->pdev)) { 4786 /* disable receives in the hardware */ 4787 rctl = rd32(IGC_RCTL); 4788 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 4789 /* flush and sleep below */ 4790 } 4791 /* set trans_start so we don't get spurious watchdogs during reset */ 4792 netif_trans_update(netdev); 4793 4794 netif_carrier_off(netdev); 4795 netif_tx_stop_all_queues(netdev); 4796 4797 if (pci_device_is_present(adapter->pdev)) { 4798 /* disable transmits in the hardware */ 4799 tctl = rd32(IGC_TCTL); 4800 tctl &= 
~IGC_TCTL_EN; 4801 wr32(IGC_TCTL, tctl); 4802 /* flush both disables and wait for them to finish */ 4803 wrfl(); 4804 usleep_range(10000, 20000); 4805 4806 igc_irq_disable(adapter); 4807 } 4808 4809 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 4810 4811 for (i = 0; i < adapter->num_q_vectors; i++) { 4812 if (adapter->q_vector[i]) { 4813 napi_synchronize(&adapter->q_vector[i]->napi); 4814 napi_disable(&adapter->q_vector[i]->napi); 4815 } 4816 } 4817 4818 del_timer_sync(&adapter->watchdog_timer); 4819 del_timer_sync(&adapter->phy_info_timer); 4820 4821 /* record the stats before reset*/ 4822 spin_lock(&adapter->stats64_lock); 4823 igc_update_stats(adapter); 4824 spin_unlock(&adapter->stats64_lock); 4825 4826 adapter->link_speed = 0; 4827 adapter->link_duplex = 0; 4828 4829 if (!pci_channel_offline(adapter->pdev)) 4830 igc_reset(adapter); 4831 4832 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 4833 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 4834 4835 igc_clean_all_tx_rings(adapter); 4836 igc_clean_all_rx_rings(adapter); 4837 } 4838 4839 void igc_reinit_locked(struct igc_adapter *adapter) 4840 { 4841 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 4842 usleep_range(1000, 2000); 4843 igc_down(adapter); 4844 igc_up(adapter); 4845 clear_bit(__IGC_RESETTING, &adapter->state); 4846 } 4847 4848 static void igc_reset_task(struct work_struct *work) 4849 { 4850 struct igc_adapter *adapter; 4851 4852 adapter = container_of(work, struct igc_adapter, reset_task); 4853 4854 rtnl_lock(); 4855 /* If we're already down or resetting, just bail */ 4856 if (test_bit(__IGC_DOWN, &adapter->state) || 4857 test_bit(__IGC_RESETTING, &adapter->state)) { 4858 rtnl_unlock(); 4859 return; 4860 } 4861 4862 igc_rings_dump(adapter); 4863 igc_regs_dump(adapter); 4864 netdev_err(adapter->netdev, "Reset adapter\n"); 4865 igc_reinit_locked(adapter); 4866 rtnl_unlock(); 4867 } 4868 4869 /** 4870 * igc_change_mtu - Change the Maximum Transfer Unit 4871 * @netdev: network 
interface device structure 4872 * @new_mtu: new value for maximum frame size 4873 * 4874 * Returns 0 on success, negative on failure 4875 */ 4876 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 4877 { 4878 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 4879 struct igc_adapter *adapter = netdev_priv(netdev); 4880 4881 if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) { 4882 netdev_dbg(netdev, "Jumbo frames not supported with XDP"); 4883 return -EINVAL; 4884 } 4885 4886 /* adjust max frame to be at least the size of a standard frame */ 4887 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 4888 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 4889 4890 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 4891 usleep_range(1000, 2000); 4892 4893 /* igc_down has a dependency on max_frame_size */ 4894 adapter->max_frame_size = max_frame; 4895 4896 if (netif_running(netdev)) 4897 igc_down(adapter); 4898 4899 netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); 4900 netdev->mtu = new_mtu; 4901 4902 if (netif_running(netdev)) 4903 igc_up(adapter); 4904 else 4905 igc_reset(adapter); 4906 4907 clear_bit(__IGC_RESETTING, &adapter->state); 4908 4909 return 0; 4910 } 4911 4912 /** 4913 * igc_get_stats64 - Get System Network Statistics 4914 * @netdev: network interface device structure 4915 * @stats: rtnl_link_stats64 pointer 4916 * 4917 * Returns the address of the device statistics structure. 4918 * The statistics are updated here and also from the timer callback. 
4919 */ 4920 static void igc_get_stats64(struct net_device *netdev, 4921 struct rtnl_link_stats64 *stats) 4922 { 4923 struct igc_adapter *adapter = netdev_priv(netdev); 4924 4925 spin_lock(&adapter->stats64_lock); 4926 if (!test_bit(__IGC_RESETTING, &adapter->state)) 4927 igc_update_stats(adapter); 4928 memcpy(stats, &adapter->stats64, sizeof(*stats)); 4929 spin_unlock(&adapter->stats64_lock); 4930 } 4931 4932 static netdev_features_t igc_fix_features(struct net_device *netdev, 4933 netdev_features_t features) 4934 { 4935 /* Since there is no support for separate Rx/Tx vlan accel 4936 * enable/disable make sure Tx flag is always in same state as Rx. 4937 */ 4938 if (features & NETIF_F_HW_VLAN_CTAG_RX) 4939 features |= NETIF_F_HW_VLAN_CTAG_TX; 4940 else 4941 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 4942 4943 return features; 4944 } 4945 4946 static int igc_set_features(struct net_device *netdev, 4947 netdev_features_t features) 4948 { 4949 netdev_features_t changed = netdev->features ^ features; 4950 struct igc_adapter *adapter = netdev_priv(netdev); 4951 4952 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 4953 igc_vlan_mode(netdev, features); 4954 4955 /* Add VLAN support */ 4956 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 4957 return 0; 4958 4959 if (!(features & NETIF_F_NTUPLE)) 4960 igc_flush_nfc_rules(adapter); 4961 4962 netdev->features = features; 4963 4964 if (netif_running(netdev)) 4965 igc_reinit_locked(adapter); 4966 else 4967 igc_reset(adapter); 4968 4969 return 1; 4970 } 4971 4972 static netdev_features_t 4973 igc_features_check(struct sk_buff *skb, struct net_device *dev, 4974 netdev_features_t features) 4975 { 4976 unsigned int network_hdr_len, mac_hdr_len; 4977 4978 /* Make certain the headers can be described by a context descriptor */ 4979 mac_hdr_len = skb_network_header(skb) - skb->data; 4980 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 4981 return features & ~(NETIF_F_HW_CSUM | 4982 NETIF_F_SCTP_CRC | 4983 NETIF_F_HW_VLAN_CTAG_TX | 4984 
NETIF_F_TSO | 4985 NETIF_F_TSO6); 4986 4987 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 4988 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 4989 return features & ~(NETIF_F_HW_CSUM | 4990 NETIF_F_SCTP_CRC | 4991 NETIF_F_TSO | 4992 NETIF_F_TSO6); 4993 4994 /* We can only support IPv4 TSO in tunnels if we can mangle the 4995 * inner IP ID field, so strip TSO if MANGLEID is not supported. 4996 */ 4997 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 4998 features &= ~NETIF_F_TSO; 4999 5000 return features; 5001 } 5002 5003 static void igc_tsync_interrupt(struct igc_adapter *adapter) 5004 { 5005 u32 ack, tsauxc, sec, nsec, tsicr; 5006 struct igc_hw *hw = &adapter->hw; 5007 struct ptp_clock_event event; 5008 struct timespec64 ts; 5009 5010 tsicr = rd32(IGC_TSICR); 5011 ack = 0; 5012 5013 if (tsicr & IGC_TSICR_SYS_WRAP) { 5014 event.type = PTP_CLOCK_PPS; 5015 if (adapter->ptp_caps.pps) 5016 ptp_clock_event(adapter->ptp_clock, &event); 5017 ack |= IGC_TSICR_SYS_WRAP; 5018 } 5019 5020 if (tsicr & IGC_TSICR_TXTS) { 5021 /* retrieve hardware timestamp */ 5022 schedule_work(&adapter->ptp_tx_work); 5023 ack |= IGC_TSICR_TXTS; 5024 } 5025 5026 if (tsicr & IGC_TSICR_TT0) { 5027 spin_lock(&adapter->tmreg_lock); 5028 ts = timespec64_add(adapter->perout[0].start, 5029 adapter->perout[0].period); 5030 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5031 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5032 tsauxc = rd32(IGC_TSAUXC); 5033 tsauxc |= IGC_TSAUXC_EN_TT0; 5034 wr32(IGC_TSAUXC, tsauxc); 5035 adapter->perout[0].start = ts; 5036 spin_unlock(&adapter->tmreg_lock); 5037 ack |= IGC_TSICR_TT0; 5038 } 5039 5040 if (tsicr & IGC_TSICR_TT1) { 5041 spin_lock(&adapter->tmreg_lock); 5042 ts = timespec64_add(adapter->perout[1].start, 5043 adapter->perout[1].period); 5044 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5045 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5046 tsauxc = rd32(IGC_TSAUXC); 5047 tsauxc |= 
IGC_TSAUXC_EN_TT1; 5048 wr32(IGC_TSAUXC, tsauxc); 5049 adapter->perout[1].start = ts; 5050 spin_unlock(&adapter->tmreg_lock); 5051 ack |= IGC_TSICR_TT1; 5052 } 5053 5054 if (tsicr & IGC_TSICR_AUTT0) { 5055 nsec = rd32(IGC_AUXSTMPL0); 5056 sec = rd32(IGC_AUXSTMPH0); 5057 event.type = PTP_CLOCK_EXTTS; 5058 event.index = 0; 5059 event.timestamp = sec * NSEC_PER_SEC + nsec; 5060 ptp_clock_event(adapter->ptp_clock, &event); 5061 ack |= IGC_TSICR_AUTT0; 5062 } 5063 5064 if (tsicr & IGC_TSICR_AUTT1) { 5065 nsec = rd32(IGC_AUXSTMPL1); 5066 sec = rd32(IGC_AUXSTMPH1); 5067 event.type = PTP_CLOCK_EXTTS; 5068 event.index = 1; 5069 event.timestamp = sec * NSEC_PER_SEC + nsec; 5070 ptp_clock_event(adapter->ptp_clock, &event); 5071 ack |= IGC_TSICR_AUTT1; 5072 } 5073 5074 /* acknowledge the interrupts */ 5075 wr32(IGC_TSICR, ack); 5076 } 5077 5078 /** 5079 * igc_msix_other - msix other interrupt handler 5080 * @irq: interrupt number 5081 * @data: pointer to a q_vector 5082 */ 5083 static irqreturn_t igc_msix_other(int irq, void *data) 5084 { 5085 struct igc_adapter *adapter = data; 5086 struct igc_hw *hw = &adapter->hw; 5087 u32 icr = rd32(IGC_ICR); 5088 5089 /* reading ICR causes bit 31 of EICR to be cleared */ 5090 if (icr & IGC_ICR_DRSTA) 5091 schedule_work(&adapter->reset_task); 5092 5093 if (icr & IGC_ICR_DOUTSYNC) { 5094 /* HW is reporting DMA is out of sync */ 5095 adapter->stats.doosync++; 5096 } 5097 5098 if (icr & IGC_ICR_LSC) { 5099 hw->mac.get_link_status = true; 5100 /* guard against interrupt when we're going down */ 5101 if (!test_bit(__IGC_DOWN, &adapter->state)) 5102 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5103 } 5104 5105 if (icr & IGC_ICR_TS) 5106 igc_tsync_interrupt(adapter); 5107 5108 wr32(IGC_EIMS, adapter->eims_other); 5109 5110 return IRQ_HANDLED; 5111 } 5112 5113 static void igc_write_itr(struct igc_q_vector *q_vector) 5114 { 5115 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5116 5117 if (!q_vector->set_itr) 5118 return; 5119 5120 if 
(!itr_val) 5121 itr_val = IGC_ITR_VAL_MASK; 5122 5123 itr_val |= IGC_EITR_CNT_IGNR; 5124 5125 writel(itr_val, q_vector->itr_register); 5126 q_vector->set_itr = 0; 5127 } 5128 5129 static irqreturn_t igc_msix_ring(int irq, void *data) 5130 { 5131 struct igc_q_vector *q_vector = data; 5132 5133 /* Write the ITR value calculated from the previous interrupt. */ 5134 igc_write_itr(q_vector); 5135 5136 napi_schedule(&q_vector->napi); 5137 5138 return IRQ_HANDLED; 5139 } 5140 5141 /** 5142 * igc_request_msix - Initialize MSI-X interrupts 5143 * @adapter: Pointer to adapter structure 5144 * 5145 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5146 * kernel. 5147 */ 5148 static int igc_request_msix(struct igc_adapter *adapter) 5149 { 5150 unsigned int num_q_vectors = adapter->num_q_vectors; 5151 int i = 0, err = 0, vector = 0, free_vector = 0; 5152 struct net_device *netdev = adapter->netdev; 5153 5154 err = request_irq(adapter->msix_entries[vector].vector, 5155 &igc_msix_other, 0, netdev->name, adapter); 5156 if (err) 5157 goto err_out; 5158 5159 if (num_q_vectors > MAX_Q_VECTORS) { 5160 num_q_vectors = MAX_Q_VECTORS; 5161 dev_warn(&adapter->pdev->dev, 5162 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5163 adapter->num_q_vectors, MAX_Q_VECTORS); 5164 } 5165 for (i = 0; i < num_q_vectors; i++) { 5166 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5167 5168 vector++; 5169 5170 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5171 5172 if (q_vector->rx.ring && q_vector->tx.ring) 5173 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5174 q_vector->rx.ring->queue_index); 5175 else if (q_vector->tx.ring) 5176 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5177 q_vector->tx.ring->queue_index); 5178 else if (q_vector->rx.ring) 5179 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5180 q_vector->rx.ring->queue_index); 5181 else 5182 sprintf(q_vector->name, "%s-unused", netdev->name); 5183 5184 err = 
request_irq(adapter->msix_entries[vector].vector, 5185 igc_msix_ring, 0, q_vector->name, 5186 q_vector); 5187 if (err) 5188 goto err_free; 5189 } 5190 5191 igc_configure_msix(adapter); 5192 return 0; 5193 5194 err_free: 5195 /* free already assigned IRQs */ 5196 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5197 5198 vector--; 5199 for (i = 0; i < vector; i++) { 5200 free_irq(adapter->msix_entries[free_vector++].vector, 5201 adapter->q_vector[i]); 5202 } 5203 err_out: 5204 return err; 5205 } 5206 5207 /** 5208 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5209 * @adapter: Pointer to adapter structure 5210 * 5211 * This function resets the device so that it has 0 rx queues, tx queues, and 5212 * MSI-X interrupts allocated. 5213 */ 5214 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5215 { 5216 igc_free_q_vectors(adapter); 5217 igc_reset_interrupt_capability(adapter); 5218 } 5219 5220 /* Need to wait a few seconds after link up to get diagnostic information from 5221 * the phy 5222 */ 5223 static void igc_update_phy_info(struct timer_list *t) 5224 { 5225 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 5226 5227 igc_get_phy_info(&adapter->hw); 5228 } 5229 5230 /** 5231 * igc_has_link - check shared code for link and determine up/down 5232 * @adapter: pointer to driver private info 5233 */ 5234 bool igc_has_link(struct igc_adapter *adapter) 5235 { 5236 struct igc_hw *hw = &adapter->hw; 5237 bool link_active = false; 5238 5239 /* get_link_status is set on LSC (link status) interrupt or 5240 * rx sequence error interrupt. 
get_link_status will stay 5241 * false until the igc_check_for_link establishes link 5242 * for copper adapters ONLY 5243 */ 5244 if (!hw->mac.get_link_status) 5245 return true; 5246 hw->mac.ops.check_for_link(hw); 5247 link_active = !hw->mac.get_link_status; 5248 5249 if (hw->mac.type == igc_i225) { 5250 if (!netif_carrier_ok(adapter->netdev)) { 5251 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5252 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5253 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5254 adapter->link_check_timeout = jiffies; 5255 } 5256 } 5257 5258 return link_active; 5259 } 5260 5261 /** 5262 * igc_watchdog - Timer Call-back 5263 * @t: timer for the watchdog 5264 */ 5265 static void igc_watchdog(struct timer_list *t) 5266 { 5267 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 5268 /* Do the rest outside of interrupt context */ 5269 schedule_work(&adapter->watchdog_task); 5270 } 5271 5272 static void igc_watchdog_task(struct work_struct *work) 5273 { 5274 struct igc_adapter *adapter = container_of(work, 5275 struct igc_adapter, 5276 watchdog_task); 5277 struct net_device *netdev = adapter->netdev; 5278 struct igc_hw *hw = &adapter->hw; 5279 struct igc_phy_info *phy = &hw->phy; 5280 u16 phy_data, retry_count = 20; 5281 u32 link; 5282 int i; 5283 5284 link = igc_has_link(adapter); 5285 5286 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5287 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5288 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5289 else 5290 link = false; 5291 } 5292 5293 if (link) { 5294 /* Cancel scheduled suspend requests. 
*/ 5295 pm_runtime_resume(netdev->dev.parent); 5296 5297 if (!netif_carrier_ok(netdev)) { 5298 u32 ctrl; 5299 5300 hw->mac.ops.get_speed_and_duplex(hw, 5301 &adapter->link_speed, 5302 &adapter->link_duplex); 5303 5304 ctrl = rd32(IGC_CTRL); 5305 /* Link status message must follow this format */ 5306 netdev_info(netdev, 5307 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5308 adapter->link_speed, 5309 adapter->link_duplex == FULL_DUPLEX ? 5310 "Full" : "Half", 5311 (ctrl & IGC_CTRL_TFCE) && 5312 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5313 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5314 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5315 5316 /* disable EEE if enabled */ 5317 if ((adapter->flags & IGC_FLAG_EEE) && 5318 adapter->link_duplex == HALF_DUPLEX) { 5319 netdev_info(netdev, 5320 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5321 adapter->hw.dev_spec._base.eee_enable = false; 5322 adapter->flags &= ~IGC_FLAG_EEE; 5323 } 5324 5325 /* check if SmartSpeed worked */ 5326 igc_check_downshift(hw); 5327 if (phy->speed_downgraded) 5328 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5329 5330 /* adjust timeout factor according to speed/duplex */ 5331 adapter->tx_timeout_factor = 1; 5332 switch (adapter->link_speed) { 5333 case SPEED_10: 5334 adapter->tx_timeout_factor = 14; 5335 break; 5336 case SPEED_100: 5337 case SPEED_1000: 5338 case SPEED_2500: 5339 adapter->tx_timeout_factor = 7; 5340 break; 5341 } 5342 5343 if (adapter->link_speed != SPEED_1000) 5344 goto no_wait; 5345 5346 /* wait for Remote receiver status OK */ 5347 retry_read_status: 5348 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5349 &phy_data)) { 5350 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5351 retry_count) { 5352 msleep(100); 5353 retry_count--; 5354 goto retry_read_status; 5355 } else if (!retry_count) { 5356 netdev_err(netdev, "exceed max 2 second\n"); 5357 } 5358 } else { 5359 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5360 } 5361 
no_wait: 5362 netif_carrier_on(netdev); 5363 5364 /* link state has changed, schedule phy info update */ 5365 if (!test_bit(__IGC_DOWN, &adapter->state)) 5366 mod_timer(&adapter->phy_info_timer, 5367 round_jiffies(jiffies + 2 * HZ)); 5368 } 5369 } else { 5370 if (netif_carrier_ok(netdev)) { 5371 adapter->link_speed = 0; 5372 adapter->link_duplex = 0; 5373 5374 /* Links status message must follow this format */ 5375 netdev_info(netdev, "NIC Link is Down\n"); 5376 netif_carrier_off(netdev); 5377 5378 /* link state has changed, schedule phy info update */ 5379 if (!test_bit(__IGC_DOWN, &adapter->state)) 5380 mod_timer(&adapter->phy_info_timer, 5381 round_jiffies(jiffies + 2 * HZ)); 5382 5383 /* link is down, time to check for alternate media */ 5384 if (adapter->flags & IGC_FLAG_MAS_ENABLE) { 5385 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 5386 schedule_work(&adapter->reset_task); 5387 /* return immediately */ 5388 return; 5389 } 5390 } 5391 pm_schedule_suspend(netdev->dev.parent, 5392 MSEC_PER_SEC * 5); 5393 5394 /* also check for alternate media here */ 5395 } else if (!netif_carrier_ok(netdev) && 5396 (adapter->flags & IGC_FLAG_MAS_ENABLE)) { 5397 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 5398 schedule_work(&adapter->reset_task); 5399 /* return immediately */ 5400 return; 5401 } 5402 } 5403 } 5404 5405 spin_lock(&adapter->stats64_lock); 5406 igc_update_stats(adapter); 5407 spin_unlock(&adapter->stats64_lock); 5408 5409 for (i = 0; i < adapter->num_tx_queues; i++) { 5410 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5411 5412 if (!netif_carrier_ok(netdev)) { 5413 /* We've lost link, so the controller stops DMA, 5414 * but we've got queued Tx work that's never going 5415 * to get done, so reset controller to flush Tx. 5416 * (Do the reset outside of interrupt context). 
5417 */ 5418 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5419 adapter->tx_timeout_count++; 5420 schedule_work(&adapter->reset_task); 5421 /* return immediately since reset is imminent */ 5422 return; 5423 } 5424 } 5425 5426 /* Force detection of hung controller every watchdog period */ 5427 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5428 } 5429 5430 /* Cause software interrupt to ensure Rx ring is cleaned */ 5431 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5432 u32 eics = 0; 5433 5434 for (i = 0; i < adapter->num_q_vectors; i++) 5435 eics |= adapter->q_vector[i]->eims_value; 5436 wr32(IGC_EICS, eics); 5437 } else { 5438 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5439 } 5440 5441 igc_ptp_tx_hang(adapter); 5442 5443 /* Reset the timer */ 5444 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5445 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5446 mod_timer(&adapter->watchdog_timer, 5447 round_jiffies(jiffies + HZ)); 5448 else 5449 mod_timer(&adapter->watchdog_timer, 5450 round_jiffies(jiffies + 2 * HZ)); 5451 } 5452 } 5453 5454 /** 5455 * igc_intr_msi - Interrupt Handler 5456 * @irq: interrupt number 5457 * @data: pointer to a network interface device structure 5458 */ 5459 static irqreturn_t igc_intr_msi(int irq, void *data) 5460 { 5461 struct igc_adapter *adapter = data; 5462 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5463 struct igc_hw *hw = &adapter->hw; 5464 /* read ICR disables interrupts using IAM */ 5465 u32 icr = rd32(IGC_ICR); 5466 5467 igc_write_itr(q_vector); 5468 5469 if (icr & IGC_ICR_DRSTA) 5470 schedule_work(&adapter->reset_task); 5471 5472 if (icr & IGC_ICR_DOUTSYNC) { 5473 /* HW is reporting DMA is out of sync */ 5474 adapter->stats.doosync++; 5475 } 5476 5477 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5478 hw->mac.get_link_status = true; 5479 if (!test_bit(__IGC_DOWN, &adapter->state)) 5480 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5481 } 5482 5483 if (icr & IGC_ICR_TS) 5484 igc_tsync_interrupt(adapter); 5485 5486 
napi_schedule(&q_vector->napi); 5487 5488 return IRQ_HANDLED; 5489 } 5490 5491 /** 5492 * igc_intr - Legacy Interrupt Handler 5493 * @irq: interrupt number 5494 * @data: pointer to a network interface device structure 5495 */ 5496 static irqreturn_t igc_intr(int irq, void *data) 5497 { 5498 struct igc_adapter *adapter = data; 5499 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5500 struct igc_hw *hw = &adapter->hw; 5501 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 5502 * need for the IMC write 5503 */ 5504 u32 icr = rd32(IGC_ICR); 5505 5506 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5507 * not set, then the adapter didn't send an interrupt 5508 */ 5509 if (!(icr & IGC_ICR_INT_ASSERTED)) 5510 return IRQ_NONE; 5511 5512 igc_write_itr(q_vector); 5513 5514 if (icr & IGC_ICR_DRSTA) 5515 schedule_work(&adapter->reset_task); 5516 5517 if (icr & IGC_ICR_DOUTSYNC) { 5518 /* HW is reporting DMA is out of sync */ 5519 adapter->stats.doosync++; 5520 } 5521 5522 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5523 hw->mac.get_link_status = true; 5524 /* guard against interrupt when we're going down */ 5525 if (!test_bit(__IGC_DOWN, &adapter->state)) 5526 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5527 } 5528 5529 if (icr & IGC_ICR_TS) 5530 igc_tsync_interrupt(adapter); 5531 5532 napi_schedule(&q_vector->napi); 5533 5534 return IRQ_HANDLED; 5535 } 5536 5537 static void igc_free_irq(struct igc_adapter *adapter) 5538 { 5539 if (adapter->msix_entries) { 5540 int vector = 0, i; 5541 5542 free_irq(adapter->msix_entries[vector++].vector, adapter); 5543 5544 for (i = 0; i < adapter->num_q_vectors; i++) 5545 free_irq(adapter->msix_entries[vector++].vector, 5546 adapter->q_vector[i]); 5547 } else { 5548 free_irq(adapter->pdev->irq, adapter); 5549 } 5550 } 5551 5552 /** 5553 * igc_request_irq - initialize interrupts 5554 * @adapter: Pointer to adapter structure 5555 * 5556 * Attempts to configure interrupts using the best available 
5557 * capabilities of the hardware and kernel. 5558 */ 5559 static int igc_request_irq(struct igc_adapter *adapter) 5560 { 5561 struct net_device *netdev = adapter->netdev; 5562 struct pci_dev *pdev = adapter->pdev; 5563 int err = 0; 5564 5565 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5566 err = igc_request_msix(adapter); 5567 if (!err) 5568 goto request_done; 5569 /* fall back to MSI */ 5570 igc_free_all_tx_resources(adapter); 5571 igc_free_all_rx_resources(adapter); 5572 5573 igc_clear_interrupt_scheme(adapter); 5574 err = igc_init_interrupt_scheme(adapter, false); 5575 if (err) 5576 goto request_done; 5577 igc_setup_all_tx_resources(adapter); 5578 igc_setup_all_rx_resources(adapter); 5579 igc_configure(adapter); 5580 } 5581 5582 igc_assign_vector(adapter->q_vector[0], 0); 5583 5584 if (adapter->flags & IGC_FLAG_HAS_MSI) { 5585 err = request_irq(pdev->irq, &igc_intr_msi, 0, 5586 netdev->name, adapter); 5587 if (!err) 5588 goto request_done; 5589 5590 /* fall back to legacy interrupts */ 5591 igc_reset_interrupt_capability(adapter); 5592 adapter->flags &= ~IGC_FLAG_HAS_MSI; 5593 } 5594 5595 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 5596 netdev->name, adapter); 5597 5598 if (err) 5599 netdev_err(netdev, "Error %d getting interrupt\n", err); 5600 5601 request_done: 5602 return err; 5603 } 5604 5605 /** 5606 * __igc_open - Called when a network interface is made active 5607 * @netdev: network interface device structure 5608 * @resuming: boolean indicating if the device is resuming 5609 * 5610 * Returns 0 on success, negative value on failure 5611 * 5612 * The open entry point is called when a network interface is made 5613 * active by the system (IFF_UP). At this point all resources needed 5614 * for transmit and receive operations are allocated, the interrupt 5615 * handler is registered with the OS, the watchdog timer is started, 5616 * and the stack is notified that the interface is ready. 
*/
static int __igc_open(struct net_device *netdev, bool resuming)
{
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct igc_hw *hw = &adapter->hw;
	int err = 0;
	int i = 0;

	/* disallow open during test */

	if (test_bit(__IGC_TESTING, &adapter->state)) {
		WARN_ON(resuming);
		return -EBUSY;
	}

	/* a runtime-PM reference is taken only on the non-resume path and
	 * dropped again on both the success and the error exit below
	 */
	if (!resuming)
		pm_runtime_get_sync(&pdev->dev);

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igc_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igc_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	igc_power_up_link(adapter);

	igc_configure(adapter);

	err = igc_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* Notify the stack of the actual queue counts. */
	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
	if (err)
		goto err_set_queues;

	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
	if (err)
		goto err_set_queues;

	clear_bit(__IGC_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&adapter->q_vector[i]->napi);

	/* Clear any pending interrupts. */
	rd32(IGC_ICR);
	igc_irq_enable(adapter);

	if (!resuming)
		pm_runtime_put(&pdev->dev);

	netif_tx_start_all_queues(netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = true;
	schedule_work(&adapter->watchdog_task);

	return IGC_SUCCESS;

	/* error unwind: each label undoes the steps completed before the
	 * failing one, in reverse order
	 */
err_set_queues:
	igc_free_irq(adapter);
err_req_irq:
	igc_release_hw_control(adapter);
	igc_power_down_phy_copper_base(&adapter->hw);
	igc_free_all_rx_resources(adapter);
err_setup_rx:
	igc_free_all_tx_resources(adapter);
err_setup_tx:
	igc_reset(adapter);
	if (!resuming)
		pm_runtime_put(&pdev->dev);

	return err;
}

/**
 * igc_open - open the interface outside of a resume
 * @netdev: network interface device structure
 *
 * Calls __igc_open() with @resuming set to false and returns its result.
 */
int igc_open(struct net_device *netdev)
{
	return __igc_open(netdev, false);
}

/**
 * __igc_close - Disables a network interface
 * @netdev: network interface device structure
 * @suspending: boolean indicating the device is suspending
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
*/
static int __igc_close(struct net_device *netdev, bool suspending)
{
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;

	WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));

	/* runtime-PM reference is held around the teardown unless the
	 * caller is already suspending
	 */
	if (!suspending)
		pm_runtime_get_sync(&pdev->dev);

	igc_down(adapter);

	igc_release_hw_control(adapter);

	igc_free_irq(adapter);

	igc_free_all_tx_resources(adapter);
	igc_free_all_rx_resources(adapter);

	if (!suspending)
		pm_runtime_put_sync(&pdev->dev);

	return 0;
}

/**
 * igc_close - close the interface outside of a suspend
 * @netdev: network interface device structure
 *
 * Closes the device only when it is still present or the netdev is being
 * dismantled; otherwise does nothing.  Always returns 0 or the result of
 * __igc_close().
 */
int igc_close(struct net_device *netdev)
{
	if (netif_device_present(netdev) || netdev->dismantle)
		return __igc_close(netdev, false);
	return 0;
}

/**
 * igc_ioctl - Access the hwtstamp interface
 * @netdev: network interface device structure
 * @ifr: interface request data
 * @cmd: ioctl command
 *
 * Only SIOCGHWTSTAMP and SIOCSHWTSTAMP are handled; any other command
 * yields -EOPNOTSUPP.
 **/
static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGHWTSTAMP:
		return igc_ptp_get_ts_config(netdev, ifr);
	case SIOCSHWTSTAMP:
		return igc_ptp_set_ts_config(netdev, ifr);
	default:
		return -EOPNOTSUPP;
	}
}

/**
 * igc_save_launchtime_params - record the launchtime setting of a Tx queue
 * @adapter: Pointer to adapter structure
 * @queue: Tx queue index
 * @enable: new value for the ring's launchtime_enable flag
 *
 * Returns 0 on success, -EINVAL when @queue is outside
 * [0, adapter->num_tx_queues).
 */
static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
				      bool enable)
{
	struct igc_ring *ring;

	if (queue < 0 || queue >= adapter->num_tx_queues)
		return -EINVAL;

	ring = adapter->tx_ring[queue];
	ring->launchtime_enable = enable;

	return 0;
}

/* Returns true when @now is strictly later than @base_time. */
static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
{
	struct timespec64 b;

	b = ktime_to_timespec64(base_time);

	return timespec64_compare(now, &b) > 0;
}

/* Checks whether a taprio schedule is acceptable for this adapter;
 * returns true when it is.
 */
static bool validate_schedule(struct igc_adapter *adapter,
			      const struct tc_taprio_qopt_offload *qopt)
{
	int queue_uses[IGC_MAX_TX_QUEUES] = { };
struct timespec64 now;
	size_t n;

	if (qopt->cycle_time_extension)
		return false;

	igc_ptp_read(adapter, &now);

	/* If we program the controller's BASET registers with a time
	 * in the future, it will hold all the packets until that
	 * time, causing a lot of TX Hangs, so to avoid that, we
	 * reject schedules that would start in the future.
	 */
	if (!is_base_time_past(qopt->base_time, &now))
		return false;

	for (n = 0; n < qopt->num_entries; n++) {
		const struct tc_taprio_sched_entry *e;
		int i;

		e = &qopt->entries[n];

		/* i225 only supports "global" frame preemption
		 * settings.
		 */
		if (e->command != TC_TAPRIO_CMD_SET_GATES)
			return false;

		/* a queue's gate may be open in at most one entry */
		for (i = 0; i < adapter->num_tx_queues; i++) {
			if (e->gate_mask & BIT(i))
				queue_uses[i]++;

			if (queue_uses[i] > 1)
				return false;
		}
	}

	return true;
}

/**
 * igc_tsn_enable_launchtime - apply an ETF offload request
 * @adapter: Pointer to adapter structure
 * @qopt: ETF offload parameters (queue and enable flag are used)
 *
 * Returns -EOPNOTSUPP when the MAC type is not igc_i225, the error from
 * igc_save_launchtime_params() if any, otherwise the result of
 * igc_tsn_offload_apply().
 */
static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
				     struct tc_etf_qopt_offload *qopt)
{
	struct igc_hw *hw = &adapter->hw;
	int err;

	if (hw->mac.type != igc_i225)
		return -EOPNOTSUPP;

	err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
	if (err)
		return err;

	return igc_tsn_offload_apply(adapter);
}

/**
 * igc_tsn_clear_schedule - reset the saved schedule to its defaults
 * @adapter: Pointer to adapter structure
 *
 * Sets base_time to 0 and cycle_time to NSEC_PER_SEC, and gives every Tx
 * ring the window [0, NSEC_PER_SEC].  Always returns 0.
 */
static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
{
	int i;

	adapter->base_time = 0;
	adapter->cycle_time = NSEC_PER_SEC;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igc_ring *ring = adapter->tx_ring[i];

		ring->start_time = 0;
		ring->end_time = NSEC_PER_SEC;
	}

	return 0;
}

/**
 * igc_save_qbv_schedule - store a taprio schedule in the adapter and rings
 * @adapter: Pointer to adapter structure
 * @qopt: taprio offload parameters
 *
 * With @qopt->enable clear the saved schedule is reset.  Otherwise returns
 * -EALREADY when a base time is already stored, -EINVAL when
 * validate_schedule() rejects the request, and 0 after saving it.
 */
static int igc_save_qbv_schedule(struct igc_adapter *adapter,
				 struct tc_taprio_qopt_offload *qopt)
{
	u32 start_time = 0, end_time = 0;
	size_t n;

	if (!qopt->enable)
		return igc_tsn_clear_schedule(adapter);

	if (adapter->base_time)
		return -EALREADY;

	if (!validate_schedule(adapter, qopt))
		return -EINVAL;

	adapter->cycle_time = qopt->cycle_time;
	adapter->base_time = qopt->base_time;

	/* FIXME: be a little smarter about cases when the gate for a
	 * queue stays open for more than one entry.
	 */
	for (n = 0; n < qopt->num_entries; n++) {
		struct tc_taprio_sched_entry *e = &qopt->entries[n];
		int i;

		/* entries are laid out back to back: this entry covers
		 * [start_time, start_time + interval)
		 */
		end_time += e->interval;

		for (i = 0; i < adapter->num_tx_queues; i++) {
			struct igc_ring *ring = adapter->tx_ring[i];

			if (!(e->gate_mask & BIT(i)))
				continue;

			ring->start_time = start_time;
			ring->end_time = end_time;
		}

		start_time += e->interval;
	}

	return 0;
}

/**
 * igc_tsn_enable_qbv_scheduling - apply a taprio offload request
 * @adapter: Pointer to adapter structure
 * @qopt: taprio offload parameters
 *
 * Returns -EOPNOTSUPP when the MAC type is not igc_i225, the error from
 * igc_save_qbv_schedule() if any, otherwise the result of
 * igc_tsn_offload_apply().
 */
static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
					 struct tc_taprio_qopt_offload *qopt)
{
	struct igc_hw *hw = &adapter->hw;
	int err;

	if (hw->mac.type != igc_i225)
		return -EOPNOTSUPP;

	err = igc_save_qbv_schedule(adapter, qopt);
	if (err)
		return err;

	return igc_tsn_offload_apply(adapter);
}

/* Records credit-based shaper parameters on Tx queue 0 or 1. */
static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
			       bool enable, int idleslope, int sendslope,
			       int hicredit, int locredit)
{
	bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
	struct net_device *netdev = adapter->netdev;
	struct igc_ring *ring;
	int i;

	/* i225 has two sets of credit-based shaper logic.
5937 * Supporting it only on the top two priority queues 5938 */ 5939 if (queue < 0 || queue > 1) 5940 return -EINVAL; 5941 5942 ring = adapter->tx_ring[queue]; 5943 5944 for (i = 0; i < IGC_MAX_SR_QUEUES; i++) 5945 if (adapter->tx_ring[i]) 5946 cbs_status[i] = adapter->tx_ring[i]->cbs_enable; 5947 5948 /* CBS should be enabled on the highest priority queue first in order 5949 * for the CBS algorithm to operate as intended. 5950 */ 5951 if (enable) { 5952 if (queue == 1 && !cbs_status[0]) { 5953 netdev_err(netdev, 5954 "Enabling CBS on queue1 before queue0\n"); 5955 return -EINVAL; 5956 } 5957 } else { 5958 if (queue == 0 && cbs_status[1]) { 5959 netdev_err(netdev, 5960 "Disabling CBS on queue0 before queue1\n"); 5961 return -EINVAL; 5962 } 5963 } 5964 5965 ring->cbs_enable = enable; 5966 ring->idleslope = idleslope; 5967 ring->sendslope = sendslope; 5968 ring->hicredit = hicredit; 5969 ring->locredit = locredit; 5970 5971 return 0; 5972 } 5973 5974 static int igc_tsn_enable_cbs(struct igc_adapter *adapter, 5975 struct tc_cbs_qopt_offload *qopt) 5976 { 5977 struct igc_hw *hw = &adapter->hw; 5978 int err; 5979 5980 if (hw->mac.type != igc_i225) 5981 return -EOPNOTSUPP; 5982 5983 if (qopt->queue < 0 || qopt->queue > 1) 5984 return -EINVAL; 5985 5986 err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable, 5987 qopt->idleslope, qopt->sendslope, 5988 qopt->hicredit, qopt->locredit); 5989 if (err) 5990 return err; 5991 5992 return igc_tsn_offload_apply(adapter); 5993 } 5994 5995 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 5996 void *type_data) 5997 { 5998 struct igc_adapter *adapter = netdev_priv(dev); 5999 6000 switch (type) { 6001 case TC_SETUP_QDISC_TAPRIO: 6002 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 6003 6004 case TC_SETUP_QDISC_ETF: 6005 return igc_tsn_enable_launchtime(adapter, type_data); 6006 6007 case TC_SETUP_QDISC_CBS: 6008 return igc_tsn_enable_cbs(adapter, type_data); 6009 6010 default: 6011 return 
-EOPNOTSUPP; 6012 } 6013 } 6014 6015 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 6016 { 6017 struct igc_adapter *adapter = netdev_priv(dev); 6018 6019 switch (bpf->command) { 6020 case XDP_SETUP_PROG: 6021 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 6022 case XDP_SETUP_XSK_POOL: 6023 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 6024 bpf->xsk.queue_id); 6025 default: 6026 return -EOPNOTSUPP; 6027 } 6028 } 6029 6030 static int igc_xdp_xmit(struct net_device *dev, int num_frames, 6031 struct xdp_frame **frames, u32 flags) 6032 { 6033 struct igc_adapter *adapter = netdev_priv(dev); 6034 int cpu = smp_processor_id(); 6035 struct netdev_queue *nq; 6036 struct igc_ring *ring; 6037 int i, drops; 6038 6039 if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) 6040 return -ENETDOWN; 6041 6042 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 6043 return -EINVAL; 6044 6045 ring = igc_xdp_get_tx_ring(adapter, cpu); 6046 nq = txring_txq(ring); 6047 6048 __netif_tx_lock(nq, cpu); 6049 6050 drops = 0; 6051 for (i = 0; i < num_frames; i++) { 6052 int err; 6053 struct xdp_frame *xdpf = frames[i]; 6054 6055 err = igc_xdp_init_tx_descriptor(ring, xdpf); 6056 if (err) { 6057 xdp_return_frame_rx_napi(xdpf); 6058 drops++; 6059 } 6060 } 6061 6062 if (flags & XDP_XMIT_FLUSH) 6063 igc_flush_tx_descriptors(ring); 6064 6065 __netif_tx_unlock(nq); 6066 6067 return num_frames - drops; 6068 } 6069 6070 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 6071 struct igc_q_vector *q_vector) 6072 { 6073 struct igc_hw *hw = &adapter->hw; 6074 u32 eics = 0; 6075 6076 eics |= q_vector->eims_value; 6077 wr32(IGC_EICS, eics); 6078 } 6079 6080 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 6081 { 6082 struct igc_adapter *adapter = netdev_priv(dev); 6083 struct igc_q_vector *q_vector; 6084 struct igc_ring *ring; 6085 6086 if (test_bit(__IGC_DOWN, &adapter->state)) 6087 return -ENETDOWN; 6088 6089 if (!igc_xdp_is_enabled(adapter)) 
6090 return -ENXIO; 6091 6092 if (queue_id >= adapter->num_rx_queues) 6093 return -EINVAL; 6094 6095 ring = adapter->rx_ring[queue_id]; 6096 6097 if (!ring->xsk_pool) 6098 return -ENXIO; 6099 6100 q_vector = adapter->q_vector[queue_id]; 6101 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6102 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6103 6104 return 0; 6105 } 6106 6107 static const struct net_device_ops igc_netdev_ops = { 6108 .ndo_open = igc_open, 6109 .ndo_stop = igc_close, 6110 .ndo_start_xmit = igc_xmit_frame, 6111 .ndo_set_rx_mode = igc_set_rx_mode, 6112 .ndo_set_mac_address = igc_set_mac, 6113 .ndo_change_mtu = igc_change_mtu, 6114 .ndo_get_stats64 = igc_get_stats64, 6115 .ndo_fix_features = igc_fix_features, 6116 .ndo_set_features = igc_set_features, 6117 .ndo_features_check = igc_features_check, 6118 .ndo_eth_ioctl = igc_ioctl, 6119 .ndo_setup_tc = igc_setup_tc, 6120 .ndo_bpf = igc_bpf, 6121 .ndo_xdp_xmit = igc_xdp_xmit, 6122 .ndo_xsk_wakeup = igc_xsk_wakeup, 6123 }; 6124 6125 /* PCIe configuration access */ 6126 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6127 { 6128 struct igc_adapter *adapter = hw->back; 6129 6130 pci_read_config_word(adapter->pdev, reg, value); 6131 } 6132 6133 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6134 { 6135 struct igc_adapter *adapter = hw->back; 6136 6137 pci_write_config_word(adapter->pdev, reg, *value); 6138 } 6139 6140 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6141 { 6142 struct igc_adapter *adapter = hw->back; 6143 6144 if (!pci_is_pcie(adapter->pdev)) 6145 return -IGC_ERR_CONFIG; 6146 6147 pcie_capability_read_word(adapter->pdev, reg, value); 6148 6149 return IGC_SUCCESS; 6150 } 6151 6152 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6153 { 6154 struct igc_adapter *adapter = hw->back; 6155 6156 if (!pci_is_pcie(adapter->pdev)) 6157 return -IGC_ERR_CONFIG; 6158 6159 pcie_capability_write_word(adapter->pdev, reg, *value); 6160 
6161 return IGC_SUCCESS; 6162 } 6163 6164 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6165 { 6166 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6167 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6168 u32 value = 0; 6169 6170 value = readl(&hw_addr[reg]); 6171 6172 /* reads should not return all F's */ 6173 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6174 struct net_device *netdev = igc->netdev; 6175 6176 hw->hw_addr = NULL; 6177 netif_device_detach(netdev); 6178 netdev_err(netdev, "PCIe link lost, device now detached\n"); 6179 WARN(pci_device_is_present(igc->pdev), 6180 "igc: Failed to read reg 0x%x!\n", reg); 6181 } 6182 6183 return value; 6184 } 6185 6186 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) 6187 { 6188 struct igc_mac_info *mac = &adapter->hw.mac; 6189 6190 mac->autoneg = false; 6191 6192 /* Make sure dplx is at most 1 bit and lsb of speed is not set 6193 * for the switch() below to work 6194 */ 6195 if ((spd & 1) || (dplx & ~1)) 6196 goto err_inval; 6197 6198 switch (spd + dplx) { 6199 case SPEED_10 + DUPLEX_HALF: 6200 mac->forced_speed_duplex = ADVERTISE_10_HALF; 6201 break; 6202 case SPEED_10 + DUPLEX_FULL: 6203 mac->forced_speed_duplex = ADVERTISE_10_FULL; 6204 break; 6205 case SPEED_100 + DUPLEX_HALF: 6206 mac->forced_speed_duplex = ADVERTISE_100_HALF; 6207 break; 6208 case SPEED_100 + DUPLEX_FULL: 6209 mac->forced_speed_duplex = ADVERTISE_100_FULL; 6210 break; 6211 case SPEED_1000 + DUPLEX_FULL: 6212 mac->autoneg = true; 6213 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 6214 break; 6215 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 6216 goto err_inval; 6217 case SPEED_2500 + DUPLEX_FULL: 6218 mac->autoneg = true; 6219 adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; 6220 break; 6221 case SPEED_2500 + DUPLEX_HALF: /* not supported */ 6222 default: 6223 goto err_inval; 6224 } 6225 6226 /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ 6227 
adapter->hw.phy.mdix = AUTO_ALL_MODES;

	return 0;

err_inval:
	netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}

/**
 * igc_probe - Device Initialization Routine
 * @pdev: PCI device information struct
 * @ent: entry in igc_pci_tbl
 *
 * Returns 0 on success, negative on failure
 *
 * igc_probe initializes an adapter identified by a pci_dev structure.
 * The OS initialization, configuring the adapter private structure,
 * and a hardware reset occur.
 */
static int igc_probe(struct pci_dev *pdev,
		     const struct pci_device_id *ent)
{
	struct igc_adapter *adapter;
	struct net_device *netdev;
	struct igc_hw *hw;
	const struct igc_info *ei = igc_info_tbl[ent->driver_data];
	int err, pci_using_dac;

	err = pci_enable_device_mem(pdev);
	if (err)
		return err;

	/* prefer a 64-bit DMA mask, fall back to 32-bit */
	pci_using_dac = 0;
	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (!err) {
		pci_using_dac = 1;
	} else {
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
		if (err) {
			dev_err(&pdev->dev,
				"No usable DMA configuration, aborting\n");
			goto err_dma;
		}
	}

	err = pci_request_mem_regions(pdev, igc_driver_name);
	if (err)
		goto err_pci_reg;

	pci_enable_pcie_error_reporting(pdev);

	/* PTM is optional: failure is only reported, not treated as fatal */
	err = pci_enable_ptm(pdev, NULL);
	if (err < 0)
		dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");

	pci_set_master(pdev);

	err = -ENOMEM;
	netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
				   IGC_MAX_TX_QUEUES);

	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
	adapter->port_num = hw->bus.func;
	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);

	err = pci_save_state(pdev);
	if (err)
		goto err_ioremap;

	err = -EIO;
	adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
				   pci_resource_len(pdev, 0));
	if (!adapter->io_addr)
		goto err_ioremap;

	/* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
	hw->hw_addr = adapter->io_addr;

	netdev->netdev_ops = &igc_netdev_ops;
	igc_ethtool_set_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	netdev->mem_start = pci_resource_start(pdev, 0);
	netdev->mem_end = pci_resource_end(pdev, 0);

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC and PHY function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));

	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
		goto err_sw_init;

	/* Add supported features to the features list*/
	netdev->features |= NETIF_F_SG;
	netdev->features |= NETIF_F_TSO;
	netdev->features |= NETIF_F_TSO6;
	netdev->features |= NETIF_F_TSO_ECN;
	netdev->features |= NETIF_F_RXCSUM;
	netdev->features |= NETIF_F_HW_CSUM;
	netdev->features |= NETIF_F_SCTP_CRC;
	netdev->features |= NETIF_F_HW_TC;

#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
				  NETIF_F_GSO_GRE_CSUM | \
				  NETIF_F_GSO_IPXIP4 | \
				  NETIF_F_GSO_IPXIP6 | \
				  NETIF_F_GSO_UDP_TUNNEL | \
				  NETIF_F_GSO_UDP_TUNNEL_CSUM)

	netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
	netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;

	/* setup the private structure */
	err = igc_sw_init(adapter);
	if (err)
		goto err_sw_init;

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= NETIF_F_NTUPLE;
	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
	netdev->hw_features |= netdev->features;

	if (pci_using_dac)
		netdev->features |= NETIF_F_HIGHDMA;

	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
	netdev->mpls_features |= NETIF_F_HW_CSUM;
	netdev->hw_enc_features |= netdev->vlan_features;

	/* MTU range: 68 - 9216 */
	netdev->min_mtu = ETH_MIN_MTU;
	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;

	/* before reading the NVM, reset the controller to put the device in a
	 * known good starting state
	 */
	hw->mac.ops.reset_hw(hw);

	if (igc_get_flash_presence_i225(hw)) {
		if (hw->nvm.ops.validate(hw) < 0) {
			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
			err = -EIO;
			goto err_eeprom;
		}
	}

	/* a platform-provided MAC address takes precedence over the NVM */
	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
		/* copy the MAC address out of the NVM */
		if (hw->mac.ops.read_mac_addr(hw))
			dev_err(&pdev->dev, "NVM Read Error\n");
	}

	eth_hw_addr_set(netdev, hw->mac.addr);

	if (!is_valid_ether_addr(netdev->dev_addr)) {
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

	/* configure RXPBSIZE and TXPBSIZE */
	wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);

	timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
	timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);

	INIT_WORK(&adapter->reset_task, igc_reset_task);
	INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);

	/* Initialize link properties that are user-changeable */
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0xaf;

	hw->fc.requested_mode = igc_fc_default;
	hw->fc.current_mode = igc_fc_default;

	/* By default, support wake on port A */
	adapter->flags |= IGC_FLAG_WOL_SUPPORTED;

	/* wake on magic packet is the default whenever WoL is supported */
	if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
		adapter->wol |= IGC_WUFC_MAG;

	device_set_wakeup_enable(&adapter->pdev->dev,
				 adapter->flags & IGC_FLAG_WOL_SUPPORTED);

	igc_ptp_init(adapter);

	igc_tsn_clear_schedule(adapter);

	/* reset the hardware with the new settings */
	igc_reset(adapter);

	/* let the f/w know that the h/w is now under the control of the
	 * driver.
	 */
	igc_get_hw_control(adapter);

	strncpy(netdev->name, "eth%d", IFNAMSIZ);
	err = register_netdev(netdev);
	if (err)
		goto err_register;

	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

	/* keep a private copy of the board info */
	adapter->ei = *ei;

	/* print pcie link status and MAC address */
	pcie_print_link_status(pdev);
	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);

	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
	/* Disable EEE for internal PHY devices */
	hw->dev_spec._base.eee_enable = false;
	adapter->flags &= ~IGC_FLAG_EEE;
	igc_set_eee_i225(hw, false, false, false);

	pm_runtime_put_noidle(&pdev->dev);

	return 0;

	/* error unwind, in reverse order of the setup above */
err_register:
	igc_release_hw_control(adapter);
err_eeprom:
	if (!igc_check_reset_block(hw))
		igc_reset_phy(hw);
err_sw_init:
	igc_clear_interrupt_scheme(adapter);
	iounmap(adapter->io_addr);
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
	pci_disable_pcie_error_reporting(pdev);
	pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}

/**
 * igc_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igc_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 */
static void igc_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igc_adapter *adapter = netdev_priv(netdev);

	pm_runtime_get_noresume(&pdev->dev);

	igc_flush_nfc_rules(adapter);

	igc_ptp_stop(adapter);

	/* mark the adapter down before stopping timers and work items */
	set_bit(__IGC_DOWN, &adapter->state);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant.
	 */
	igc_release_hw_control(adapter);
	unregister_netdev(netdev);

	igc_clear_interrupt_scheme(adapter);
	/* NOTE(review): igc_probe() maps io_addr with ioremap() and unmaps
	 * it with iounmap() on its error path; confirm pci_iounmap() is the
	 * intended counterpart here
	 */
	pci_iounmap(pdev, adapter->io_addr);
	pci_release_mem_regions(pdev);

	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}

/**
 * __igc_shutdown - quiesce the device and program its wake-up filters
 * @pdev: PCI device information struct
 * @enable_wake: optional output, set to whether wake-up should be armed
 * @runtime: selects link-change wake-up (IGC_WUFC_LNKC) instead of the
 *           configured adapter->wol filters
 *
 * Always returns 0.
 */
static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
			  bool runtime)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igc_adapter *adapter = netdev_priv(netdev);
	u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
	struct igc_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	bool wake;

	rtnl_lock();
	netif_device_detach(netdev);

	if (netif_running(netdev))
		__igc_close(netdev, true);

	igc_ptp_suspend(adapter);

	igc_clear_interrupt_scheme(adapter);
	rtnl_unlock();

	/* link-change wake-up is dropped while the link is up */
	status = rd32(IGC_STATUS);
	if (status & IGC_STATUS_LU)
		wufc &= ~IGC_WUFC_LNKC;

	if (wufc) {
		igc_setup_rctl(adapter);
		igc_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & IGC_WUFC_MC) {
			rctl = rd32(IGC_RCTL);
			rctl |= IGC_RCTL_MPE;
			wr32(IGC_RCTL, rctl);
		}

		ctrl = rd32(IGC_CTRL);
		ctrl |= IGC_CTRL_ADVD3WUC;
		wr32(IGC_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igc_disable_pcie_master(hw);

		wr32(IGC_WUC, IGC_WUC_PME_EN);
		wr32(IGC_WUFC, wufc);
	} else {
		wr32(IGC_WUC, 0);
		wr32(IGC_WUFC, 0);
	}

	/* the link is kept powered when a wake filter is armed or
	 * en_mng_pt is set
	 */
	wake = wufc || adapter->en_mng_pt;
	if (!wake)
		igc_power_down_phy_copper_base(&adapter->hw);
	else
		igc_power_up_link(adapter);

	if (enable_wake)
		*enable_wake = wake;

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant.
	 */
	igc_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}

#ifdef CONFIG_PM
/* Runtime-suspend callback: shut down with link-change wake-up selected. */
static int __maybe_unused igc_runtime_suspend(struct device *dev)
{
	return __igc_shutdown(to_pci_dev(dev), NULL, 1);
}

static void igc_deliver_wake_packet(struct net_device *netdev)
{
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct igc_hw *hw = &adapter->hw;
	struct sk_buff *skb;
	u32 wupl;

	wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;

	/* WUPM stores only the first 128 bytes of the wake packet.
6622 * Read the packet only if we have the whole thing. 6623 */ 6624 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 6625 return; 6626 6627 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 6628 if (!skb) 6629 return; 6630 6631 skb_put(skb, wupl); 6632 6633 /* Ensure reads are 32-bit aligned */ 6634 wupl = roundup(wupl, 4); 6635 6636 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 6637 6638 skb->protocol = eth_type_trans(skb, netdev); 6639 netif_rx(skb); 6640 } 6641 6642 static int __maybe_unused igc_resume(struct device *dev) 6643 { 6644 struct pci_dev *pdev = to_pci_dev(dev); 6645 struct net_device *netdev = pci_get_drvdata(pdev); 6646 struct igc_adapter *adapter = netdev_priv(netdev); 6647 struct igc_hw *hw = &adapter->hw; 6648 u32 err, val; 6649 6650 pci_set_power_state(pdev, PCI_D0); 6651 pci_restore_state(pdev); 6652 pci_save_state(pdev); 6653 6654 if (!pci_device_is_present(pdev)) 6655 return -ENODEV; 6656 err = pci_enable_device_mem(pdev); 6657 if (err) { 6658 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 6659 return err; 6660 } 6661 pci_set_master(pdev); 6662 6663 pci_enable_wake(pdev, PCI_D3hot, 0); 6664 pci_enable_wake(pdev, PCI_D3cold, 0); 6665 6666 if (igc_init_interrupt_scheme(adapter, true)) { 6667 netdev_err(netdev, "Unable to allocate memory for queues\n"); 6668 return -ENOMEM; 6669 } 6670 6671 igc_reset(adapter); 6672 6673 /* let the f/w know that the h/w is now under the control of the 6674 * driver. 
6675 */ 6676 igc_get_hw_control(adapter); 6677 6678 val = rd32(IGC_WUS); 6679 if (val & WAKE_PKT_WUS) 6680 igc_deliver_wake_packet(netdev); 6681 6682 wr32(IGC_WUS, ~0); 6683 6684 rtnl_lock(); 6685 if (!err && netif_running(netdev)) 6686 err = __igc_open(netdev, true); 6687 6688 if (!err) 6689 netif_device_attach(netdev); 6690 rtnl_unlock(); 6691 6692 return err; 6693 } 6694 6695 static int __maybe_unused igc_runtime_resume(struct device *dev) 6696 { 6697 return igc_resume(dev); 6698 } 6699 6700 static int __maybe_unused igc_suspend(struct device *dev) 6701 { 6702 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 6703 } 6704 6705 static int __maybe_unused igc_runtime_idle(struct device *dev) 6706 { 6707 struct net_device *netdev = dev_get_drvdata(dev); 6708 struct igc_adapter *adapter = netdev_priv(netdev); 6709 6710 if (!igc_has_link(adapter)) 6711 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 6712 6713 return -EBUSY; 6714 } 6715 #endif /* CONFIG_PM */ 6716 6717 static void igc_shutdown(struct pci_dev *pdev) 6718 { 6719 bool wake; 6720 6721 __igc_shutdown(pdev, &wake, 0); 6722 6723 if (system_state == SYSTEM_POWER_OFF) { 6724 pci_wake_from_d3(pdev, wake); 6725 pci_set_power_state(pdev, PCI_D3hot); 6726 } 6727 } 6728 6729 /** 6730 * igc_io_error_detected - called when PCI error is detected 6731 * @pdev: Pointer to PCI device 6732 * @state: The current PCI connection state 6733 * 6734 * This function is called after a PCI bus error affecting 6735 * this device has been detected. 6736 **/ 6737 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 6738 pci_channel_state_t state) 6739 { 6740 struct net_device *netdev = pci_get_drvdata(pdev); 6741 struct igc_adapter *adapter = netdev_priv(netdev); 6742 6743 netif_device_detach(netdev); 6744 6745 if (state == pci_channel_io_perm_failure) 6746 return PCI_ERS_RESULT_DISCONNECT; 6747 6748 if (netif_running(netdev)) 6749 igc_down(adapter); 6750 pci_disable_device(pdev); 6751 6752 /* Request a slot reset. 
*/
	return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igc_io_slot_reset - called after the PCI bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igc_resume routine.
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success, PCI_ERS_RESULT_DISCONNECT
 * when the device cannot be re-enabled.
 **/
static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igc_adapter *adapter = netdev_priv(netdev);
	struct igc_hw *hw = &adapter->hw;
	pci_ers_result_t result;

	if (pci_enable_device_mem(pdev)) {
		netdev_err(netdev, "Could not re-enable PCI device after reset\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		/* In case of PCI error, adapter loses its HW address
		 * so we should re-assign it here.
		 */
		hw->hw_addr = adapter->io_addr;

		igc_reset(adapter);
		/* clear all wake-up status bits */
		wr32(IGC_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	return result;
}

/**
 * igc_io_resume - called when traffic can start to flow again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * its OK to resume normal operation. Implementation resembles the
 * second-half of the igc_resume routine.
6801 */ 6802 static void igc_io_resume(struct pci_dev *pdev) 6803 { 6804 struct net_device *netdev = pci_get_drvdata(pdev); 6805 struct igc_adapter *adapter = netdev_priv(netdev); 6806 6807 rtnl_lock(); 6808 if (netif_running(netdev)) { 6809 if (igc_open(netdev)) { 6810 netdev_err(netdev, "igc_open failed after reset\n"); 6811 return; 6812 } 6813 } 6814 6815 netif_device_attach(netdev); 6816 6817 /* let the f/w know that the h/w is now under the control of the 6818 * driver. 6819 */ 6820 igc_get_hw_control(adapter); 6821 rtnl_unlock(); 6822 } 6823 6824 static const struct pci_error_handlers igc_err_handler = { 6825 .error_detected = igc_io_error_detected, 6826 .slot_reset = igc_io_slot_reset, 6827 .resume = igc_io_resume, 6828 }; 6829 6830 #ifdef CONFIG_PM 6831 static const struct dev_pm_ops igc_pm_ops = { 6832 SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) 6833 SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, 6834 igc_runtime_idle) 6835 }; 6836 #endif 6837 6838 static struct pci_driver igc_driver = { 6839 .name = igc_driver_name, 6840 .id_table = igc_pci_tbl, 6841 .probe = igc_probe, 6842 .remove = igc_remove, 6843 #ifdef CONFIG_PM 6844 .driver.pm = &igc_pm_ops, 6845 #endif 6846 .shutdown = igc_shutdown, 6847 .err_handler = &igc_err_handler, 6848 }; 6849 6850 /** 6851 * igc_reinit_queues - return error 6852 * @adapter: pointer to adapter structure 6853 */ 6854 int igc_reinit_queues(struct igc_adapter *adapter) 6855 { 6856 struct net_device *netdev = adapter->netdev; 6857 int err = 0; 6858 6859 if (netif_running(netdev)) 6860 igc_close(netdev); 6861 6862 igc_reset_interrupt_capability(adapter); 6863 6864 if (igc_init_interrupt_scheme(adapter, true)) { 6865 netdev_err(netdev, "Unable to allocate memory for queues\n"); 6866 return -ENOMEM; 6867 } 6868 6869 if (netif_running(netdev)) 6870 err = igc_open(netdev); 6871 6872 return err; 6873 } 6874 6875 /** 6876 * igc_get_hw_dev - return device 6877 * @hw: pointer to hardware structure 6878 * 6879 * used by 
hardware layer to print debugging information 6880 */ 6881 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 6882 { 6883 struct igc_adapter *adapter = hw->back; 6884 6885 return adapter->netdev; 6886 } 6887 6888 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 6889 { 6890 struct igc_hw *hw = &ring->q_vector->adapter->hw; 6891 u8 idx = ring->reg_idx; 6892 u32 rxdctl; 6893 6894 rxdctl = rd32(IGC_RXDCTL(idx)); 6895 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 6896 rxdctl |= IGC_RXDCTL_SWFLUSH; 6897 wr32(IGC_RXDCTL(idx), rxdctl); 6898 } 6899 6900 void igc_disable_rx_ring(struct igc_ring *ring) 6901 { 6902 igc_disable_rx_ring_hw(ring); 6903 igc_clean_rx_ring(ring); 6904 } 6905 6906 void igc_enable_rx_ring(struct igc_ring *ring) 6907 { 6908 struct igc_adapter *adapter = ring->q_vector->adapter; 6909 6910 igc_configure_rx_ring(adapter, ring); 6911 6912 if (ring->xsk_pool) 6913 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 6914 else 6915 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 6916 } 6917 6918 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 6919 { 6920 struct igc_hw *hw = &ring->q_vector->adapter->hw; 6921 u8 idx = ring->reg_idx; 6922 u32 txdctl; 6923 6924 txdctl = rd32(IGC_TXDCTL(idx)); 6925 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 6926 txdctl |= IGC_TXDCTL_SWFLUSH; 6927 wr32(IGC_TXDCTL(idx), txdctl); 6928 } 6929 6930 void igc_disable_tx_ring(struct igc_ring *ring) 6931 { 6932 igc_disable_tx_ring_hw(ring); 6933 igc_clean_tx_ring(ring); 6934 } 6935 6936 void igc_enable_tx_ring(struct igc_ring *ring) 6937 { 6938 struct igc_adapter *adapter = ring->q_vector->adapter; 6939 6940 igc_configure_tx_ring(adapter, ring); 6941 } 6942 6943 /** 6944 * igc_init_module - Driver Registration Routine 6945 * 6946 * igc_init_module is the first routine called when the driver is 6947 * loaded. All it does is register with the PCI subsystem. 
6948 */ 6949 static int __init igc_init_module(void) 6950 { 6951 int ret; 6952 6953 pr_info("%s\n", igc_driver_string); 6954 pr_info("%s\n", igc_copyright); 6955 6956 ret = pci_register_driver(&igc_driver); 6957 return ret; 6958 } 6959 6960 module_init(igc_init_module); 6961 6962 /** 6963 * igc_exit_module - Driver Exit Cleanup Routine 6964 * 6965 * igc_exit_module is called just before the driver is removed 6966 * from memory. 6967 */ 6968 static void __exit igc_exit_module(void) 6969 { 6970 pci_unregister_driver(&igc_driver); 6971 } 6972 6973 module_exit(igc_exit_module); 6974 /* igc_main.c */ 6975