1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/aer.h> 8 #include <linux/tcp.h> 9 #include <linux/udp.h> 10 #include <linux/ip.h> 11 #include <linux/pm_runtime.h> 12 13 #include <net/ipv6.h> 14 15 #include "igc.h" 16 #include "igc_hw.h" 17 18 #define DRV_VERSION "0.0.1-k" 19 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 20 21 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 22 23 static int debug = -1; 24 25 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); 26 MODULE_DESCRIPTION(DRV_SUMMARY); 27 MODULE_LICENSE("GPL v2"); 28 MODULE_VERSION(DRV_VERSION); 29 module_param(debug, int, 0); 30 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 31 32 char igc_driver_name[] = "igc"; 33 char igc_driver_version[] = DRV_VERSION; 34 static const char igc_driver_string[] = DRV_SUMMARY; 35 static const char igc_copyright[] = 36 "Copyright(c) 2018 Intel Corporation."; 37 38 static const struct igc_info *igc_info_tbl[] = { 39 [board_base] = &igc_base_info, 40 }; 41 42 static const struct pci_device_id igc_pci_tbl[] = { 43 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 44 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 45 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 46 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 47 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 48 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 49 /* required last entry */ 50 {0, } 51 }; 52 53 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 54 55 enum latency_range { 56 lowest_latency = 0, 57 low_latency = 1, 58 bulk_latency = 2, 59 latency_invalid = 255 60 }; 61 62 /** 63 * igc_power_down_link - Power down the phy/serdes link 64 * @adapter: address of board private structure 65 */ 66 static void igc_power_down_link(struct igc_adapter *adapter) 67 { 68 if 
(adapter->hw.phy.media_type == igc_media_type_copper) 69 igc_power_down_phy_copper_base(&adapter->hw); 70 } 71 72 void igc_reset(struct igc_adapter *adapter) 73 { 74 struct pci_dev *pdev = adapter->pdev; 75 struct igc_hw *hw = &adapter->hw; 76 struct igc_fc_info *fc = &hw->fc; 77 u32 pba, hwm; 78 79 /* Repartition PBA for greater than 9k MTU if required */ 80 pba = IGC_PBA_34K; 81 82 /* flow control settings 83 * The high water mark must be low enough to fit one full frame 84 * after transmitting the pause frame. As such we must have enough 85 * space to allow for us to complete our current transmit and then 86 * receive the frame that is in progress from the link partner. 87 * Set it to: 88 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 89 */ 90 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 91 92 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 93 fc->low_water = fc->high_water - 16; 94 fc->pause_time = 0xFFFF; 95 fc->send_xon = 1; 96 fc->current_mode = fc->requested_mode; 97 98 hw->mac.ops.reset_hw(hw); 99 100 if (hw->mac.ops.init_hw(hw)) 101 dev_err(&pdev->dev, "Hardware Error\n"); 102 103 if (!netif_running(adapter->netdev)) 104 igc_power_down_link(adapter); 105 106 /* Re-enable PTP, where applicable. */ 107 igc_ptp_reset(adapter); 108 109 igc_get_phy_info(hw); 110 } 111 112 /** 113 * igc_power_up_link - Power up the phy link 114 * @adapter: address of board private structure 115 */ 116 static void igc_power_up_link(struct igc_adapter *adapter) 117 { 118 igc_reset_phy(&adapter->hw); 119 120 if (adapter->hw.phy.media_type == igc_media_type_copper) 121 igc_power_up_phy_copper(&adapter->hw); 122 123 igc_setup_link(&adapter->hw); 124 } 125 126 /** 127 * igc_release_hw_control - release control of the h/w to f/w 128 * @adapter: address of board private structure 129 * 130 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 
131 * For ASF and Pass Through versions of f/w this means that the 132 * driver is no longer loaded. 133 */ 134 static void igc_release_hw_control(struct igc_adapter *adapter) 135 { 136 struct igc_hw *hw = &adapter->hw; 137 u32 ctrl_ext; 138 139 /* Let firmware take over control of h/w */ 140 ctrl_ext = rd32(IGC_CTRL_EXT); 141 wr32(IGC_CTRL_EXT, 142 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 143 } 144 145 /** 146 * igc_get_hw_control - get control of the h/w from f/w 147 * @adapter: address of board private structure 148 * 149 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 150 * For ASF and Pass Through versions of f/w this means that 151 * the driver is loaded. 152 */ 153 static void igc_get_hw_control(struct igc_adapter *adapter) 154 { 155 struct igc_hw *hw = &adapter->hw; 156 u32 ctrl_ext; 157 158 /* Let firmware know the driver has taken over */ 159 ctrl_ext = rd32(IGC_CTRL_EXT); 160 wr32(IGC_CTRL_EXT, 161 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 162 } 163 164 /** 165 * igc_clean_tx_ring - Free Tx Buffers 166 * @tx_ring: ring to be cleaned 167 */ 168 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 169 { 170 u16 i = tx_ring->next_to_clean; 171 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 172 173 while (i != tx_ring->next_to_use) { 174 union igc_adv_tx_desc *eop_desc, *tx_desc; 175 176 /* Free all the Tx ring sk_buffs */ 177 dev_kfree_skb_any(tx_buffer->skb); 178 179 /* unmap skb header data */ 180 dma_unmap_single(tx_ring->dev, 181 dma_unmap_addr(tx_buffer, dma), 182 dma_unmap_len(tx_buffer, len), 183 DMA_TO_DEVICE); 184 185 /* check for eop_desc to determine the end of the packet */ 186 eop_desc = tx_buffer->next_to_watch; 187 tx_desc = IGC_TX_DESC(tx_ring, i); 188 189 /* unmap remaining buffers */ 190 while (tx_desc != eop_desc) { 191 tx_buffer++; 192 tx_desc++; 193 i++; 194 if (unlikely(i == tx_ring->count)) { 195 i = 0; 196 tx_buffer = tx_ring->tx_buffer_info; 197 tx_desc = IGC_TX_DESC(tx_ring, 0); 198 } 199 200 /* unmap any remaining paged 
data */ 201 if (dma_unmap_len(tx_buffer, len)) 202 dma_unmap_page(tx_ring->dev, 203 dma_unmap_addr(tx_buffer, dma), 204 dma_unmap_len(tx_buffer, len), 205 DMA_TO_DEVICE); 206 } 207 208 /* move us one more past the eop_desc for start of next pkt */ 209 tx_buffer++; 210 i++; 211 if (unlikely(i == tx_ring->count)) { 212 i = 0; 213 tx_buffer = tx_ring->tx_buffer_info; 214 } 215 } 216 217 /* reset BQL for queue */ 218 netdev_tx_reset_queue(txring_txq(tx_ring)); 219 220 /* reset next_to_use and next_to_clean */ 221 tx_ring->next_to_use = 0; 222 tx_ring->next_to_clean = 0; 223 } 224 225 /** 226 * igc_free_tx_resources - Free Tx Resources per Queue 227 * @tx_ring: Tx descriptor ring for a specific queue 228 * 229 * Free all transmit software resources 230 */ 231 void igc_free_tx_resources(struct igc_ring *tx_ring) 232 { 233 igc_clean_tx_ring(tx_ring); 234 235 vfree(tx_ring->tx_buffer_info); 236 tx_ring->tx_buffer_info = NULL; 237 238 /* if not set, then don't free */ 239 if (!tx_ring->desc) 240 return; 241 242 dma_free_coherent(tx_ring->dev, tx_ring->size, 243 tx_ring->desc, tx_ring->dma); 244 245 tx_ring->desc = NULL; 246 } 247 248 /** 249 * igc_free_all_tx_resources - Free Tx Resources for All Queues 250 * @adapter: board private structure 251 * 252 * Free all transmit software resources 253 */ 254 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 255 { 256 int i; 257 258 for (i = 0; i < adapter->num_tx_queues; i++) 259 igc_free_tx_resources(adapter->tx_ring[i]); 260 } 261 262 /** 263 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 264 * @adapter: board private structure 265 */ 266 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 267 { 268 int i; 269 270 for (i = 0; i < adapter->num_tx_queues; i++) 271 if (adapter->tx_ring[i]) 272 igc_clean_tx_ring(adapter->tx_ring[i]); 273 } 274 275 /** 276 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 277 * @tx_ring: tx descriptor ring (for a specific queue) to setup 278 * 279 
* Return 0 on success, negative on failure 280 */ 281 int igc_setup_tx_resources(struct igc_ring *tx_ring) 282 { 283 struct device *dev = tx_ring->dev; 284 int size = 0; 285 286 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 287 tx_ring->tx_buffer_info = vzalloc(size); 288 if (!tx_ring->tx_buffer_info) 289 goto err; 290 291 /* round up to nearest 4K */ 292 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 293 tx_ring->size = ALIGN(tx_ring->size, 4096); 294 295 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 296 &tx_ring->dma, GFP_KERNEL); 297 298 if (!tx_ring->desc) 299 goto err; 300 301 tx_ring->next_to_use = 0; 302 tx_ring->next_to_clean = 0; 303 304 return 0; 305 306 err: 307 vfree(tx_ring->tx_buffer_info); 308 dev_err(dev, 309 "Unable to allocate memory for the transmit descriptor ring\n"); 310 return -ENOMEM; 311 } 312 313 /** 314 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 315 * @adapter: board private structure 316 * 317 * Return 0 on success, negative on failure 318 */ 319 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 320 { 321 struct pci_dev *pdev = adapter->pdev; 322 int i, err = 0; 323 324 for (i = 0; i < adapter->num_tx_queues; i++) { 325 err = igc_setup_tx_resources(adapter->tx_ring[i]); 326 if (err) { 327 dev_err(&pdev->dev, 328 "Allocation for Tx Queue %u failed\n", i); 329 for (i--; i >= 0; i--) 330 igc_free_tx_resources(adapter->tx_ring[i]); 331 break; 332 } 333 } 334 335 return err; 336 } 337 338 /** 339 * igc_clean_rx_ring - Free Rx Buffers per Queue 340 * @rx_ring: ring to free buffers from 341 */ 342 static void igc_clean_rx_ring(struct igc_ring *rx_ring) 343 { 344 u16 i = rx_ring->next_to_clean; 345 346 dev_kfree_skb(rx_ring->skb); 347 rx_ring->skb = NULL; 348 349 /* Free all the Rx ring sk_buffs */ 350 while (i != rx_ring->next_to_alloc) { 351 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 352 353 /* Invalidate cache lines that may have been 
written to by 354 * device so that we avoid corrupting memory. 355 */ 356 dma_sync_single_range_for_cpu(rx_ring->dev, 357 buffer_info->dma, 358 buffer_info->page_offset, 359 igc_rx_bufsz(rx_ring), 360 DMA_FROM_DEVICE); 361 362 /* free resources associated with mapping */ 363 dma_unmap_page_attrs(rx_ring->dev, 364 buffer_info->dma, 365 igc_rx_pg_size(rx_ring), 366 DMA_FROM_DEVICE, 367 IGC_RX_DMA_ATTR); 368 __page_frag_cache_drain(buffer_info->page, 369 buffer_info->pagecnt_bias); 370 371 i++; 372 if (i == rx_ring->count) 373 i = 0; 374 } 375 376 rx_ring->next_to_alloc = 0; 377 rx_ring->next_to_clean = 0; 378 rx_ring->next_to_use = 0; 379 } 380 381 /** 382 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 383 * @adapter: board private structure 384 */ 385 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 386 { 387 int i; 388 389 for (i = 0; i < adapter->num_rx_queues; i++) 390 if (adapter->rx_ring[i]) 391 igc_clean_rx_ring(adapter->rx_ring[i]); 392 } 393 394 /** 395 * igc_free_rx_resources - Free Rx Resources 396 * @rx_ring: ring to clean the resources from 397 * 398 * Free all receive software resources 399 */ 400 void igc_free_rx_resources(struct igc_ring *rx_ring) 401 { 402 igc_clean_rx_ring(rx_ring); 403 404 vfree(rx_ring->rx_buffer_info); 405 rx_ring->rx_buffer_info = NULL; 406 407 /* if not set, then don't free */ 408 if (!rx_ring->desc) 409 return; 410 411 dma_free_coherent(rx_ring->dev, rx_ring->size, 412 rx_ring->desc, rx_ring->dma); 413 414 rx_ring->desc = NULL; 415 } 416 417 /** 418 * igc_free_all_rx_resources - Free Rx Resources for All Queues 419 * @adapter: board private structure 420 * 421 * Free all receive software resources 422 */ 423 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 424 { 425 int i; 426 427 for (i = 0; i < adapter->num_rx_queues; i++) 428 igc_free_rx_resources(adapter->rx_ring[i]); 429 } 430 431 /** 432 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 433 * @rx_ring: rx 
descriptor ring (for a specific queue) to setup 434 * 435 * Returns 0 on success, negative on failure 436 */ 437 int igc_setup_rx_resources(struct igc_ring *rx_ring) 438 { 439 struct device *dev = rx_ring->dev; 440 int size, desc_len; 441 442 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 443 rx_ring->rx_buffer_info = vzalloc(size); 444 if (!rx_ring->rx_buffer_info) 445 goto err; 446 447 desc_len = sizeof(union igc_adv_rx_desc); 448 449 /* Round up to nearest 4K */ 450 rx_ring->size = rx_ring->count * desc_len; 451 rx_ring->size = ALIGN(rx_ring->size, 4096); 452 453 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 454 &rx_ring->dma, GFP_KERNEL); 455 456 if (!rx_ring->desc) 457 goto err; 458 459 rx_ring->next_to_alloc = 0; 460 rx_ring->next_to_clean = 0; 461 rx_ring->next_to_use = 0; 462 463 return 0; 464 465 err: 466 vfree(rx_ring->rx_buffer_info); 467 rx_ring->rx_buffer_info = NULL; 468 dev_err(dev, 469 "Unable to allocate memory for the receive descriptor ring\n"); 470 return -ENOMEM; 471 } 472 473 /** 474 * igc_setup_all_rx_resources - wrapper to allocate Rx resources 475 * (Descriptors) for all queues 476 * @adapter: board private structure 477 * 478 * Return 0 on success, negative on failure 479 */ 480 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 481 { 482 struct pci_dev *pdev = adapter->pdev; 483 int i, err = 0; 484 485 for (i = 0; i < adapter->num_rx_queues; i++) { 486 err = igc_setup_rx_resources(adapter->rx_ring[i]); 487 if (err) { 488 dev_err(&pdev->dev, 489 "Allocation for Rx Queue %u failed\n", i); 490 for (i--; i >= 0; i--) 491 igc_free_rx_resources(adapter->rx_ring[i]); 492 break; 493 } 494 } 495 496 return err; 497 } 498 499 /** 500 * igc_configure_rx_ring - Configure a receive ring after Reset 501 * @adapter: board private structure 502 * @ring: receive ring to be configured 503 * 504 * Configure the Rx unit of the MAC after a reset. 
505 */ 506 static void igc_configure_rx_ring(struct igc_adapter *adapter, 507 struct igc_ring *ring) 508 { 509 struct igc_hw *hw = &adapter->hw; 510 union igc_adv_rx_desc *rx_desc; 511 int reg_idx = ring->reg_idx; 512 u32 srrctl = 0, rxdctl = 0; 513 u64 rdba = ring->dma; 514 515 /* disable the queue */ 516 wr32(IGC_RXDCTL(reg_idx), 0); 517 518 /* Set DMA base address registers */ 519 wr32(IGC_RDBAL(reg_idx), 520 rdba & 0x00000000ffffffffULL); 521 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 522 wr32(IGC_RDLEN(reg_idx), 523 ring->count * sizeof(union igc_adv_rx_desc)); 524 525 /* initialize head and tail */ 526 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 527 wr32(IGC_RDH(reg_idx), 0); 528 writel(0, ring->tail); 529 530 /* reset next-to- use/clean to place SW in sync with hardware */ 531 ring->next_to_clean = 0; 532 ring->next_to_use = 0; 533 534 /* set descriptor configuration */ 535 srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; 536 if (ring_uses_large_buffer(ring)) 537 srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT; 538 else 539 srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT; 540 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 541 542 wr32(IGC_SRRCTL(reg_idx), srrctl); 543 544 rxdctl |= IGC_RX_PTHRESH; 545 rxdctl |= IGC_RX_HTHRESH << 8; 546 rxdctl |= IGC_RX_WTHRESH << 16; 547 548 /* initialize rx_buffer_info */ 549 memset(ring->rx_buffer_info, 0, 550 sizeof(struct igc_rx_buffer) * ring->count); 551 552 /* initialize Rx descriptor 0 */ 553 rx_desc = IGC_RX_DESC(ring, 0); 554 rx_desc->wb.upper.length = 0; 555 556 /* enable receive descriptor fetching */ 557 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 558 559 wr32(IGC_RXDCTL(reg_idx), rxdctl); 560 } 561 562 /** 563 * igc_configure_rx - Configure receive Unit after Reset 564 * @adapter: board private structure 565 * 566 * Configure the Rx unit of the MAC after a reset. 
567 */ 568 static void igc_configure_rx(struct igc_adapter *adapter) 569 { 570 int i; 571 572 /* Setup the HW Rx Head and Tail Descriptor Pointers and 573 * the Base and Length of the Rx Descriptor Ring 574 */ 575 for (i = 0; i < adapter->num_rx_queues; i++) 576 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 577 } 578 579 /** 580 * igc_configure_tx_ring - Configure transmit ring after Reset 581 * @adapter: board private structure 582 * @ring: tx ring to configure 583 * 584 * Configure a transmit ring after a reset. 585 */ 586 static void igc_configure_tx_ring(struct igc_adapter *adapter, 587 struct igc_ring *ring) 588 { 589 struct igc_hw *hw = &adapter->hw; 590 int reg_idx = ring->reg_idx; 591 u64 tdba = ring->dma; 592 u32 txdctl = 0; 593 594 /* disable the queue */ 595 wr32(IGC_TXDCTL(reg_idx), 0); 596 wrfl(); 597 mdelay(10); 598 599 wr32(IGC_TDLEN(reg_idx), 600 ring->count * sizeof(union igc_adv_tx_desc)); 601 wr32(IGC_TDBAL(reg_idx), 602 tdba & 0x00000000ffffffffULL); 603 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 604 605 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 606 wr32(IGC_TDH(reg_idx), 0); 607 writel(0, ring->tail); 608 609 txdctl |= IGC_TX_PTHRESH; 610 txdctl |= IGC_TX_HTHRESH << 8; 611 txdctl |= IGC_TX_WTHRESH << 16; 612 613 txdctl |= IGC_TXDCTL_QUEUE_ENABLE; 614 wr32(IGC_TXDCTL(reg_idx), txdctl); 615 } 616 617 /** 618 * igc_configure_tx - Configure transmit Unit after Reset 619 * @adapter: board private structure 620 * 621 * Configure the Tx unit of the MAC after a reset. 
622 */ 623 static void igc_configure_tx(struct igc_adapter *adapter) 624 { 625 int i; 626 627 for (i = 0; i < adapter->num_tx_queues; i++) 628 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 629 } 630 631 /** 632 * igc_setup_mrqc - configure the multiple receive queue control registers 633 * @adapter: Board private structure 634 */ 635 static void igc_setup_mrqc(struct igc_adapter *adapter) 636 { 637 struct igc_hw *hw = &adapter->hw; 638 u32 j, num_rx_queues; 639 u32 mrqc, rxcsum; 640 u32 rss_key[10]; 641 642 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 643 for (j = 0; j < 10; j++) 644 wr32(IGC_RSSRK(j), rss_key[j]); 645 646 num_rx_queues = adapter->rss_queues; 647 648 if (adapter->rss_indir_tbl_init != num_rx_queues) { 649 for (j = 0; j < IGC_RETA_SIZE; j++) 650 adapter->rss_indir_tbl[j] = 651 (j * num_rx_queues) / IGC_RETA_SIZE; 652 adapter->rss_indir_tbl_init = num_rx_queues; 653 } 654 igc_write_rss_indir_tbl(adapter); 655 656 /* Disable raw packet checksumming so that RSS hash is placed in 657 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum 658 * offloads as they are enabled by default 659 */ 660 rxcsum = rd32(IGC_RXCSUM); 661 rxcsum |= IGC_RXCSUM_PCSD; 662 663 /* Enable Receive Checksum Offload for SCTP */ 664 rxcsum |= IGC_RXCSUM_CRCOFL; 665 666 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 667 wr32(IGC_RXCSUM, rxcsum); 668 669 /* Generate RSS hash based on packet types, TCP/UDP 670 * port numbers and/or IPv4/v6 src and dst addresses 671 */ 672 mrqc = IGC_MRQC_RSS_FIELD_IPV4 | 673 IGC_MRQC_RSS_FIELD_IPV4_TCP | 674 IGC_MRQC_RSS_FIELD_IPV6 | 675 IGC_MRQC_RSS_FIELD_IPV6_TCP | 676 IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; 677 678 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) 679 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; 680 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) 681 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; 682 683 mrqc |= IGC_MRQC_ENABLE_RSS_MQ; 684 685 wr32(IGC_MRQC, mrqc); 686 } 687 688 /** 689 * igc_setup_rctl - configure the receive control registers 690 * @adapter: Board private structure 691 */ 692 static void igc_setup_rctl(struct igc_adapter *adapter) 693 { 694 struct igc_hw *hw = &adapter->hw; 695 u32 rctl; 696 697 rctl = rd32(IGC_RCTL); 698 699 rctl &= ~(3 << IGC_RCTL_MO_SHIFT); 700 rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); 701 702 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | 703 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); 704 705 /* enable stripping of CRC. Newer features require 706 * that the HW strips the CRC. 707 */ 708 rctl |= IGC_RCTL_SECRC; 709 710 /* disable store bad packets and clear size bits. */ 711 rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); 712 713 /* enable LPE to allow for reception of jumbo frames */ 714 rctl |= IGC_RCTL_LPE; 715 716 /* disable queue 0 to prevent tail write w/o re-config */ 717 wr32(IGC_RXDCTL(0), 0); 718 719 /* This is useful for sniffing bad packets. 
*/ 720 if (adapter->netdev->features & NETIF_F_RXALL) { 721 /* UPE and MPE will be handled by normal PROMISC logic 722 * in set_rx_mode 723 */ 724 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ 725 IGC_RCTL_BAM | /* RX All Bcast Pkts */ 726 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 727 728 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ 729 IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ 730 } 731 732 wr32(IGC_RCTL, rctl); 733 } 734 735 /** 736 * igc_setup_tctl - configure the transmit control registers 737 * @adapter: Board private structure 738 */ 739 static void igc_setup_tctl(struct igc_adapter *adapter) 740 { 741 struct igc_hw *hw = &adapter->hw; 742 u32 tctl; 743 744 /* disable queue 0 which icould be enabled by default */ 745 wr32(IGC_TXDCTL(0), 0); 746 747 /* Program the Transmit Control Register */ 748 tctl = rd32(IGC_TCTL); 749 tctl &= ~IGC_TCTL_CT; 750 tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | 751 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); 752 753 /* Enable transmits */ 754 tctl |= IGC_TCTL_EN; 755 756 wr32(IGC_TCTL, tctl); 757 } 758 759 /** 760 * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table 761 * @adapter: address of board private structure 762 * @index: Index of the RAR entry which need to be synced with MAC table 763 */ 764 static void igc_rar_set_index(struct igc_adapter *adapter, u32 index) 765 { 766 u8 *addr = adapter->mac_table[index].addr; 767 struct igc_hw *hw = &adapter->hw; 768 u32 rar_low, rar_high; 769 770 /* HW expects these to be in network order when they are plugged 771 * into the registers which are little endian. In order to guarantee 772 * that ordering we need to do an leXX_to_cpup here in order to be 773 * ready for the byteswap that occurs with writel 774 */ 775 rar_low = le32_to_cpup((__le32 *)(addr)); 776 rar_high = le16_to_cpup((__le16 *)(addr + 4)); 777 778 /* Indicate to hardware the Address is Valid. 
*/ 779 if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) { 780 if (is_valid_ether_addr(addr)) 781 rar_high |= IGC_RAH_AV; 782 783 rar_high |= IGC_RAH_POOL_1 << 784 adapter->mac_table[index].queue; 785 } 786 787 wr32(IGC_RAL(index), rar_low); 788 wrfl(); 789 wr32(IGC_RAH(index), rar_high); 790 wrfl(); 791 } 792 793 /* Set default MAC address for the PF in the first RAR entry */ 794 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 795 { 796 struct igc_mac_addr *mac_table = &adapter->mac_table[0]; 797 798 ether_addr_copy(mac_table->addr, adapter->hw.mac.addr); 799 mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; 800 801 igc_rar_set_index(adapter, 0); 802 } 803 804 /** 805 * igc_set_mac - Change the Ethernet Address of the NIC 806 * @netdev: network interface device structure 807 * @p: pointer to an address structure 808 * 809 * Returns 0 on success, negative on failure 810 */ 811 static int igc_set_mac(struct net_device *netdev, void *p) 812 { 813 struct igc_adapter *adapter = netdev_priv(netdev); 814 struct igc_hw *hw = &adapter->hw; 815 struct sockaddr *addr = p; 816 817 if (!is_valid_ether_addr(addr->sa_data)) 818 return -EADDRNOTAVAIL; 819 820 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 821 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 822 823 /* set the correct pool for the new PF MAC address in entry 0 */ 824 igc_set_default_mac_filter(adapter); 825 826 return 0; 827 } 828 829 /** 830 * igc_write_mc_addr_list - write multicast addresses to MTA 831 * @netdev: network interface device structure 832 * 833 * Writes multicast address list to the MTA hash table. 
834 * Returns: -ENOMEM on failure 835 * 0 on no addresses written 836 * X on writing X addresses to MTA 837 **/ 838 static int igc_write_mc_addr_list(struct net_device *netdev) 839 { 840 struct igc_adapter *adapter = netdev_priv(netdev); 841 struct igc_hw *hw = &adapter->hw; 842 struct netdev_hw_addr *ha; 843 u8 *mta_list; 844 int i; 845 846 if (netdev_mc_empty(netdev)) { 847 /* nothing to program, so clear mc list */ 848 igc_update_mc_addr_list(hw, NULL, 0); 849 return 0; 850 } 851 852 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 853 if (!mta_list) 854 return -ENOMEM; 855 856 /* The shared function expects a packed array of only addresses. */ 857 i = 0; 858 netdev_for_each_mc_addr(ha, netdev) 859 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 860 861 igc_update_mc_addr_list(hw, mta_list, i); 862 kfree(mta_list); 863 864 return netdev_mc_count(netdev); 865 } 866 867 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, 868 struct igc_tx_buffer *first, 869 u32 vlan_macip_lens, u32 type_tucmd, 870 u32 mss_l4len_idx) 871 { 872 struct igc_adv_tx_context_desc *context_desc; 873 u16 i = tx_ring->next_to_use; 874 struct timespec64 ts; 875 876 context_desc = IGC_TX_CTXTDESC(tx_ring, i); 877 878 i++; 879 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 880 881 /* set bits to identify this as an advanced context descriptor */ 882 type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; 883 884 /* For i225, context index must be unique per ring. */ 885 if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 886 mss_l4len_idx |= tx_ring->reg_idx << 4; 887 888 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 889 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 890 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 891 892 /* We assume there is always a valid Tx time available. Invalid times 893 * should have been handled by the upper layers. 
894 */ 895 if (tx_ring->launchtime_enable) { 896 ts = ktime_to_timespec64(first->skb->tstamp); 897 first->skb->tstamp = ktime_set(0, 0); 898 context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32); 899 } else { 900 context_desc->launch_time = 0; 901 } 902 } 903 904 static inline bool igc_ipv6_csum_is_sctp(struct sk_buff *skb) 905 { 906 unsigned int offset = 0; 907 908 ipv6_find_hdr(skb, &offset, IPPROTO_SCTP, NULL, NULL); 909 910 return offset == skb_checksum_start_offset(skb); 911 } 912 913 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) 914 { 915 struct sk_buff *skb = first->skb; 916 u32 vlan_macip_lens = 0; 917 u32 type_tucmd = 0; 918 919 if (skb->ip_summed != CHECKSUM_PARTIAL) { 920 csum_failed: 921 if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) && 922 !tx_ring->launchtime_enable) 923 return; 924 goto no_csum; 925 } 926 927 switch (skb->csum_offset) { 928 case offsetof(struct tcphdr, check): 929 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 930 /* fall through */ 931 case offsetof(struct udphdr, check): 932 break; 933 case offsetof(struct sctphdr, checksum): 934 /* validate that this is actually an SCTP request */ 935 if ((first->protocol == htons(ETH_P_IP) && 936 (ip_hdr(skb)->protocol == IPPROTO_SCTP)) || 937 (first->protocol == htons(ETH_P_IPV6) && 938 igc_ipv6_csum_is_sctp(skb))) { 939 type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP; 940 break; 941 } 942 /* fall through */ 943 default: 944 skb_checksum_help(skb); 945 goto csum_failed; 946 } 947 948 /* update TX checksum flag */ 949 first->tx_flags |= IGC_TX_FLAGS_CSUM; 950 vlan_macip_lens = skb_checksum_start_offset(skb) - 951 skb_network_offset(skb); 952 no_csum: 953 vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT; 954 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 955 956 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); 957 } 958 959 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 960 { 961 struct net_device *netdev = 
tx_ring->netdev; 962 963 netif_stop_subqueue(netdev, tx_ring->queue_index); 964 965 /* memory barriier comment */ 966 smp_mb(); 967 968 /* We need to check again in a case another CPU has just 969 * made room available. 970 */ 971 if (igc_desc_unused(tx_ring) < size) 972 return -EBUSY; 973 974 /* A reprieve! */ 975 netif_wake_subqueue(netdev, tx_ring->queue_index); 976 977 u64_stats_update_begin(&tx_ring->tx_syncp2); 978 tx_ring->tx_stats.restart_queue2++; 979 u64_stats_update_end(&tx_ring->tx_syncp2); 980 981 return 0; 982 } 983 984 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 985 { 986 if (igc_desc_unused(tx_ring) >= size) 987 return 0; 988 return __igc_maybe_stop_tx(tx_ring, size); 989 } 990 991 #define IGC_SET_FLAG(_input, _flag, _result) \ 992 (((_flag) <= (_result)) ? \ 993 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 994 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 995 996 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 997 { 998 /* set type for advanced descriptor with frame checksum insertion */ 999 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1000 IGC_ADVTXD_DCMD_DEXT | 1001 IGC_ADVTXD_DCMD_IFCS; 1002 1003 /* set segmentation bits for TSO */ 1004 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1005 (IGC_ADVTXD_DCMD_TSE)); 1006 1007 /* set timestamp bit if present */ 1008 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1009 (IGC_ADVTXD_MAC_TSTAMP)); 1010 1011 return cmd_type; 1012 } 1013 1014 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1015 union igc_adv_tx_desc *tx_desc, 1016 u32 tx_flags, unsigned int paylen) 1017 { 1018 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1019 1020 /* insert L4 checksum */ 1021 olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * 1022 ((IGC_TXD_POPTS_TXSM << 8) / 1023 IGC_TX_FLAGS_CSUM); 1024 1025 /* insert IPv4 checksum */ 1026 olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * 1027 (((IGC_TXD_POPTS_IXSM << 8)) / 1028 
IGC_TX_FLAGS_IPV4); 1029 1030 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1031 } 1032 1033 static int igc_tx_map(struct igc_ring *tx_ring, 1034 struct igc_tx_buffer *first, 1035 const u8 hdr_len) 1036 { 1037 struct sk_buff *skb = first->skb; 1038 struct igc_tx_buffer *tx_buffer; 1039 union igc_adv_tx_desc *tx_desc; 1040 u32 tx_flags = first->tx_flags; 1041 skb_frag_t *frag; 1042 u16 i = tx_ring->next_to_use; 1043 unsigned int data_len, size; 1044 dma_addr_t dma; 1045 u32 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1046 1047 tx_desc = IGC_TX_DESC(tx_ring, i); 1048 1049 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1050 1051 size = skb_headlen(skb); 1052 data_len = skb->data_len; 1053 1054 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1055 1056 tx_buffer = first; 1057 1058 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1059 if (dma_mapping_error(tx_ring->dev, dma)) 1060 goto dma_error; 1061 1062 /* record length, and DMA address */ 1063 dma_unmap_len_set(tx_buffer, len, size); 1064 dma_unmap_addr_set(tx_buffer, dma, dma); 1065 1066 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1067 1068 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1069 tx_desc->read.cmd_type_len = 1070 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1071 1072 i++; 1073 tx_desc++; 1074 if (i == tx_ring->count) { 1075 tx_desc = IGC_TX_DESC(tx_ring, 0); 1076 i = 0; 1077 } 1078 tx_desc->read.olinfo_status = 0; 1079 1080 dma += IGC_MAX_DATA_PER_TXD; 1081 size -= IGC_MAX_DATA_PER_TXD; 1082 1083 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1084 } 1085 1086 if (likely(!data_len)) 1087 break; 1088 1089 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1090 1091 i++; 1092 tx_desc++; 1093 if (i == tx_ring->count) { 1094 tx_desc = IGC_TX_DESC(tx_ring, 0); 1095 i = 0; 1096 } 1097 tx_desc->read.olinfo_status = 0; 1098 1099 size = skb_frag_size(frag); 1100 data_len -= size; 1101 1102 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, 1103 size, 
DMA_TO_DEVICE); 1104 1105 tx_buffer = &tx_ring->tx_buffer_info[i]; 1106 } 1107 1108 /* write last descriptor with RS and EOP bits */ 1109 cmd_type |= size | IGC_TXD_DCMD; 1110 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1111 1112 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1113 1114 /* set the timestamp */ 1115 first->time_stamp = jiffies; 1116 1117 skb_tx_timestamp(skb); 1118 1119 /* Force memory writes to complete before letting h/w know there 1120 * are new descriptors to fetch. (Only applicable for weak-ordered 1121 * memory model archs, such as IA-64). 1122 * 1123 * We also need this memory barrier to make certain all of the 1124 * status bits have been updated before next_to_watch is written. 1125 */ 1126 wmb(); 1127 1128 /* set next_to_watch value indicating a packet is present */ 1129 first->next_to_watch = tx_desc; 1130 1131 i++; 1132 if (i == tx_ring->count) 1133 i = 0; 1134 1135 tx_ring->next_to_use = i; 1136 1137 /* Make sure there is space in the ring for the next send. 
*/ 1138 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1139 1140 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1141 writel(i, tx_ring->tail); 1142 } 1143 1144 return 0; 1145 dma_error: 1146 dev_err(tx_ring->dev, "TX DMA map failed\n"); 1147 tx_buffer = &tx_ring->tx_buffer_info[i]; 1148 1149 /* clear dma mappings for failed tx_buffer_info map */ 1150 while (tx_buffer != first) { 1151 if (dma_unmap_len(tx_buffer, len)) 1152 dma_unmap_page(tx_ring->dev, 1153 dma_unmap_addr(tx_buffer, dma), 1154 dma_unmap_len(tx_buffer, len), 1155 DMA_TO_DEVICE); 1156 dma_unmap_len_set(tx_buffer, len, 0); 1157 1158 if (i-- == 0) 1159 i += tx_ring->count; 1160 tx_buffer = &tx_ring->tx_buffer_info[i]; 1161 } 1162 1163 if (dma_unmap_len(tx_buffer, len)) 1164 dma_unmap_single(tx_ring->dev, 1165 dma_unmap_addr(tx_buffer, dma), 1166 dma_unmap_len(tx_buffer, len), 1167 DMA_TO_DEVICE); 1168 dma_unmap_len_set(tx_buffer, len, 0); 1169 1170 dev_kfree_skb_any(tx_buffer->skb); 1171 tx_buffer->skb = NULL; 1172 1173 tx_ring->next_to_use = i; 1174 1175 return -1; 1176 } 1177 1178 static int igc_tso(struct igc_ring *tx_ring, 1179 struct igc_tx_buffer *first, 1180 u8 *hdr_len) 1181 { 1182 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1183 struct sk_buff *skb = first->skb; 1184 union { 1185 struct iphdr *v4; 1186 struct ipv6hdr *v6; 1187 unsigned char *hdr; 1188 } ip; 1189 union { 1190 struct tcphdr *tcp; 1191 struct udphdr *udp; 1192 unsigned char *hdr; 1193 } l4; 1194 u32 paylen, l4_offset; 1195 int err; 1196 1197 if (skb->ip_summed != CHECKSUM_PARTIAL) 1198 return 0; 1199 1200 if (!skb_is_gso(skb)) 1201 return 0; 1202 1203 err = skb_cow_head(skb, 0); 1204 if (err < 0) 1205 return err; 1206 1207 ip.hdr = skb_network_header(skb); 1208 l4.hdr = skb_checksum_start(skb); 1209 1210 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1211 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1212 1213 /* initialize outer IP header fields */ 1214 if (ip.v4->version == 4) { 1215 unsigned char *csum_start = 
skb_checksum_start(skb); 1216 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1217 1218 /* IP header will have to cancel out any data that 1219 * is not a part of the outer IP header 1220 */ 1221 ip.v4->check = csum_fold(csum_partial(trans_start, 1222 csum_start - trans_start, 1223 0)); 1224 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 1225 1226 ip.v4->tot_len = 0; 1227 first->tx_flags |= IGC_TX_FLAGS_TSO | 1228 IGC_TX_FLAGS_CSUM | 1229 IGC_TX_FLAGS_IPV4; 1230 } else { 1231 ip.v6->payload_len = 0; 1232 first->tx_flags |= IGC_TX_FLAGS_TSO | 1233 IGC_TX_FLAGS_CSUM; 1234 } 1235 1236 /* determine offset of inner transport header */ 1237 l4_offset = l4.hdr - skb->data; 1238 1239 /* remove payload length from inner checksum */ 1240 paylen = skb->len - l4_offset; 1241 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1242 /* compute length of segmentation header */ 1243 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1244 csum_replace_by_diff(&l4.tcp->check, 1245 (__force __wsum)htonl(paylen)); 1246 } else { 1247 /* compute length of segmentation header */ 1248 *hdr_len = sizeof(*l4.udp) + l4_offset; 1249 csum_replace_by_diff(&l4.udp->check, 1250 (__force __wsum)htonl(paylen)); 1251 } 1252 1253 /* update gso size and bytecount with header size */ 1254 first->gso_segs = skb_shinfo(skb)->gso_segs; 1255 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1256 1257 /* MSS L4LEN IDX */ 1258 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1259 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1260 1261 /* VLAN MACLEN IPLEN */ 1262 vlan_macip_lens = l4.hdr - ip.hdr; 1263 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1264 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1265 1266 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, 1267 type_tucmd, mss_l4len_idx); 1268 1269 return 1; 1270 } 1271 1272 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1273 struct igc_ring *tx_ring) 1274 { 1275 u16 count = 
TXD_USE_COUNT(skb_headlen(skb)); 1276 __be16 protocol = vlan_get_protocol(skb); 1277 struct igc_tx_buffer *first; 1278 u32 tx_flags = 0; 1279 unsigned short f; 1280 u8 hdr_len = 0; 1281 int tso = 0; 1282 1283 /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1284 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1285 * + 2 desc gap to keep tail from touching head, 1286 * + 1 desc for context descriptor, 1287 * otherwise try next time 1288 */ 1289 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1290 count += TXD_USE_COUNT(skb_frag_size( 1291 &skb_shinfo(skb)->frags[f])); 1292 1293 if (igc_maybe_stop_tx(tx_ring, count + 3)) { 1294 /* this is a hard error */ 1295 return NETDEV_TX_BUSY; 1296 } 1297 1298 /* record the location of the first descriptor for this packet */ 1299 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1300 first->skb = skb; 1301 first->bytecount = skb->len; 1302 first->gso_segs = 1; 1303 1304 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1305 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1306 1307 /* FIXME: add support for retrieving timestamps from 1308 * the other timer registers before skipping the 1309 * timestamping request. 
1310 */ 1311 if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && 1312 !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, 1313 &adapter->state)) { 1314 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1315 tx_flags |= IGC_TX_FLAGS_TSTAMP; 1316 1317 adapter->ptp_tx_skb = skb_get(skb); 1318 adapter->ptp_tx_start = jiffies; 1319 } else { 1320 adapter->tx_hwtstamp_skipped++; 1321 } 1322 } 1323 1324 /* record initial flags and protocol */ 1325 first->tx_flags = tx_flags; 1326 first->protocol = protocol; 1327 1328 tso = igc_tso(tx_ring, first, &hdr_len); 1329 if (tso < 0) 1330 goto out_drop; 1331 else if (!tso) 1332 igc_tx_csum(tx_ring, first); 1333 1334 igc_tx_map(tx_ring, first, hdr_len); 1335 1336 return NETDEV_TX_OK; 1337 1338 out_drop: 1339 dev_kfree_skb_any(first->skb); 1340 first->skb = NULL; 1341 1342 return NETDEV_TX_OK; 1343 } 1344 1345 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1346 struct sk_buff *skb) 1347 { 1348 unsigned int r_idx = skb->queue_mapping; 1349 1350 if (r_idx >= adapter->num_tx_queues) 1351 r_idx = r_idx % adapter->num_tx_queues; 1352 1353 return adapter->tx_ring[r_idx]; 1354 } 1355 1356 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1357 struct net_device *netdev) 1358 { 1359 struct igc_adapter *adapter = netdev_priv(netdev); 1360 1361 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 1362 * in order to meet this minimum size requirement. 
1363 */ 1364 if (skb->len < 17) { 1365 if (skb_padto(skb, 17)) 1366 return NETDEV_TX_OK; 1367 skb->len = 17; 1368 } 1369 1370 return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); 1371 } 1372 1373 static void igc_rx_checksum(struct igc_ring *ring, 1374 union igc_adv_rx_desc *rx_desc, 1375 struct sk_buff *skb) 1376 { 1377 skb_checksum_none_assert(skb); 1378 1379 /* Ignore Checksum bit is set */ 1380 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM)) 1381 return; 1382 1383 /* Rx checksum disabled via ethtool */ 1384 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 1385 return; 1386 1387 /* TCP/UDP checksum error bit is set */ 1388 if (igc_test_staterr(rx_desc, 1389 IGC_RXDEXT_STATERR_TCPE | 1390 IGC_RXDEXT_STATERR_IPE)) { 1391 /* work around errata with sctp packets where the TCPE aka 1392 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 1393 * packets (aka let the stack check the crc32c) 1394 */ 1395 if (!(skb->len == 60 && 1396 test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 1397 u64_stats_update_begin(&ring->rx_syncp); 1398 ring->rx_stats.csum_err++; 1399 u64_stats_update_end(&ring->rx_syncp); 1400 } 1401 /* let the stack verify checksum errors */ 1402 return; 1403 } 1404 /* It must be a TCP or UDP packet with a valid checksum */ 1405 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS | 1406 IGC_RXD_STAT_UDPCS)) 1407 skb->ip_summed = CHECKSUM_UNNECESSARY; 1408 1409 dev_dbg(ring->dev, "cksum success: bits %08X\n", 1410 le32_to_cpu(rx_desc->wb.upper.status_error)); 1411 } 1412 1413 static inline void igc_rx_hash(struct igc_ring *ring, 1414 union igc_adv_rx_desc *rx_desc, 1415 struct sk_buff *skb) 1416 { 1417 if (ring->netdev->features & NETIF_F_RXHASH) 1418 skb_set_hash(skb, 1419 le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), 1420 PKT_HASH_TYPE_L3); 1421 } 1422 1423 /** 1424 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1425 * @rx_ring: rx descriptor ring packet is being transacted on 1426 * @rx_desc: pointer to 
the EOP Rx descriptor 1427 * @skb: pointer to current skb being populated 1428 * 1429 * This function checks the ring, descriptor, and packet information in 1430 * order to populate the hash, checksum, VLAN, timestamp, protocol, and 1431 * other fields within the skb. 1432 */ 1433 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1434 union igc_adv_rx_desc *rx_desc, 1435 struct sk_buff *skb) 1436 { 1437 igc_rx_hash(rx_ring, rx_desc, skb); 1438 1439 igc_rx_checksum(rx_ring, rx_desc, skb); 1440 1441 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TS) && 1442 !igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) 1443 igc_ptp_rx_rgtstamp(rx_ring->q_vector, skb); 1444 1445 skb_record_rx_queue(skb, rx_ring->queue_index); 1446 1447 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1448 } 1449 1450 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring, 1451 const unsigned int size) 1452 { 1453 struct igc_rx_buffer *rx_buffer; 1454 1455 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1456 prefetchw(rx_buffer->page); 1457 1458 /* we are reusing so sync this buffer for CPU use */ 1459 dma_sync_single_range_for_cpu(rx_ring->dev, 1460 rx_buffer->dma, 1461 rx_buffer->page_offset, 1462 size, 1463 DMA_FROM_DEVICE); 1464 1465 rx_buffer->pagecnt_bias--; 1466 1467 return rx_buffer; 1468 } 1469 1470 /** 1471 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1472 * @rx_ring: rx descriptor ring to transact packets on 1473 * @rx_buffer: buffer containing page to add 1474 * @skb: sk_buff to place the data into 1475 * @size: size of buffer to be added 1476 * 1477 * This function will add the data contained in rx_buffer->page to the skb. 
1478 */ 1479 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1480 struct igc_rx_buffer *rx_buffer, 1481 struct sk_buff *skb, 1482 unsigned int size) 1483 { 1484 #if (PAGE_SIZE < 8192) 1485 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; 1486 1487 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1488 rx_buffer->page_offset, size, truesize); 1489 rx_buffer->page_offset ^= truesize; 1490 #else 1491 unsigned int truesize = ring_uses_build_skb(rx_ring) ? 1492 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1493 SKB_DATA_ALIGN(size); 1494 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1495 rx_buffer->page_offset, size, truesize); 1496 rx_buffer->page_offset += truesize; 1497 #endif 1498 } 1499 1500 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1501 struct igc_rx_buffer *rx_buffer, 1502 union igc_adv_rx_desc *rx_desc, 1503 unsigned int size) 1504 { 1505 void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; 1506 #if (PAGE_SIZE < 8192) 1507 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; 1508 #else 1509 unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1510 SKB_DATA_ALIGN(IGC_SKB_PAD + size); 1511 #endif 1512 struct sk_buff *skb; 1513 1514 /* prefetch first cache line of first page */ 1515 prefetch(va); 1516 #if L1_CACHE_BYTES < 128 1517 prefetch(va + L1_CACHE_BYTES); 1518 #endif 1519 1520 /* build an skb around the page buffer */ 1521 skb = build_skb(va - IGC_SKB_PAD, truesize); 1522 if (unlikely(!skb)) 1523 return NULL; 1524 1525 /* update pointers within the skb to store the data */ 1526 skb_reserve(skb, IGC_SKB_PAD); 1527 __skb_put(skb, size); 1528 1529 /* update buffer offset */ 1530 #if (PAGE_SIZE < 8192) 1531 rx_buffer->page_offset ^= truesize; 1532 #else 1533 rx_buffer->page_offset += truesize; 1534 #endif 1535 1536 return skb; 1537 } 1538 1539 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1540 struct igc_rx_buffer *rx_buffer, 1541 union igc_adv_rx_desc 
*rx_desc, 1542 unsigned int size) 1543 { 1544 void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; 1545 #if (PAGE_SIZE < 8192) 1546 unsigned int truesize = igc_rx_pg_size(rx_ring) / 2; 1547 #else 1548 unsigned int truesize = SKB_DATA_ALIGN(size); 1549 #endif 1550 unsigned int headlen; 1551 struct sk_buff *skb; 1552 1553 /* prefetch first cache line of first page */ 1554 prefetch(va); 1555 #if L1_CACHE_BYTES < 128 1556 prefetch(va + L1_CACHE_BYTES); 1557 #endif 1558 1559 /* allocate a skb to store the frags */ 1560 skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); 1561 if (unlikely(!skb)) 1562 return NULL; 1563 1564 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) { 1565 igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); 1566 va += IGC_TS_HDR_LEN; 1567 size -= IGC_TS_HDR_LEN; 1568 } 1569 1570 /* Determine available headroom for copy */ 1571 headlen = size; 1572 if (headlen > IGC_RX_HDR_LEN) 1573 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 1574 1575 /* align pull length to size of long to optimize memcpy performance */ 1576 memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); 1577 1578 /* update all of the pointers */ 1579 size -= headlen; 1580 if (size) { 1581 skb_add_rx_frag(skb, 0, rx_buffer->page, 1582 (va + headlen) - page_address(rx_buffer->page), 1583 size, truesize); 1584 #if (PAGE_SIZE < 8192) 1585 rx_buffer->page_offset ^= truesize; 1586 #else 1587 rx_buffer->page_offset += truesize; 1588 #endif 1589 } else { 1590 rx_buffer->pagecnt_bias++; 1591 } 1592 1593 return skb; 1594 } 1595 1596 /** 1597 * igc_reuse_rx_page - page flip buffer and store it back on the ring 1598 * @rx_ring: rx descriptor ring to store buffers on 1599 * @old_buff: donor buffer to have page reused 1600 * 1601 * Synchronizes page for reuse by the adapter 1602 */ 1603 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 1604 struct igc_rx_buffer *old_buff) 1605 { 1606 u16 nta = rx_ring->next_to_alloc; 1607 struct 
igc_rx_buffer *new_buff; 1608 1609 new_buff = &rx_ring->rx_buffer_info[nta]; 1610 1611 /* update, and store next to alloc */ 1612 nta++; 1613 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 1614 1615 /* Transfer page from old buffer to new buffer. 1616 * Move each member individually to avoid possible store 1617 * forwarding stalls. 1618 */ 1619 new_buff->dma = old_buff->dma; 1620 new_buff->page = old_buff->page; 1621 new_buff->page_offset = old_buff->page_offset; 1622 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 1623 } 1624 1625 static inline bool igc_page_is_reserved(struct page *page) 1626 { 1627 return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); 1628 } 1629 1630 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer) 1631 { 1632 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 1633 struct page *page = rx_buffer->page; 1634 1635 /* avoid re-using remote pages */ 1636 if (unlikely(igc_page_is_reserved(page))) 1637 return false; 1638 1639 #if (PAGE_SIZE < 8192) 1640 /* if we are only owner of page we can reuse it */ 1641 if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) 1642 return false; 1643 #else 1644 #define IGC_LAST_OFFSET \ 1645 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 1646 1647 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 1648 return false; 1649 #endif 1650 1651 /* If we have drained the page fragment pool we need to update 1652 * the pagecnt_bias and page count so that we fully restock the 1653 * number of references the driver holds. 1654 */ 1655 if (unlikely(!pagecnt_bias)) { 1656 page_ref_add(page, USHRT_MAX); 1657 rx_buffer->pagecnt_bias = USHRT_MAX; 1658 } 1659 1660 return true; 1661 } 1662 1663 /** 1664 * igc_is_non_eop - process handling of non-EOP buffers 1665 * @rx_ring: Rx ring being processed 1666 * @rx_desc: Rx descriptor for current buffer 1667 * 1668 * This function updates next to clean. 
If the buffer is an EOP buffer
 * this function exits returning false, otherwise it returns true
 * indicating that this is in fact a non-EOP buffer and more buffers of
 * the same frame follow.
 */
static bool igc_is_non_eop(struct igc_ring *rx_ring,
			   union igc_adv_rx_desc *rx_desc)
{
	u32 next = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean (wraps at ring end) */
	if (next >= rx_ring->count)
		next = 0;
	rx_ring->next_to_clean = next;

	prefetch(IGC_RX_DESC(rx_ring, next));

	/* without the EOP bit the frame continues in the next buffer */
	if (unlikely(!igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
		return true;

	return false;
}

/**
 * igc_cleanup_headers - Correct corrupted or empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
1703 */ 1704 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 1705 union igc_adv_rx_desc *rx_desc, 1706 struct sk_buff *skb) 1707 { 1708 if (unlikely((igc_test_staterr(rx_desc, 1709 IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) { 1710 struct net_device *netdev = rx_ring->netdev; 1711 1712 if (!(netdev->features & NETIF_F_RXALL)) { 1713 dev_kfree_skb_any(skb); 1714 return true; 1715 } 1716 } 1717 1718 /* if eth_skb_pad returns an error the skb was freed */ 1719 if (eth_skb_pad(skb)) 1720 return true; 1721 1722 return false; 1723 } 1724 1725 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 1726 struct igc_rx_buffer *rx_buffer) 1727 { 1728 if (igc_can_reuse_rx_page(rx_buffer)) { 1729 /* hand second half of page back to the ring */ 1730 igc_reuse_rx_page(rx_ring, rx_buffer); 1731 } else { 1732 /* We are not reusing the buffer so unmap it and free 1733 * any references we are holding to it 1734 */ 1735 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 1736 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 1737 IGC_RX_DMA_ATTR); 1738 __page_frag_cache_drain(rx_buffer->page, 1739 rx_buffer->pagecnt_bias); 1740 } 1741 1742 /* clear contents of rx_buffer */ 1743 rx_buffer->page = NULL; 1744 } 1745 1746 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 1747 { 1748 return ring_uses_build_skb(rx_ring) ? 
IGC_SKB_PAD : 0; 1749 } 1750 1751 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 1752 struct igc_rx_buffer *bi) 1753 { 1754 struct page *page = bi->page; 1755 dma_addr_t dma; 1756 1757 /* since we are recycling buffers we should seldom need to alloc */ 1758 if (likely(page)) 1759 return true; 1760 1761 /* alloc new page for storage */ 1762 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 1763 if (unlikely(!page)) { 1764 rx_ring->rx_stats.alloc_failed++; 1765 return false; 1766 } 1767 1768 /* map page for use */ 1769 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 1770 igc_rx_pg_size(rx_ring), 1771 DMA_FROM_DEVICE, 1772 IGC_RX_DMA_ATTR); 1773 1774 /* if mapping failed free memory back to system since 1775 * there isn't much point in holding memory we can't use 1776 */ 1777 if (dma_mapping_error(rx_ring->dev, dma)) { 1778 __free_page(page); 1779 1780 rx_ring->rx_stats.alloc_failed++; 1781 return false; 1782 } 1783 1784 bi->dma = dma; 1785 bi->page = page; 1786 bi->page_offset = igc_rx_offset(rx_ring); 1787 bi->pagecnt_bias = 1; 1788 1789 return true; 1790 } 1791 1792 /** 1793 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 1794 * @rx_ring: rx descriptor ring 1795 * @cleaned_count: number of buffers to clean 1796 */ 1797 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 1798 { 1799 union igc_adv_rx_desc *rx_desc; 1800 u16 i = rx_ring->next_to_use; 1801 struct igc_rx_buffer *bi; 1802 u16 bufsz; 1803 1804 /* nothing to do */ 1805 if (!cleaned_count) 1806 return; 1807 1808 rx_desc = IGC_RX_DESC(rx_ring, i); 1809 bi = &rx_ring->rx_buffer_info[i]; 1810 i -= rx_ring->count; 1811 1812 bufsz = igc_rx_bufsz(rx_ring); 1813 1814 do { 1815 if (!igc_alloc_mapped_page(rx_ring, bi)) 1816 break; 1817 1818 /* sync the buffer for use by the device */ 1819 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 1820 bi->page_offset, bufsz, 1821 DMA_FROM_DEVICE); 1822 1823 /* Refresh the desc even if buffer_addrs didn't change 
1824 * because each write-back erases this info. 1825 */ 1826 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 1827 1828 rx_desc++; 1829 bi++; 1830 i++; 1831 if (unlikely(!i)) { 1832 rx_desc = IGC_RX_DESC(rx_ring, 0); 1833 bi = rx_ring->rx_buffer_info; 1834 i -= rx_ring->count; 1835 } 1836 1837 /* clear the length for the next_to_use descriptor */ 1838 rx_desc->wb.upper.length = 0; 1839 1840 cleaned_count--; 1841 } while (cleaned_count); 1842 1843 i += rx_ring->count; 1844 1845 if (rx_ring->next_to_use != i) { 1846 /* record the next descriptor to use */ 1847 rx_ring->next_to_use = i; 1848 1849 /* update next to alloc since we have filled the ring */ 1850 rx_ring->next_to_alloc = i; 1851 1852 /* Force memory writes to complete before letting h/w 1853 * know there are new descriptors to fetch. (Only 1854 * applicable for weak-ordered memory model archs, 1855 * such as IA-64). 1856 */ 1857 wmb(); 1858 writel(i, rx_ring->tail); 1859 } 1860 } 1861 1862 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 1863 { 1864 unsigned int total_bytes = 0, total_packets = 0; 1865 struct igc_ring *rx_ring = q_vector->rx.ring; 1866 struct sk_buff *skb = rx_ring->skb; 1867 u16 cleaned_count = igc_desc_unused(rx_ring); 1868 1869 while (likely(total_packets < budget)) { 1870 union igc_adv_rx_desc *rx_desc; 1871 struct igc_rx_buffer *rx_buffer; 1872 unsigned int size; 1873 1874 /* return some buffers to hardware, one at a time is too slow */ 1875 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 1876 igc_alloc_rx_buffers(rx_ring, cleaned_count); 1877 cleaned_count = 0; 1878 } 1879 1880 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 1881 size = le16_to_cpu(rx_desc->wb.upper.length); 1882 if (!size) 1883 break; 1884 1885 /* This memory barrier is needed to keep us from reading 1886 * any other fields out of the rx_desc until we know the 1887 * descriptor has been written back 1888 */ 1889 dma_rmb(); 1890 1891 rx_buffer = 
igc_get_rx_buffer(rx_ring, size); 1892 1893 /* retrieve a buffer from the ring */ 1894 if (skb) 1895 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 1896 else if (ring_uses_build_skb(rx_ring)) 1897 skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); 1898 else 1899 skb = igc_construct_skb(rx_ring, rx_buffer, 1900 rx_desc, size); 1901 1902 /* exit if we failed to retrieve a buffer */ 1903 if (!skb) { 1904 rx_ring->rx_stats.alloc_failed++; 1905 rx_buffer->pagecnt_bias++; 1906 break; 1907 } 1908 1909 igc_put_rx_buffer(rx_ring, rx_buffer); 1910 cleaned_count++; 1911 1912 /* fetch next buffer in frame if non-eop */ 1913 if (igc_is_non_eop(rx_ring, rx_desc)) 1914 continue; 1915 1916 /* verify the packet layout is correct */ 1917 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { 1918 skb = NULL; 1919 continue; 1920 } 1921 1922 /* probably a little skewed due to removing CRC */ 1923 total_bytes += skb->len; 1924 1925 /* populate checksum, timestamp, VLAN, and protocol */ 1926 igc_process_skb_fields(rx_ring, rx_desc, skb); 1927 1928 napi_gro_receive(&q_vector->napi, skb); 1929 1930 /* reset skb pointer */ 1931 skb = NULL; 1932 1933 /* update budget accounting */ 1934 total_packets++; 1935 } 1936 1937 /* place incomplete frames back on ring for completion */ 1938 rx_ring->skb = skb; 1939 1940 u64_stats_update_begin(&rx_ring->rx_syncp); 1941 rx_ring->rx_stats.packets += total_packets; 1942 rx_ring->rx_stats.bytes += total_bytes; 1943 u64_stats_update_end(&rx_ring->rx_syncp); 1944 q_vector->rx.total_packets += total_packets; 1945 q_vector->rx.total_bytes += total_bytes; 1946 1947 if (cleaned_count) 1948 igc_alloc_rx_buffers(rx_ring, cleaned_count); 1949 1950 return total_packets; 1951 } 1952 1953 /** 1954 * igc_clean_tx_irq - Reclaim resources after transmit completes 1955 * @q_vector: pointer to q_vector containing needed info 1956 * @napi_budget: Used to determine if we are in netpoll 1957 * 1958 * returns true if ring is completely cleaned 1959 */ 1960 static bool 
igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 1961 { 1962 struct igc_adapter *adapter = q_vector->adapter; 1963 unsigned int total_bytes = 0, total_packets = 0; 1964 unsigned int budget = q_vector->tx.work_limit; 1965 struct igc_ring *tx_ring = q_vector->tx.ring; 1966 unsigned int i = tx_ring->next_to_clean; 1967 struct igc_tx_buffer *tx_buffer; 1968 union igc_adv_tx_desc *tx_desc; 1969 1970 if (test_bit(__IGC_DOWN, &adapter->state)) 1971 return true; 1972 1973 tx_buffer = &tx_ring->tx_buffer_info[i]; 1974 tx_desc = IGC_TX_DESC(tx_ring, i); 1975 i -= tx_ring->count; 1976 1977 do { 1978 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 1979 1980 /* if next_to_watch is not set then there is no work pending */ 1981 if (!eop_desc) 1982 break; 1983 1984 /* prevent any other reads prior to eop_desc */ 1985 smp_rmb(); 1986 1987 /* if DD is not set pending work has not been completed */ 1988 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 1989 break; 1990 1991 /* clear next_to_watch to prevent false hangs */ 1992 tx_buffer->next_to_watch = NULL; 1993 1994 /* update the statistics for this packet */ 1995 total_bytes += tx_buffer->bytecount; 1996 total_packets += tx_buffer->gso_segs; 1997 1998 /* free the skb */ 1999 napi_consume_skb(tx_buffer->skb, napi_budget); 2000 2001 /* unmap skb header data */ 2002 dma_unmap_single(tx_ring->dev, 2003 dma_unmap_addr(tx_buffer, dma), 2004 dma_unmap_len(tx_buffer, len), 2005 DMA_TO_DEVICE); 2006 2007 /* clear tx_buffer data */ 2008 dma_unmap_len_set(tx_buffer, len, 0); 2009 2010 /* clear last DMA location and unmap remaining buffers */ 2011 while (tx_desc != eop_desc) { 2012 tx_buffer++; 2013 tx_desc++; 2014 i++; 2015 if (unlikely(!i)) { 2016 i -= tx_ring->count; 2017 tx_buffer = tx_ring->tx_buffer_info; 2018 tx_desc = IGC_TX_DESC(tx_ring, 0); 2019 } 2020 2021 /* unmap any remaining paged data */ 2022 if (dma_unmap_len(tx_buffer, len)) { 2023 dma_unmap_page(tx_ring->dev, 2024 
dma_unmap_addr(tx_buffer, dma), 2025 dma_unmap_len(tx_buffer, len), 2026 DMA_TO_DEVICE); 2027 dma_unmap_len_set(tx_buffer, len, 0); 2028 } 2029 } 2030 2031 /* move us one more past the eop_desc for start of next pkt */ 2032 tx_buffer++; 2033 tx_desc++; 2034 i++; 2035 if (unlikely(!i)) { 2036 i -= tx_ring->count; 2037 tx_buffer = tx_ring->tx_buffer_info; 2038 tx_desc = IGC_TX_DESC(tx_ring, 0); 2039 } 2040 2041 /* issue prefetch for next Tx descriptor */ 2042 prefetch(tx_desc); 2043 2044 /* update budget accounting */ 2045 budget--; 2046 } while (likely(budget)); 2047 2048 netdev_tx_completed_queue(txring_txq(tx_ring), 2049 total_packets, total_bytes); 2050 2051 i += tx_ring->count; 2052 tx_ring->next_to_clean = i; 2053 u64_stats_update_begin(&tx_ring->tx_syncp); 2054 tx_ring->tx_stats.bytes += total_bytes; 2055 tx_ring->tx_stats.packets += total_packets; 2056 u64_stats_update_end(&tx_ring->tx_syncp); 2057 q_vector->tx.total_bytes += total_bytes; 2058 q_vector->tx.total_packets += total_packets; 2059 2060 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 2061 struct igc_hw *hw = &adapter->hw; 2062 2063 /* Detect a transmit hang in hardware, this serializes the 2064 * check with the clearing of time_stamp and movement of i 2065 */ 2066 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 2067 if (tx_buffer->next_to_watch && 2068 time_after(jiffies, tx_buffer->time_stamp + 2069 (adapter->tx_timeout_factor * HZ)) && 2070 !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { 2071 /* detected Tx unit hang */ 2072 dev_err(tx_ring->dev, 2073 "Detected Tx Unit Hang\n" 2074 " Tx Queue <%d>\n" 2075 " TDH <%x>\n" 2076 " TDT <%x>\n" 2077 " next_to_use <%x>\n" 2078 " next_to_clean <%x>\n" 2079 "buffer_info[next_to_clean]\n" 2080 " time_stamp <%lx>\n" 2081 " next_to_watch <%p>\n" 2082 " jiffies <%lx>\n" 2083 " desc.status <%x>\n", 2084 tx_ring->queue_index, 2085 rd32(IGC_TDH(tx_ring->reg_idx)), 2086 readl(tx_ring->tail), 2087 tx_ring->next_to_use, 2088 
tx_ring->next_to_clean, 2089 tx_buffer->time_stamp, 2090 tx_buffer->next_to_watch, 2091 jiffies, 2092 tx_buffer->next_to_watch->wb.status); 2093 netif_stop_subqueue(tx_ring->netdev, 2094 tx_ring->queue_index); 2095 2096 /* we are about to reset, no point in enabling stuff */ 2097 return true; 2098 } 2099 } 2100 2101 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 2102 if (unlikely(total_packets && 2103 netif_carrier_ok(tx_ring->netdev) && 2104 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 2105 /* Make sure that anybody stopping the queue after this 2106 * sees the new next_to_clean. 2107 */ 2108 smp_mb(); 2109 if (__netif_subqueue_stopped(tx_ring->netdev, 2110 tx_ring->queue_index) && 2111 !(test_bit(__IGC_DOWN, &adapter->state))) { 2112 netif_wake_subqueue(tx_ring->netdev, 2113 tx_ring->queue_index); 2114 2115 u64_stats_update_begin(&tx_ring->tx_syncp); 2116 tx_ring->tx_stats.restart_queue++; 2117 u64_stats_update_end(&tx_ring->tx_syncp); 2118 } 2119 } 2120 2121 return !!budget; 2122 } 2123 2124 static void igc_nfc_filter_restore(struct igc_adapter *adapter) 2125 { 2126 struct igc_nfc_filter *rule; 2127 2128 spin_lock(&adapter->nfc_lock); 2129 2130 hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) 2131 igc_add_filter(adapter, rule); 2132 2133 spin_unlock(&adapter->nfc_lock); 2134 } 2135 2136 /* If the filter to be added and an already existing filter express 2137 * the same address and address type, it should be possible to only 2138 * override the other configurations, for example the queue to steer 2139 * traffic. 
2140 */ 2141 static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry, 2142 const u8 *addr, const u8 flags) 2143 { 2144 if (!(entry->state & IGC_MAC_STATE_IN_USE)) 2145 return true; 2146 2147 if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != 2148 (flags & IGC_MAC_STATE_SRC_ADDR)) 2149 return false; 2150 2151 if (!ether_addr_equal(addr, entry->addr)) 2152 return false; 2153 2154 return true; 2155 } 2156 2157 /* Add a MAC filter for 'addr' directing matching traffic to 'queue', 2158 * 'flags' is used to indicate what kind of match is made, match is by 2159 * default for the destination address, if matching by source address 2160 * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. 2161 */ 2162 static int igc_add_mac_filter(struct igc_adapter *adapter, 2163 const u8 *addr, const u8 queue) 2164 { 2165 struct igc_hw *hw = &adapter->hw; 2166 int rar_entries = hw->mac.rar_entry_count; 2167 int i; 2168 2169 if (is_zero_ether_addr(addr)) 2170 return -EINVAL; 2171 2172 /* Search for the first empty entry in the MAC table. 2173 * Do not touch entries at the end of the table reserved for the VF MAC 2174 * addresses. 2175 */ 2176 for (i = 0; i < rar_entries; i++) { 2177 if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], 2178 addr, 0)) 2179 continue; 2180 2181 ether_addr_copy(adapter->mac_table[i].addr, addr); 2182 adapter->mac_table[i].queue = queue; 2183 adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE; 2184 2185 igc_rar_set_index(adapter, i); 2186 return i; 2187 } 2188 2189 return -ENOSPC; 2190 } 2191 2192 /* Remove a MAC filter for 'addr' directing matching traffic to 2193 * 'queue', 'flags' is used to indicate what kind of match need to be 2194 * removed, match is by default for the destination address, if 2195 * matching by source address is to be removed the flag 2196 * IGC_MAC_STATE_SRC_ADDR can be used. 
2197 */ 2198 static int igc_del_mac_filter(struct igc_adapter *adapter, 2199 const u8 *addr, const u8 queue) 2200 { 2201 struct igc_hw *hw = &adapter->hw; 2202 int rar_entries = hw->mac.rar_entry_count; 2203 int i; 2204 2205 if (is_zero_ether_addr(addr)) 2206 return -EINVAL; 2207 2208 /* Search for matching entry in the MAC table based on given address 2209 * and queue. Do not touch entries at the end of the table reserved 2210 * for the VF MAC addresses. 2211 */ 2212 for (i = 0; i < rar_entries; i++) { 2213 if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) 2214 continue; 2215 if (adapter->mac_table[i].state != 0) 2216 continue; 2217 if (adapter->mac_table[i].queue != queue) 2218 continue; 2219 if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) 2220 continue; 2221 2222 /* When a filter for the default address is "deleted", 2223 * we return it to its initial configuration 2224 */ 2225 if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { 2226 adapter->mac_table[i].state = 2227 IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; 2228 adapter->mac_table[i].queue = 0; 2229 } else { 2230 adapter->mac_table[i].state = 0; 2231 adapter->mac_table[i].queue = 0; 2232 memset(adapter->mac_table[i].addr, 0, ETH_ALEN); 2233 } 2234 2235 igc_rar_set_index(adapter, i); 2236 return 0; 2237 } 2238 2239 return -ENOENT; 2240 } 2241 2242 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 2243 { 2244 struct igc_adapter *adapter = netdev_priv(netdev); 2245 int ret; 2246 2247 ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues); 2248 2249 return min_t(int, ret, 0); 2250 } 2251 2252 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 2253 { 2254 struct igc_adapter *adapter = netdev_priv(netdev); 2255 2256 igc_del_mac_filter(adapter, addr, adapter->num_rx_queues); 2257 2258 return 0; 2259 } 2260 2261 /** 2262 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 2263 * @netdev: network interface 
device structure 2264 * 2265 * The set_rx_mode entry point is called whenever the unicast or multicast 2266 * address lists or the network interface flags are updated. This routine is 2267 * responsible for configuring the hardware for proper unicast, multicast, 2268 * promiscuous mode, and all-multi behavior. 2269 */ 2270 static void igc_set_rx_mode(struct net_device *netdev) 2271 { 2272 struct igc_adapter *adapter = netdev_priv(netdev); 2273 struct igc_hw *hw = &adapter->hw; 2274 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 2275 int count; 2276 2277 /* Check for Promiscuous and All Multicast modes */ 2278 if (netdev->flags & IFF_PROMISC) { 2279 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 2280 } else { 2281 if (netdev->flags & IFF_ALLMULTI) { 2282 rctl |= IGC_RCTL_MPE; 2283 } else { 2284 /* Write addresses to the MTA, if the attempt fails 2285 * then we should just turn on promiscuous mode so 2286 * that we can at least receive multicast traffic 2287 */ 2288 count = igc_write_mc_addr_list(netdev); 2289 if (count < 0) 2290 rctl |= IGC_RCTL_MPE; 2291 } 2292 } 2293 2294 /* Write addresses to available RAR registers, if there is not 2295 * sufficient space to store all the addresses then enable 2296 * unicast promiscuous mode 2297 */ 2298 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 2299 rctl |= IGC_RCTL_UPE; 2300 2301 /* update state of unicast and multicast */ 2302 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 2303 wr32(IGC_RCTL, rctl); 2304 2305 #if (PAGE_SIZE < 8192) 2306 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 2307 rlpml = IGC_MAX_FRAME_BUILD_SKB; 2308 #endif 2309 wr32(IGC_RLPML, rlpml); 2310 } 2311 2312 /** 2313 * igc_configure - configure the hardware for RX and TX 2314 * @adapter: private board structure 2315 */ 2316 static void igc_configure(struct igc_adapter *adapter) 2317 { 2318 struct net_device *netdev = adapter->netdev; 2319 int i = 0; 2320 2321 igc_get_hw_control(adapter); 2322 igc_set_rx_mode(netdev); 2323 2324 
igc_setup_tctl(adapter); 2325 igc_setup_mrqc(adapter); 2326 igc_setup_rctl(adapter); 2327 2328 igc_nfc_filter_restore(adapter); 2329 igc_configure_tx(adapter); 2330 igc_configure_rx(adapter); 2331 2332 igc_rx_fifo_flush_base(&adapter->hw); 2333 2334 /* call igc_desc_unused which always leaves 2335 * at least 1 descriptor unused to make sure 2336 * next_to_use != next_to_clean 2337 */ 2338 for (i = 0; i < adapter->num_rx_queues; i++) { 2339 struct igc_ring *ring = adapter->rx_ring[i]; 2340 2341 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 2342 } 2343 } 2344 2345 /** 2346 * igc_write_ivar - configure ivar for given MSI-X vector 2347 * @hw: pointer to the HW structure 2348 * @msix_vector: vector number we are allocating to a given ring 2349 * @index: row index of IVAR register to write within IVAR table 2350 * @offset: column offset of in IVAR, should be multiple of 8 2351 * 2352 * The IVAR table consists of 2 columns, 2353 * each containing an cause allocation for an Rx and Tx ring, and a 2354 * variable number of rows depending on the number of queues supported. 
2355 */ 2356 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 2357 int index, int offset) 2358 { 2359 u32 ivar = array_rd32(IGC_IVAR0, index); 2360 2361 /* clear any bits that are currently set */ 2362 ivar &= ~((u32)0xFF << offset); 2363 2364 /* write vector and valid bit */ 2365 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 2366 2367 array_wr32(IGC_IVAR0, index, ivar); 2368 } 2369 2370 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 2371 { 2372 struct igc_adapter *adapter = q_vector->adapter; 2373 struct igc_hw *hw = &adapter->hw; 2374 int rx_queue = IGC_N0_QUEUE; 2375 int tx_queue = IGC_N0_QUEUE; 2376 2377 if (q_vector->rx.ring) 2378 rx_queue = q_vector->rx.ring->reg_idx; 2379 if (q_vector->tx.ring) 2380 tx_queue = q_vector->tx.ring->reg_idx; 2381 2382 switch (hw->mac.type) { 2383 case igc_i225: 2384 if (rx_queue > IGC_N0_QUEUE) 2385 igc_write_ivar(hw, msix_vector, 2386 rx_queue >> 1, 2387 (rx_queue & 0x1) << 4); 2388 if (tx_queue > IGC_N0_QUEUE) 2389 igc_write_ivar(hw, msix_vector, 2390 tx_queue >> 1, 2391 ((tx_queue & 0x1) << 4) + 8); 2392 q_vector->eims_value = BIT(msix_vector); 2393 break; 2394 default: 2395 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 2396 break; 2397 } 2398 2399 /* add q_vector eims value to global eims_enable_mask */ 2400 adapter->eims_enable_mask |= q_vector->eims_value; 2401 2402 /* configure q_vector to set itr on first interrupt */ 2403 q_vector->set_itr = 1; 2404 } 2405 2406 /** 2407 * igc_configure_msix - Configure MSI-X hardware 2408 * @adapter: Pointer to adapter structure 2409 * 2410 * igc_configure_msix sets up the hardware to properly 2411 * generate MSI-X interrupts. 2412 */ 2413 static void igc_configure_msix(struct igc_adapter *adapter) 2414 { 2415 struct igc_hw *hw = &adapter->hw; 2416 int i, vector = 0; 2417 u32 tmp; 2418 2419 adapter->eims_enable_mask = 0; 2420 2421 /* set vector for other causes, i.e. 
link changes */ 2422 switch (hw->mac.type) { 2423 case igc_i225: 2424 /* Turn on MSI-X capability first, or our settings 2425 * won't stick. And it will take days to debug. 2426 */ 2427 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 2428 IGC_GPIE_PBA | IGC_GPIE_EIAME | 2429 IGC_GPIE_NSICR); 2430 2431 /* enable msix_other interrupt */ 2432 adapter->eims_other = BIT(vector); 2433 tmp = (vector++ | IGC_IVAR_VALID) << 8; 2434 2435 wr32(IGC_IVAR_MISC, tmp); 2436 break; 2437 default: 2438 /* do nothing, since nothing else supports MSI-X */ 2439 break; 2440 } /* switch (hw->mac.type) */ 2441 2442 adapter->eims_enable_mask |= adapter->eims_other; 2443 2444 for (i = 0; i < adapter->num_q_vectors; i++) 2445 igc_assign_vector(adapter->q_vector[i], vector++); 2446 2447 wrfl(); 2448 } 2449 2450 /** 2451 * igc_irq_enable - Enable default interrupt generation settings 2452 * @adapter: board private structure 2453 */ 2454 static void igc_irq_enable(struct igc_adapter *adapter) 2455 { 2456 struct igc_hw *hw = &adapter->hw; 2457 2458 if (adapter->msix_entries) { 2459 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 2460 u32 regval = rd32(IGC_EIAC); 2461 2462 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 2463 regval = rd32(IGC_EIAM); 2464 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 2465 wr32(IGC_EIMS, adapter->eims_enable_mask); 2466 wr32(IGC_IMS, ims); 2467 } else { 2468 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 2469 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 2470 } 2471 } 2472 2473 /** 2474 * igc_irq_disable - Mask off interrupt generation on the NIC 2475 * @adapter: board private structure 2476 */ 2477 static void igc_irq_disable(struct igc_adapter *adapter) 2478 { 2479 struct igc_hw *hw = &adapter->hw; 2480 2481 if (adapter->msix_entries) { 2482 u32 regval = rd32(IGC_EIAM); 2483 2484 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 2485 wr32(IGC_EIMC, adapter->eims_enable_mask); 2486 regval = rd32(IGC_EIAC); 2487 wr32(IGC_EIAC, regval & 
~adapter->eims_enable_mask); 2488 } 2489 2490 wr32(IGC_IAM, 0); 2491 wr32(IGC_IMC, ~0); 2492 wrfl(); 2493 2494 if (adapter->msix_entries) { 2495 int vector = 0, i; 2496 2497 synchronize_irq(adapter->msix_entries[vector++].vector); 2498 2499 for (i = 0; i < adapter->num_q_vectors; i++) 2500 synchronize_irq(adapter->msix_entries[vector++].vector); 2501 } else { 2502 synchronize_irq(adapter->pdev->irq); 2503 } 2504 } 2505 2506 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 2507 const u32 max_rss_queues) 2508 { 2509 /* Determine if we need to pair queues. */ 2510 /* If rss_queues > half of max_rss_queues, pair the queues in 2511 * order to conserve interrupts due to limited supply. 2512 */ 2513 if (adapter->rss_queues > (max_rss_queues / 2)) 2514 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 2515 else 2516 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 2517 } 2518 2519 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 2520 { 2521 unsigned int max_rss_queues; 2522 2523 /* Determine the maximum number of RSS queues supported. */ 2524 max_rss_queues = IGC_MAX_RX_QUEUES; 2525 2526 return max_rss_queues; 2527 } 2528 2529 static void igc_init_queue_configuration(struct igc_adapter *adapter) 2530 { 2531 u32 max_rss_queues; 2532 2533 max_rss_queues = igc_get_max_rss_queues(adapter); 2534 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 2535 2536 igc_set_flag_queue_pairs(adapter, max_rss_queues); 2537 } 2538 2539 /** 2540 * igc_reset_q_vector - Reset config for interrupt vector 2541 * @adapter: board private structure to initialize 2542 * @v_idx: Index of vector to be reset 2543 * 2544 * If NAPI is enabled it will delete any references to the 2545 * NAPI struct. This is preparation for igc_free_q_vector. 
2546 */ 2547 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 2548 { 2549 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 2550 2551 /* if we're coming from igc_set_interrupt_capability, the vectors are 2552 * not yet allocated 2553 */ 2554 if (!q_vector) 2555 return; 2556 2557 if (q_vector->tx.ring) 2558 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 2559 2560 if (q_vector->rx.ring) 2561 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 2562 2563 netif_napi_del(&q_vector->napi); 2564 } 2565 2566 /** 2567 * igc_free_q_vector - Free memory allocated for specific interrupt vector 2568 * @adapter: board private structure to initialize 2569 * @v_idx: Index of vector to be freed 2570 * 2571 * This function frees the memory allocated to the q_vector. 2572 */ 2573 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 2574 { 2575 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 2576 2577 adapter->q_vector[v_idx] = NULL; 2578 2579 /* igc_get_stats64() might access the rings on this vector, 2580 * we must wait a grace period before freeing it. 2581 */ 2582 if (q_vector) 2583 kfree_rcu(q_vector, rcu); 2584 } 2585 2586 /** 2587 * igc_free_q_vectors - Free memory allocated for interrupt vectors 2588 * @adapter: board private structure to initialize 2589 * 2590 * This function frees the memory allocated to the q_vectors. In addition if 2591 * NAPI is enabled it will delete any references to the NAPI struct prior 2592 * to freeing the q_vector. 
2593 */ 2594 static void igc_free_q_vectors(struct igc_adapter *adapter) 2595 { 2596 int v_idx = adapter->num_q_vectors; 2597 2598 adapter->num_tx_queues = 0; 2599 adapter->num_rx_queues = 0; 2600 adapter->num_q_vectors = 0; 2601 2602 while (v_idx--) { 2603 igc_reset_q_vector(adapter, v_idx); 2604 igc_free_q_vector(adapter, v_idx); 2605 } 2606 } 2607 2608 /** 2609 * igc_update_itr - update the dynamic ITR value based on statistics 2610 * @q_vector: pointer to q_vector 2611 * @ring_container: ring info to update the itr for 2612 * 2613 * Stores a new ITR value based on packets and byte 2614 * counts during the last interrupt. The advantage of per interrupt 2615 * computation is faster updates and more accurate ITR for the current 2616 * traffic pattern. Constants in this function were computed 2617 * based on theoretical maximum wire speed and thresholds were set based 2618 * on testing data as well as attempting to minimize response time 2619 * while increasing bulk throughput. 2620 * NOTE: These calculations are only valid when operating in a single- 2621 * queue environment. 
2622 */ 2623 static void igc_update_itr(struct igc_q_vector *q_vector, 2624 struct igc_ring_container *ring_container) 2625 { 2626 unsigned int packets = ring_container->total_packets; 2627 unsigned int bytes = ring_container->total_bytes; 2628 u8 itrval = ring_container->itr; 2629 2630 /* no packets, exit with status unchanged */ 2631 if (packets == 0) 2632 return; 2633 2634 switch (itrval) { 2635 case lowest_latency: 2636 /* handle TSO and jumbo frames */ 2637 if (bytes / packets > 8000) 2638 itrval = bulk_latency; 2639 else if ((packets < 5) && (bytes > 512)) 2640 itrval = low_latency; 2641 break; 2642 case low_latency: /* 50 usec aka 20000 ints/s */ 2643 if (bytes > 10000) { 2644 /* this if handles the TSO accounting */ 2645 if (bytes / packets > 8000) 2646 itrval = bulk_latency; 2647 else if ((packets < 10) || ((bytes / packets) > 1200)) 2648 itrval = bulk_latency; 2649 else if ((packets > 35)) 2650 itrval = lowest_latency; 2651 } else if (bytes / packets > 2000) { 2652 itrval = bulk_latency; 2653 } else if (packets <= 2 && bytes < 512) { 2654 itrval = lowest_latency; 2655 } 2656 break; 2657 case bulk_latency: /* 250 usec aka 4000 ints/s */ 2658 if (bytes > 25000) { 2659 if (packets > 35) 2660 itrval = low_latency; 2661 } else if (bytes < 1500) { 2662 itrval = low_latency; 2663 } 2664 break; 2665 } 2666 2667 /* clear work counters since we have the values we need */ 2668 ring_container->total_bytes = 0; 2669 ring_container->total_packets = 0; 2670 2671 /* write updated itr to ring container */ 2672 ring_container->itr = itrval; 2673 } 2674 2675 static void igc_set_itr(struct igc_q_vector *q_vector) 2676 { 2677 struct igc_adapter *adapter = q_vector->adapter; 2678 u32 new_itr = q_vector->itr_val; 2679 u8 current_itr = 0; 2680 2681 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 2682 switch (adapter->link_speed) { 2683 case SPEED_10: 2684 case SPEED_100: 2685 current_itr = 0; 2686 new_itr = IGC_4K_ITR; 2687 goto set_itr_now; 2688 default: 2689 
break; 2690 } 2691 2692 igc_update_itr(q_vector, &q_vector->tx); 2693 igc_update_itr(q_vector, &q_vector->rx); 2694 2695 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 2696 2697 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 2698 if (current_itr == lowest_latency && 2699 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 2700 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 2701 current_itr = low_latency; 2702 2703 switch (current_itr) { 2704 /* counts and packets in update_itr are dependent on these numbers */ 2705 case lowest_latency: 2706 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 2707 break; 2708 case low_latency: 2709 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 2710 break; 2711 case bulk_latency: 2712 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 2713 break; 2714 default: 2715 break; 2716 } 2717 2718 set_itr_now: 2719 if (new_itr != q_vector->itr_val) { 2720 /* this attempts to bias the interrupt rate towards Bulk 2721 * by adding intermediate steps when interrupt rate is 2722 * increasing 2723 */ 2724 new_itr = new_itr > q_vector->itr_val ? 2725 max((new_itr * q_vector->itr_val) / 2726 (new_itr + (q_vector->itr_val >> 2)), 2727 new_itr) : new_itr; 2728 /* Don't write the value here; it resets the adapter's 2729 * internal timer, and causes us to delay far longer than 2730 * we should between interrupts. Instead, we write the ITR 2731 * value at the beginning of the next interrupt so the timing 2732 * ends up being correct. 
2733 */ 2734 q_vector->itr_val = new_itr; 2735 q_vector->set_itr = 1; 2736 } 2737 } 2738 2739 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 2740 { 2741 int v_idx = adapter->num_q_vectors; 2742 2743 if (adapter->msix_entries) { 2744 pci_disable_msix(adapter->pdev); 2745 kfree(adapter->msix_entries); 2746 adapter->msix_entries = NULL; 2747 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 2748 pci_disable_msi(adapter->pdev); 2749 } 2750 2751 while (v_idx--) 2752 igc_reset_q_vector(adapter, v_idx); 2753 } 2754 2755 /** 2756 * igc_set_interrupt_capability - set MSI or MSI-X if supported 2757 * @adapter: Pointer to adapter structure 2758 * @msix: boolean value for MSI-X capability 2759 * 2760 * Attempt to configure interrupts using the best available 2761 * capabilities of the hardware and kernel. 2762 */ 2763 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 2764 bool msix) 2765 { 2766 int numvecs, i; 2767 int err; 2768 2769 if (!msix) 2770 goto msi_only; 2771 adapter->flags |= IGC_FLAG_HAS_MSIX; 2772 2773 /* Number of supported queues. 
*/ 2774 adapter->num_rx_queues = adapter->rss_queues; 2775 2776 adapter->num_tx_queues = adapter->rss_queues; 2777 2778 /* start with one vector for every Rx queue */ 2779 numvecs = adapter->num_rx_queues; 2780 2781 /* if Tx handler is separate add 1 for every Tx queue */ 2782 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 2783 numvecs += adapter->num_tx_queues; 2784 2785 /* store the number of vectors reserved for queues */ 2786 adapter->num_q_vectors = numvecs; 2787 2788 /* add 1 vector for link status interrupts */ 2789 numvecs++; 2790 2791 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 2792 GFP_KERNEL); 2793 2794 if (!adapter->msix_entries) 2795 return; 2796 2797 /* populate entry values */ 2798 for (i = 0; i < numvecs; i++) 2799 adapter->msix_entries[i].entry = i; 2800 2801 err = pci_enable_msix_range(adapter->pdev, 2802 adapter->msix_entries, 2803 numvecs, 2804 numvecs); 2805 if (err > 0) 2806 return; 2807 2808 kfree(adapter->msix_entries); 2809 adapter->msix_entries = NULL; 2810 2811 igc_reset_interrupt_capability(adapter); 2812 2813 msi_only: 2814 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 2815 2816 adapter->rss_queues = 1; 2817 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 2818 adapter->num_rx_queues = 1; 2819 adapter->num_tx_queues = 1; 2820 adapter->num_q_vectors = 1; 2821 if (!pci_enable_msi(adapter->pdev)) 2822 adapter->flags |= IGC_FLAG_HAS_MSI; 2823 } 2824 2825 /** 2826 * igc_update_ring_itr - update the dynamic ITR value based on packet size 2827 * @q_vector: pointer to q_vector 2828 * 2829 * Stores a new ITR value based on strictly on packet size. This 2830 * algorithm is less sophisticated than that used in igc_update_itr, 2831 * due to the difficulty of synchronizing statistics across multiple 2832 * receive rings. The divisors and thresholds used by this function 2833 * were determined based on theoretical maximum wire speed and testing 2834 * data, in order to minimize response time while increasing bulk 2835 * throughput. 
2836 * NOTE: This function is called only when operating in a multiqueue 2837 * receive environment. 2838 */ 2839 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 2840 { 2841 struct igc_adapter *adapter = q_vector->adapter; 2842 int new_val = q_vector->itr_val; 2843 int avg_wire_size = 0; 2844 unsigned int packets; 2845 2846 /* For non-gigabit speeds, just fix the interrupt rate at 4000 2847 * ints/sec - ITR timer value of 120 ticks. 2848 */ 2849 switch (adapter->link_speed) { 2850 case SPEED_10: 2851 case SPEED_100: 2852 new_val = IGC_4K_ITR; 2853 goto set_itr_val; 2854 default: 2855 break; 2856 } 2857 2858 packets = q_vector->rx.total_packets; 2859 if (packets) 2860 avg_wire_size = q_vector->rx.total_bytes / packets; 2861 2862 packets = q_vector->tx.total_packets; 2863 if (packets) 2864 avg_wire_size = max_t(u32, avg_wire_size, 2865 q_vector->tx.total_bytes / packets); 2866 2867 /* if avg_wire_size isn't set no work was done */ 2868 if (!avg_wire_size) 2869 goto clear_counts; 2870 2871 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 2872 avg_wire_size += 24; 2873 2874 /* Don't starve jumbo frames */ 2875 avg_wire_size = min(avg_wire_size, 3000); 2876 2877 /* Give a little boost to mid-size frames */ 2878 if (avg_wire_size > 300 && avg_wire_size < 1200) 2879 new_val = avg_wire_size / 3; 2880 else 2881 new_val = avg_wire_size / 2; 2882 2883 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 2884 if (new_val < IGC_20K_ITR && 2885 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 2886 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 2887 new_val = IGC_20K_ITR; 2888 2889 set_itr_val: 2890 if (new_val != q_vector->itr_val) { 2891 q_vector->itr_val = new_val; 2892 q_vector->set_itr = 1; 2893 } 2894 clear_counts: 2895 q_vector->rx.total_bytes = 0; 2896 q_vector->rx.total_packets = 0; 2897 q_vector->tx.total_bytes = 0; 2898 q_vector->tx.total_packets = 0; 2899 } 2900 2901 static void igc_ring_irq_enable(struct 
igc_q_vector *q_vector) 2902 { 2903 struct igc_adapter *adapter = q_vector->adapter; 2904 struct igc_hw *hw = &adapter->hw; 2905 2906 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 2907 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 2908 if (adapter->num_q_vectors == 1) 2909 igc_set_itr(q_vector); 2910 else 2911 igc_update_ring_itr(q_vector); 2912 } 2913 2914 if (!test_bit(__IGC_DOWN, &adapter->state)) { 2915 if (adapter->msix_entries) 2916 wr32(IGC_EIMS, q_vector->eims_value); 2917 else 2918 igc_irq_enable(adapter); 2919 } 2920 } 2921 2922 static void igc_add_ring(struct igc_ring *ring, 2923 struct igc_ring_container *head) 2924 { 2925 head->ring = ring; 2926 head->count++; 2927 } 2928 2929 /** 2930 * igc_cache_ring_register - Descriptor ring to register mapping 2931 * @adapter: board private structure to initialize 2932 * 2933 * Once we know the feature-set enabled for the device, we'll cache 2934 * the register offset the descriptor ring is assigned to. 2935 */ 2936 static void igc_cache_ring_register(struct igc_adapter *adapter) 2937 { 2938 int i = 0, j = 0; 2939 2940 switch (adapter->hw.mac.type) { 2941 case igc_i225: 2942 /* Fall through */ 2943 default: 2944 for (; i < adapter->num_rx_queues; i++) 2945 adapter->rx_ring[i]->reg_idx = i; 2946 for (; j < adapter->num_tx_queues; j++) 2947 adapter->tx_ring[j]->reg_idx = j; 2948 break; 2949 } 2950 } 2951 2952 /** 2953 * igc_poll - NAPI Rx polling callback 2954 * @napi: napi polling structure 2955 * @budget: count of how many packets we should handle 2956 */ 2957 static int igc_poll(struct napi_struct *napi, int budget) 2958 { 2959 struct igc_q_vector *q_vector = container_of(napi, 2960 struct igc_q_vector, 2961 napi); 2962 bool clean_complete = true; 2963 int work_done = 0; 2964 2965 if (q_vector->tx.ring) 2966 clean_complete = igc_clean_tx_irq(q_vector, budget); 2967 2968 if (q_vector->rx.ring) { 2969 int cleaned = igc_clean_rx_irq(q_vector, budget); 2970 2971 work_done += cleaned; 2972 if 
(cleaned >= budget) 2973 clean_complete = false; 2974 } 2975 2976 /* If all work not completed, return budget and keep polling */ 2977 if (!clean_complete) 2978 return budget; 2979 2980 /* Exit the polling mode, but don't re-enable interrupts if stack might 2981 * poll us due to busy-polling 2982 */ 2983 if (likely(napi_complete_done(napi, work_done))) 2984 igc_ring_irq_enable(q_vector); 2985 2986 return min(work_done, budget - 1); 2987 } 2988 2989 /** 2990 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 2991 * @adapter: board private structure to initialize 2992 * @v_count: q_vectors allocated on adapter, used for ring interleaving 2993 * @v_idx: index of vector in adapter struct 2994 * @txr_count: total number of Tx rings to allocate 2995 * @txr_idx: index of first Tx ring to allocate 2996 * @rxr_count: total number of Rx rings to allocate 2997 * @rxr_idx: index of first Rx ring to allocate 2998 * 2999 * We allocate one q_vector. If allocation fails we return -ENOMEM. 
3000 */ 3001 static int igc_alloc_q_vector(struct igc_adapter *adapter, 3002 unsigned int v_count, unsigned int v_idx, 3003 unsigned int txr_count, unsigned int txr_idx, 3004 unsigned int rxr_count, unsigned int rxr_idx) 3005 { 3006 struct igc_q_vector *q_vector; 3007 struct igc_ring *ring; 3008 int ring_count; 3009 3010 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 3011 if (txr_count > 1 || rxr_count > 1) 3012 return -ENOMEM; 3013 3014 ring_count = txr_count + rxr_count; 3015 3016 /* allocate q_vector and rings */ 3017 q_vector = adapter->q_vector[v_idx]; 3018 if (!q_vector) 3019 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 3020 GFP_KERNEL); 3021 else 3022 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 3023 if (!q_vector) 3024 return -ENOMEM; 3025 3026 /* initialize NAPI */ 3027 netif_napi_add(adapter->netdev, &q_vector->napi, 3028 igc_poll, 64); 3029 3030 /* tie q_vector and adapter together */ 3031 adapter->q_vector[v_idx] = q_vector; 3032 q_vector->adapter = adapter; 3033 3034 /* initialize work limits */ 3035 q_vector->tx.work_limit = adapter->tx_work_limit; 3036 3037 /* initialize ITR configuration */ 3038 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 3039 q_vector->itr_val = IGC_START_ITR; 3040 3041 /* initialize pointer to rings */ 3042 ring = q_vector->ring; 3043 3044 /* initialize ITR */ 3045 if (rxr_count) { 3046 /* rx or rx/tx vector */ 3047 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 3048 q_vector->itr_val = adapter->rx_itr_setting; 3049 } else { 3050 /* tx only vector */ 3051 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 3052 q_vector->itr_val = adapter->tx_itr_setting; 3053 } 3054 3055 if (txr_count) { 3056 /* assign generic ring traits */ 3057 ring->dev = &adapter->pdev->dev; 3058 ring->netdev = adapter->netdev; 3059 3060 /* configure backlink on ring */ 3061 ring->q_vector = q_vector; 3062 3063 /* update q_vector Tx values */ 3064 igc_add_ring(ring, &q_vector->tx); 
3065 3066 /* apply Tx specific ring traits */ 3067 ring->count = adapter->tx_ring_count; 3068 ring->queue_index = txr_idx; 3069 3070 /* assign ring to adapter */ 3071 adapter->tx_ring[txr_idx] = ring; 3072 3073 /* push pointer to next ring */ 3074 ring++; 3075 } 3076 3077 if (rxr_count) { 3078 /* assign generic ring traits */ 3079 ring->dev = &adapter->pdev->dev; 3080 ring->netdev = adapter->netdev; 3081 3082 /* configure backlink on ring */ 3083 ring->q_vector = q_vector; 3084 3085 /* update q_vector Rx values */ 3086 igc_add_ring(ring, &q_vector->rx); 3087 3088 /* apply Rx specific ring traits */ 3089 ring->count = adapter->rx_ring_count; 3090 ring->queue_index = rxr_idx; 3091 3092 /* assign ring to adapter */ 3093 adapter->rx_ring[rxr_idx] = ring; 3094 } 3095 3096 return 0; 3097 } 3098 3099 /** 3100 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 3101 * @adapter: board private structure to initialize 3102 * 3103 * We allocate one q_vector per queue interrupt. If allocation fails we 3104 * return -ENOMEM. 
3105 */ 3106 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 3107 { 3108 int rxr_remaining = adapter->num_rx_queues; 3109 int txr_remaining = adapter->num_tx_queues; 3110 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 3111 int q_vectors = adapter->num_q_vectors; 3112 int err; 3113 3114 if (q_vectors >= (rxr_remaining + txr_remaining)) { 3115 for (; rxr_remaining; v_idx++) { 3116 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 3117 0, 0, 1, rxr_idx); 3118 3119 if (err) 3120 goto err_out; 3121 3122 /* update counts and index */ 3123 rxr_remaining--; 3124 rxr_idx++; 3125 } 3126 } 3127 3128 for (; v_idx < q_vectors; v_idx++) { 3129 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 3130 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 3131 3132 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 3133 tqpv, txr_idx, rqpv, rxr_idx); 3134 3135 if (err) 3136 goto err_out; 3137 3138 /* update counts and index */ 3139 rxr_remaining -= rqpv; 3140 txr_remaining -= tqpv; 3141 rxr_idx++; 3142 txr_idx++; 3143 } 3144 3145 return 0; 3146 3147 err_out: 3148 adapter->num_tx_queues = 0; 3149 adapter->num_rx_queues = 0; 3150 adapter->num_q_vectors = 0; 3151 3152 while (v_idx--) 3153 igc_free_q_vector(adapter, v_idx); 3154 3155 return -ENOMEM; 3156 } 3157 3158 /** 3159 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 3160 * @adapter: Pointer to adapter structure 3161 * @msix: boolean for MSI-X capability 3162 * 3163 * This function initializes the interrupts and allocates all of the queues. 
3164 */ 3165 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 3166 { 3167 struct pci_dev *pdev = adapter->pdev; 3168 int err = 0; 3169 3170 igc_set_interrupt_capability(adapter, msix); 3171 3172 err = igc_alloc_q_vectors(adapter); 3173 if (err) { 3174 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); 3175 goto err_alloc_q_vectors; 3176 } 3177 3178 igc_cache_ring_register(adapter); 3179 3180 return 0; 3181 3182 err_alloc_q_vectors: 3183 igc_reset_interrupt_capability(adapter); 3184 return err; 3185 } 3186 3187 /** 3188 * igc_sw_init - Initialize general software structures (struct igc_adapter) 3189 * @adapter: board private structure to initialize 3190 * 3191 * igc_sw_init initializes the Adapter private data structure. 3192 * Fields are initialized based on PCI device information and 3193 * OS network device settings (MTU size). 3194 */ 3195 static int igc_sw_init(struct igc_adapter *adapter) 3196 { 3197 struct net_device *netdev = adapter->netdev; 3198 struct pci_dev *pdev = adapter->pdev; 3199 struct igc_hw *hw = &adapter->hw; 3200 3201 int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; 3202 3203 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 3204 3205 /* set default ring sizes */ 3206 adapter->tx_ring_count = IGC_DEFAULT_TXD; 3207 adapter->rx_ring_count = IGC_DEFAULT_RXD; 3208 3209 /* set default ITR values */ 3210 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 3211 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 3212 3213 /* set default work limits */ 3214 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 3215 3216 /* adjust max frame to be at least the size of a standard frame */ 3217 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 3218 VLAN_HLEN; 3219 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 3220 3221 spin_lock_init(&adapter->nfc_lock); 3222 spin_lock_init(&adapter->stats64_lock); 3223 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 3224 adapter->flags 
|= IGC_FLAG_HAS_MSIX; 3225 3226 adapter->mac_table = kzalloc(size, GFP_ATOMIC); 3227 if (!adapter->mac_table) 3228 return -ENOMEM; 3229 3230 igc_init_queue_configuration(adapter); 3231 3232 /* This call may decrease the number of queues */ 3233 if (igc_init_interrupt_scheme(adapter, true)) { 3234 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 3235 return -ENOMEM; 3236 } 3237 3238 /* Explicitly disable IRQ since the NIC can be in any state. */ 3239 igc_irq_disable(adapter); 3240 3241 set_bit(__IGC_DOWN, &adapter->state); 3242 3243 return 0; 3244 } 3245 3246 /** 3247 * igc_up - Open the interface and prepare it to handle traffic 3248 * @adapter: board private structure 3249 */ 3250 void igc_up(struct igc_adapter *adapter) 3251 { 3252 struct igc_hw *hw = &adapter->hw; 3253 int i = 0; 3254 3255 /* hardware has been reset, we need to reload some things */ 3256 igc_configure(adapter); 3257 3258 clear_bit(__IGC_DOWN, &adapter->state); 3259 3260 for (i = 0; i < adapter->num_q_vectors; i++) 3261 napi_enable(&adapter->q_vector[i]->napi); 3262 3263 if (adapter->msix_entries) 3264 igc_configure_msix(adapter); 3265 else 3266 igc_assign_vector(adapter->q_vector[0], 0); 3267 3268 /* Clear any pending interrupts. */ 3269 rd32(IGC_ICR); 3270 igc_irq_enable(adapter); 3271 3272 netif_tx_start_all_queues(adapter->netdev); 3273 3274 /* start the watchdog. 
*/ 3275 hw->mac.get_link_status = 1; 3276 schedule_work(&adapter->watchdog_task); 3277 } 3278 3279 /** 3280 * igc_update_stats - Update the board statistics counters 3281 * @adapter: board private structure 3282 */ 3283 void igc_update_stats(struct igc_adapter *adapter) 3284 { 3285 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 3286 struct pci_dev *pdev = adapter->pdev; 3287 struct igc_hw *hw = &adapter->hw; 3288 u64 _bytes, _packets; 3289 u64 bytes, packets; 3290 unsigned int start; 3291 u32 mpc; 3292 int i; 3293 3294 /* Prevent stats update while adapter is being reset, or if the pci 3295 * connection is down. 3296 */ 3297 if (adapter->link_speed == 0) 3298 return; 3299 if (pci_channel_offline(pdev)) 3300 return; 3301 3302 packets = 0; 3303 bytes = 0; 3304 3305 rcu_read_lock(); 3306 for (i = 0; i < adapter->num_rx_queues; i++) { 3307 struct igc_ring *ring = adapter->rx_ring[i]; 3308 u32 rqdpc = rd32(IGC_RQDPC(i)); 3309 3310 if (hw->mac.type >= igc_i225) 3311 wr32(IGC_RQDPC(i), 0); 3312 3313 if (rqdpc) { 3314 ring->rx_stats.drops += rqdpc; 3315 net_stats->rx_fifo_errors += rqdpc; 3316 } 3317 3318 do { 3319 start = u64_stats_fetch_begin_irq(&ring->rx_syncp); 3320 _bytes = ring->rx_stats.bytes; 3321 _packets = ring->rx_stats.packets; 3322 } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); 3323 bytes += _bytes; 3324 packets += _packets; 3325 } 3326 3327 net_stats->rx_bytes = bytes; 3328 net_stats->rx_packets = packets; 3329 3330 packets = 0; 3331 bytes = 0; 3332 for (i = 0; i < adapter->num_tx_queues; i++) { 3333 struct igc_ring *ring = adapter->tx_ring[i]; 3334 3335 do { 3336 start = u64_stats_fetch_begin_irq(&ring->tx_syncp); 3337 _bytes = ring->tx_stats.bytes; 3338 _packets = ring->tx_stats.packets; 3339 } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); 3340 bytes += _bytes; 3341 packets += _packets; 3342 } 3343 net_stats->tx_bytes = bytes; 3344 net_stats->tx_packets = packets; 3345 rcu_read_unlock(); 3346 3347 /* read stats 
registers */ 3348 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 3349 adapter->stats.gprc += rd32(IGC_GPRC); 3350 adapter->stats.gorc += rd32(IGC_GORCL); 3351 rd32(IGC_GORCH); /* clear GORCL */ 3352 adapter->stats.bprc += rd32(IGC_BPRC); 3353 adapter->stats.mprc += rd32(IGC_MPRC); 3354 adapter->stats.roc += rd32(IGC_ROC); 3355 3356 adapter->stats.prc64 += rd32(IGC_PRC64); 3357 adapter->stats.prc127 += rd32(IGC_PRC127); 3358 adapter->stats.prc255 += rd32(IGC_PRC255); 3359 adapter->stats.prc511 += rd32(IGC_PRC511); 3360 adapter->stats.prc1023 += rd32(IGC_PRC1023); 3361 adapter->stats.prc1522 += rd32(IGC_PRC1522); 3362 adapter->stats.symerrs += rd32(IGC_SYMERRS); 3363 adapter->stats.sec += rd32(IGC_SEC); 3364 3365 mpc = rd32(IGC_MPC); 3366 adapter->stats.mpc += mpc; 3367 net_stats->rx_fifo_errors += mpc; 3368 adapter->stats.scc += rd32(IGC_SCC); 3369 adapter->stats.ecol += rd32(IGC_ECOL); 3370 adapter->stats.mcc += rd32(IGC_MCC); 3371 adapter->stats.latecol += rd32(IGC_LATECOL); 3372 adapter->stats.dc += rd32(IGC_DC); 3373 adapter->stats.rlec += rd32(IGC_RLEC); 3374 adapter->stats.xonrxc += rd32(IGC_XONRXC); 3375 adapter->stats.xontxc += rd32(IGC_XONTXC); 3376 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 3377 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 3378 adapter->stats.fcruc += rd32(IGC_FCRUC); 3379 adapter->stats.gptc += rd32(IGC_GPTC); 3380 adapter->stats.gotc += rd32(IGC_GOTCL); 3381 rd32(IGC_GOTCH); /* clear GOTCL */ 3382 adapter->stats.rnbc += rd32(IGC_RNBC); 3383 adapter->stats.ruc += rd32(IGC_RUC); 3384 adapter->stats.rfc += rd32(IGC_RFC); 3385 adapter->stats.rjc += rd32(IGC_RJC); 3386 adapter->stats.tor += rd32(IGC_TORH); 3387 adapter->stats.tot += rd32(IGC_TOTH); 3388 adapter->stats.tpr += rd32(IGC_TPR); 3389 3390 adapter->stats.ptc64 += rd32(IGC_PTC64); 3391 adapter->stats.ptc127 += rd32(IGC_PTC127); 3392 adapter->stats.ptc255 += rd32(IGC_PTC255); 3393 adapter->stats.ptc511 += rd32(IGC_PTC511); 3394 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 3395 
adapter->stats.ptc1522 += rd32(IGC_PTC1522); 3396 3397 adapter->stats.mptc += rd32(IGC_MPTC); 3398 adapter->stats.bptc += rd32(IGC_BPTC); 3399 3400 adapter->stats.tpt += rd32(IGC_TPT); 3401 adapter->stats.colc += rd32(IGC_COLC); 3402 3403 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 3404 3405 adapter->stats.tsctc += rd32(IGC_TSCTC); 3406 adapter->stats.tsctfc += rd32(IGC_TSCTFC); 3407 3408 adapter->stats.iac += rd32(IGC_IAC); 3409 adapter->stats.icrxoc += rd32(IGC_ICRXOC); 3410 adapter->stats.icrxptc += rd32(IGC_ICRXPTC); 3411 adapter->stats.icrxatc += rd32(IGC_ICRXATC); 3412 adapter->stats.ictxptc += rd32(IGC_ICTXPTC); 3413 adapter->stats.ictxatc += rd32(IGC_ICTXATC); 3414 adapter->stats.ictxqec += rd32(IGC_ICTXQEC); 3415 adapter->stats.ictxqmtc += rd32(IGC_ICTXQMTC); 3416 adapter->stats.icrxdmtc += rd32(IGC_ICRXDMTC); 3417 3418 /* Fill out the OS statistics structure */ 3419 net_stats->multicast = adapter->stats.mprc; 3420 net_stats->collisions = adapter->stats.colc; 3421 3422 /* Rx Errors */ 3423 3424 /* RLEC on some newer hardware can be incorrect so build 3425 * our own version based on RUC and ROC 3426 */ 3427 net_stats->rx_errors = adapter->stats.rxerrc + 3428 adapter->stats.crcerrs + adapter->stats.algnerrc + 3429 adapter->stats.ruc + adapter->stats.roc + 3430 adapter->stats.cexterr; 3431 net_stats->rx_length_errors = adapter->stats.ruc + 3432 adapter->stats.roc; 3433 net_stats->rx_crc_errors = adapter->stats.crcerrs; 3434 net_stats->rx_frame_errors = adapter->stats.algnerrc; 3435 net_stats->rx_missed_errors = adapter->stats.mpc; 3436 3437 /* Tx Errors */ 3438 net_stats->tx_errors = adapter->stats.ecol + 3439 adapter->stats.latecol; 3440 net_stats->tx_aborted_errors = adapter->stats.ecol; 3441 net_stats->tx_window_errors = adapter->stats.latecol; 3442 net_stats->tx_carrier_errors = adapter->stats.tncrs; 3443 3444 /* Tx Dropped needs to be maintained elsewhere */ 3445 3446 /* Management Stats */ 3447 adapter->stats.mgptc += rd32(IGC_MGTPTC); 3448 
adapter->stats.mgprc += rd32(IGC_MGTPRC); 3449 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 3450 } 3451 3452 static void igc_nfc_filter_exit(struct igc_adapter *adapter) 3453 { 3454 struct igc_nfc_filter *rule; 3455 3456 spin_lock(&adapter->nfc_lock); 3457 3458 hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) 3459 igc_erase_filter(adapter, rule); 3460 3461 hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node) 3462 igc_erase_filter(adapter, rule); 3463 3464 spin_unlock(&adapter->nfc_lock); 3465 } 3466 3467 /** 3468 * igc_down - Close the interface 3469 * @adapter: board private structure 3470 */ 3471 void igc_down(struct igc_adapter *adapter) 3472 { 3473 struct net_device *netdev = adapter->netdev; 3474 struct igc_hw *hw = &adapter->hw; 3475 u32 tctl, rctl; 3476 int i = 0; 3477 3478 set_bit(__IGC_DOWN, &adapter->state); 3479 3480 /* disable receives in the hardware */ 3481 rctl = rd32(IGC_RCTL); 3482 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 3483 /* flush and sleep below */ 3484 3485 igc_nfc_filter_exit(adapter); 3486 3487 /* set trans_start so we don't get spurious watchdogs during reset */ 3488 netif_trans_update(netdev); 3489 3490 netif_carrier_off(netdev); 3491 netif_tx_stop_all_queues(netdev); 3492 3493 /* disable transmits in the hardware */ 3494 tctl = rd32(IGC_TCTL); 3495 tctl &= ~IGC_TCTL_EN; 3496 wr32(IGC_TCTL, tctl); 3497 /* flush both disables and wait for them to finish */ 3498 wrfl(); 3499 usleep_range(10000, 20000); 3500 3501 igc_irq_disable(adapter); 3502 3503 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 3504 3505 for (i = 0; i < adapter->num_q_vectors; i++) { 3506 if (adapter->q_vector[i]) { 3507 napi_synchronize(&adapter->q_vector[i]->napi); 3508 napi_disable(&adapter->q_vector[i]->napi); 3509 } 3510 } 3511 3512 del_timer_sync(&adapter->watchdog_timer); 3513 del_timer_sync(&adapter->phy_info_timer); 3514 3515 /* record the stats before reset*/ 3516 spin_lock(&adapter->stats64_lock); 3517 igc_update_stats(adapter); 3518 
spin_unlock(&adapter->stats64_lock); 3519 3520 adapter->link_speed = 0; 3521 adapter->link_duplex = 0; 3522 3523 if (!pci_channel_offline(adapter->pdev)) 3524 igc_reset(adapter); 3525 3526 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 3527 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 3528 3529 igc_clean_all_tx_rings(adapter); 3530 igc_clean_all_rx_rings(adapter); 3531 } 3532 3533 void igc_reinit_locked(struct igc_adapter *adapter) 3534 { 3535 WARN_ON(in_interrupt()); 3536 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 3537 usleep_range(1000, 2000); 3538 igc_down(adapter); 3539 igc_up(adapter); 3540 clear_bit(__IGC_RESETTING, &adapter->state); 3541 } 3542 3543 static void igc_reset_task(struct work_struct *work) 3544 { 3545 struct igc_adapter *adapter; 3546 3547 adapter = container_of(work, struct igc_adapter, reset_task); 3548 3549 netdev_err(adapter->netdev, "Reset adapter\n"); 3550 igc_reinit_locked(adapter); 3551 } 3552 3553 /** 3554 * igc_change_mtu - Change the Maximum Transfer Unit 3555 * @netdev: network interface device structure 3556 * @new_mtu: new value for maximum frame size 3557 * 3558 * Returns 0 on success, negative on failure 3559 */ 3560 static int igc_change_mtu(struct net_device *netdev, int new_mtu) 3561 { 3562 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; 3563 struct igc_adapter *adapter = netdev_priv(netdev); 3564 3565 /* adjust max frame to be at least the size of a standard frame */ 3566 if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) 3567 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; 3568 3569 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 3570 usleep_range(1000, 2000); 3571 3572 /* igc_down has a dependency on max_frame_size */ 3573 adapter->max_frame_size = max_frame; 3574 3575 if (netif_running(netdev)) 3576 igc_down(adapter); 3577 3578 netdev_dbg(netdev, "changing MTU from %d to %d\n", 3579 netdev->mtu, new_mtu); 3580 netdev->mtu = new_mtu; 3581 3582 if (netif_running(netdev)) 3583 
igc_up(adapter); 3584 else 3585 igc_reset(adapter); 3586 3587 clear_bit(__IGC_RESETTING, &adapter->state); 3588 3589 return 0; 3590 } 3591 3592 /** 3593 * igc_get_stats - Get System Network Statistics 3594 * @netdev: network interface device structure 3595 * 3596 * Returns the address of the device statistics structure. 3597 * The statistics are updated here and also from the timer callback. 3598 */ 3599 static struct net_device_stats *igc_get_stats(struct net_device *netdev) 3600 { 3601 struct igc_adapter *adapter = netdev_priv(netdev); 3602 3603 if (!test_bit(__IGC_RESETTING, &adapter->state)) 3604 igc_update_stats(adapter); 3605 3606 /* only return the current stats */ 3607 return &netdev->stats; 3608 } 3609 3610 static netdev_features_t igc_fix_features(struct net_device *netdev, 3611 netdev_features_t features) 3612 { 3613 /* Since there is no support for separate Rx/Tx vlan accel 3614 * enable/disable make sure Tx flag is always in same state as Rx. 3615 */ 3616 if (features & NETIF_F_HW_VLAN_CTAG_RX) 3617 features |= NETIF_F_HW_VLAN_CTAG_TX; 3618 else 3619 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 3620 3621 return features; 3622 } 3623 3624 static int igc_set_features(struct net_device *netdev, 3625 netdev_features_t features) 3626 { 3627 netdev_features_t changed = netdev->features ^ features; 3628 struct igc_adapter *adapter = netdev_priv(netdev); 3629 3630 /* Add VLAN support */ 3631 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 3632 return 0; 3633 3634 if (!(features & NETIF_F_NTUPLE)) { 3635 struct hlist_node *node2; 3636 struct igc_nfc_filter *rule; 3637 3638 spin_lock(&adapter->nfc_lock); 3639 hlist_for_each_entry_safe(rule, node2, 3640 &adapter->nfc_filter_list, nfc_node) { 3641 igc_erase_filter(adapter, rule); 3642 hlist_del(&rule->nfc_node); 3643 kfree(rule); 3644 } 3645 spin_unlock(&adapter->nfc_lock); 3646 adapter->nfc_filter_count = 0; 3647 } 3648 3649 netdev->features = features; 3650 3651 if (netif_running(netdev)) 3652 
igc_reinit_locked(adapter); 3653 else 3654 igc_reset(adapter); 3655 3656 return 1; 3657 } 3658 3659 static netdev_features_t 3660 igc_features_check(struct sk_buff *skb, struct net_device *dev, 3661 netdev_features_t features) 3662 { 3663 unsigned int network_hdr_len, mac_hdr_len; 3664 3665 /* Make certain the headers can be described by a context descriptor */ 3666 mac_hdr_len = skb_network_header(skb) - skb->data; 3667 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 3668 return features & ~(NETIF_F_HW_CSUM | 3669 NETIF_F_SCTP_CRC | 3670 NETIF_F_HW_VLAN_CTAG_TX | 3671 NETIF_F_TSO | 3672 NETIF_F_TSO6); 3673 3674 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 3675 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 3676 return features & ~(NETIF_F_HW_CSUM | 3677 NETIF_F_SCTP_CRC | 3678 NETIF_F_TSO | 3679 NETIF_F_TSO6); 3680 3681 /* We can only support IPv4 TSO in tunnels if we can mangle the 3682 * inner IP ID field, so strip TSO if MANGLEID is not supported. 3683 */ 3684 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 3685 features &= ~NETIF_F_TSO; 3686 3687 return features; 3688 } 3689 3690 /* Add a MAC filter for 'addr' directing matching traffic to 'queue', 3691 * 'flags' is used to indicate what kind of match is made, match is by 3692 * default for the destination address, if matching by source address 3693 * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used. 3694 */ 3695 static int igc_add_mac_filter_flags(struct igc_adapter *adapter, 3696 const u8 *addr, const u8 queue, 3697 const u8 flags) 3698 { 3699 struct igc_hw *hw = &adapter->hw; 3700 int rar_entries = hw->mac.rar_entry_count; 3701 int i; 3702 3703 if (is_zero_ether_addr(addr)) 3704 return -EINVAL; 3705 3706 /* Search for the first empty entry in the MAC table. 3707 * Do not touch entries at the end of the table reserved for the VF MAC 3708 * addresses. 
3709 */ 3710 for (i = 0; i < rar_entries; i++) { 3711 if (!igc_mac_entry_can_be_used(&adapter->mac_table[i], 3712 addr, flags)) 3713 continue; 3714 3715 ether_addr_copy(adapter->mac_table[i].addr, addr); 3716 adapter->mac_table[i].queue = queue; 3717 adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags; 3718 3719 igc_rar_set_index(adapter, i); 3720 return i; 3721 } 3722 3723 return -ENOSPC; 3724 } 3725 3726 int igc_add_mac_steering_filter(struct igc_adapter *adapter, 3727 const u8 *addr, u8 queue, u8 flags) 3728 { 3729 return igc_add_mac_filter_flags(adapter, addr, queue, 3730 IGC_MAC_STATE_QUEUE_STEERING | flags); 3731 } 3732 3733 /* Remove a MAC filter for 'addr' directing matching traffic to 3734 * 'queue', 'flags' is used to indicate what kind of match need to be 3735 * removed, match is by default for the destination address, if 3736 * matching by source address is to be removed the flag 3737 * IGC_MAC_STATE_SRC_ADDR can be used. 3738 */ 3739 static int igc_del_mac_filter_flags(struct igc_adapter *adapter, 3740 const u8 *addr, const u8 queue, 3741 const u8 flags) 3742 { 3743 struct igc_hw *hw = &adapter->hw; 3744 int rar_entries = hw->mac.rar_entry_count; 3745 int i; 3746 3747 if (is_zero_ether_addr(addr)) 3748 return -EINVAL; 3749 3750 /* Search for matching entry in the MAC table based on given address 3751 * and queue. Do not touch entries at the end of the table reserved 3752 * for the VF MAC addresses. 
3753 */ 3754 for (i = 0; i < rar_entries; i++) { 3755 if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE)) 3756 continue; 3757 if ((adapter->mac_table[i].state & flags) != flags) 3758 continue; 3759 if (adapter->mac_table[i].queue != queue) 3760 continue; 3761 if (!ether_addr_equal(adapter->mac_table[i].addr, addr)) 3762 continue; 3763 3764 /* When a filter for the default address is "deleted", 3765 * we return it to its initial configuration 3766 */ 3767 if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) { 3768 adapter->mac_table[i].state = 3769 IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; 3770 } else { 3771 adapter->mac_table[i].state = 0; 3772 adapter->mac_table[i].queue = 0; 3773 memset(adapter->mac_table[i].addr, 0, ETH_ALEN); 3774 } 3775 3776 igc_rar_set_index(adapter, i); 3777 return 0; 3778 } 3779 3780 return -ENOENT; 3781 } 3782 3783 int igc_del_mac_steering_filter(struct igc_adapter *adapter, 3784 const u8 *addr, u8 queue, u8 flags) 3785 { 3786 return igc_del_mac_filter_flags(adapter, addr, queue, 3787 IGC_MAC_STATE_QUEUE_STEERING | flags); 3788 } 3789 3790 static void igc_tsync_interrupt(struct igc_adapter *adapter) 3791 { 3792 struct igc_hw *hw = &adapter->hw; 3793 u32 tsicr = rd32(IGC_TSICR); 3794 u32 ack = 0; 3795 3796 if (tsicr & IGC_TSICR_TXTS) { 3797 /* retrieve hardware timestamp */ 3798 schedule_work(&adapter->ptp_tx_work); 3799 ack |= IGC_TSICR_TXTS; 3800 } 3801 3802 /* acknowledge the interrupts */ 3803 wr32(IGC_TSICR, ack); 3804 } 3805 3806 /** 3807 * igc_msix_other - msix other interrupt handler 3808 * @irq: interrupt number 3809 * @data: pointer to a q_vector 3810 */ 3811 static irqreturn_t igc_msix_other(int irq, void *data) 3812 { 3813 struct igc_adapter *adapter = data; 3814 struct igc_hw *hw = &adapter->hw; 3815 u32 icr = rd32(IGC_ICR); 3816 3817 /* reading ICR causes bit 31 of EICR to be cleared */ 3818 if (icr & IGC_ICR_DRSTA) 3819 schedule_work(&adapter->reset_task); 3820 3821 if (icr & IGC_ICR_DOUTSYNC) { 3822 /* HW is 
reporting DMA is out of sync */ 3823 adapter->stats.doosync++; 3824 } 3825 3826 if (icr & IGC_ICR_LSC) { 3827 hw->mac.get_link_status = 1; 3828 /* guard against interrupt when we're going down */ 3829 if (!test_bit(__IGC_DOWN, &adapter->state)) 3830 mod_timer(&adapter->watchdog_timer, jiffies + 1); 3831 } 3832 3833 if (icr & IGC_ICR_TS) 3834 igc_tsync_interrupt(adapter); 3835 3836 wr32(IGC_EIMS, adapter->eims_other); 3837 3838 return IRQ_HANDLED; 3839 } 3840 3841 static void igc_write_itr(struct igc_q_vector *q_vector) 3842 { 3843 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 3844 3845 if (!q_vector->set_itr) 3846 return; 3847 3848 if (!itr_val) 3849 itr_val = IGC_ITR_VAL_MASK; 3850 3851 itr_val |= IGC_EITR_CNT_IGNR; 3852 3853 writel(itr_val, q_vector->itr_register); 3854 q_vector->set_itr = 0; 3855 } 3856 3857 static irqreturn_t igc_msix_ring(int irq, void *data) 3858 { 3859 struct igc_q_vector *q_vector = data; 3860 3861 /* Write the ITR value calculated from the previous interrupt. */ 3862 igc_write_itr(q_vector); 3863 3864 napi_schedule(&q_vector->napi); 3865 3866 return IRQ_HANDLED; 3867 } 3868 3869 /** 3870 * igc_request_msix - Initialize MSI-X interrupts 3871 * @adapter: Pointer to adapter structure 3872 * 3873 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 3874 * kernel. 
3875 */ 3876 static int igc_request_msix(struct igc_adapter *adapter) 3877 { 3878 int i = 0, err = 0, vector = 0, free_vector = 0; 3879 struct net_device *netdev = adapter->netdev; 3880 3881 err = request_irq(adapter->msix_entries[vector].vector, 3882 &igc_msix_other, 0, netdev->name, adapter); 3883 if (err) 3884 goto err_out; 3885 3886 for (i = 0; i < adapter->num_q_vectors; i++) { 3887 struct igc_q_vector *q_vector = adapter->q_vector[i]; 3888 3889 vector++; 3890 3891 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 3892 3893 if (q_vector->rx.ring && q_vector->tx.ring) 3894 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 3895 q_vector->rx.ring->queue_index); 3896 else if (q_vector->tx.ring) 3897 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 3898 q_vector->tx.ring->queue_index); 3899 else if (q_vector->rx.ring) 3900 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 3901 q_vector->rx.ring->queue_index); 3902 else 3903 sprintf(q_vector->name, "%s-unused", netdev->name); 3904 3905 err = request_irq(adapter->msix_entries[vector].vector, 3906 igc_msix_ring, 0, q_vector->name, 3907 q_vector); 3908 if (err) 3909 goto err_free; 3910 } 3911 3912 igc_configure_msix(adapter); 3913 return 0; 3914 3915 err_free: 3916 /* free already assigned IRQs */ 3917 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 3918 3919 vector--; 3920 for (i = 0; i < vector; i++) { 3921 free_irq(adapter->msix_entries[free_vector++].vector, 3922 adapter->q_vector[i]); 3923 } 3924 err_out: 3925 return err; 3926 } 3927 3928 /** 3929 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 3930 * @adapter: Pointer to adapter structure 3931 * 3932 * This function resets the device so that it has 0 rx queues, tx queues, and 3933 * MSI-X interrupts allocated. 
3934 */ 3935 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 3936 { 3937 igc_free_q_vectors(adapter); 3938 igc_reset_interrupt_capability(adapter); 3939 } 3940 3941 /* Need to wait a few seconds after link up to get diagnostic information from 3942 * the phy 3943 */ 3944 static void igc_update_phy_info(struct timer_list *t) 3945 { 3946 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 3947 3948 igc_get_phy_info(&adapter->hw); 3949 } 3950 3951 /** 3952 * igc_has_link - check shared code for link and determine up/down 3953 * @adapter: pointer to driver private info 3954 */ 3955 bool igc_has_link(struct igc_adapter *adapter) 3956 { 3957 struct igc_hw *hw = &adapter->hw; 3958 bool link_active = false; 3959 3960 /* get_link_status is set on LSC (link status) interrupt or 3961 * rx sequence error interrupt. get_link_status will stay 3962 * false until the igc_check_for_link establishes link 3963 * for copper adapters ONLY 3964 */ 3965 switch (hw->phy.media_type) { 3966 case igc_media_type_copper: 3967 if (!hw->mac.get_link_status) 3968 return true; 3969 hw->mac.ops.check_for_link(hw); 3970 link_active = !hw->mac.get_link_status; 3971 break; 3972 default: 3973 case igc_media_type_unknown: 3974 break; 3975 } 3976 3977 if (hw->mac.type == igc_i225 && 3978 hw->phy.id == I225_I_PHY_ID) { 3979 if (!netif_carrier_ok(adapter->netdev)) { 3980 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 3981 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 3982 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 3983 adapter->link_check_timeout = jiffies; 3984 } 3985 } 3986 3987 return link_active; 3988 } 3989 3990 /** 3991 * igc_watchdog - Timer Call-back 3992 * @t: timer for the watchdog 3993 */ 3994 static void igc_watchdog(struct timer_list *t) 3995 { 3996 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 3997 /* Do the rest outside of interrupt context */ 3998 schedule_work(&adapter->watchdog_task); 3999 } 4000 4001 static void 
igc_watchdog_task(struct work_struct *work) 4002 { 4003 struct igc_adapter *adapter = container_of(work, 4004 struct igc_adapter, 4005 watchdog_task); 4006 struct net_device *netdev = adapter->netdev; 4007 struct igc_hw *hw = &adapter->hw; 4008 struct igc_phy_info *phy = &hw->phy; 4009 u16 phy_data, retry_count = 20; 4010 u32 connsw; 4011 u32 link; 4012 int i; 4013 4014 link = igc_has_link(adapter); 4015 4016 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 4017 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 4018 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 4019 else 4020 link = false; 4021 } 4022 4023 /* Force link down if we have fiber to swap to */ 4024 if (adapter->flags & IGC_FLAG_MAS_ENABLE) { 4025 if (hw->phy.media_type == igc_media_type_copper) { 4026 connsw = rd32(IGC_CONNSW); 4027 if (!(connsw & IGC_CONNSW_AUTOSENSE_EN)) 4028 link = 0; 4029 } 4030 } 4031 if (link) { 4032 if (!netif_carrier_ok(netdev)) { 4033 u32 ctrl; 4034 4035 hw->mac.ops.get_speed_and_duplex(hw, 4036 &adapter->link_speed, 4037 &adapter->link_duplex); 4038 4039 ctrl = rd32(IGC_CTRL); 4040 /* Link status message must follow this format */ 4041 netdev_info(netdev, 4042 "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 4043 netdev->name, 4044 adapter->link_speed, 4045 adapter->link_duplex == FULL_DUPLEX ? 4046 "Full" : "Half", 4047 (ctrl & IGC_CTRL_TFCE) && 4048 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 4049 (ctrl & IGC_CTRL_RFCE) ? "RX" : 4050 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 4051 4052 /* check if SmartSpeed worked */ 4053 igc_check_downshift(hw); 4054 if (phy->speed_downgraded) 4055 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 4056 4057 /* adjust timeout factor according to speed/duplex */ 4058 adapter->tx_timeout_factor = 1; 4059 switch (adapter->link_speed) { 4060 case SPEED_10: 4061 adapter->tx_timeout_factor = 14; 4062 break; 4063 case SPEED_100: 4064 /* maybe add some timeout factor ? 
*/ 4065 break; 4066 } 4067 4068 if (adapter->link_speed != SPEED_1000) 4069 goto no_wait; 4070 4071 /* wait for Remote receiver status OK */ 4072 retry_read_status: 4073 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 4074 &phy_data)) { 4075 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 4076 retry_count) { 4077 msleep(100); 4078 retry_count--; 4079 goto retry_read_status; 4080 } else if (!retry_count) { 4081 dev_err(&adapter->pdev->dev, "exceed max 2 second\n"); 4082 } 4083 } else { 4084 dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n"); 4085 } 4086 no_wait: 4087 netif_carrier_on(netdev); 4088 4089 /* link state has changed, schedule phy info update */ 4090 if (!test_bit(__IGC_DOWN, &adapter->state)) 4091 mod_timer(&adapter->phy_info_timer, 4092 round_jiffies(jiffies + 2 * HZ)); 4093 } 4094 } else { 4095 if (netif_carrier_ok(netdev)) { 4096 adapter->link_speed = 0; 4097 adapter->link_duplex = 0; 4098 4099 /* Links status message must follow this format */ 4100 netdev_info(netdev, "igc: %s NIC Link is Down\n", 4101 netdev->name); 4102 netif_carrier_off(netdev); 4103 4104 /* link state has changed, schedule phy info update */ 4105 if (!test_bit(__IGC_DOWN, &adapter->state)) 4106 mod_timer(&adapter->phy_info_timer, 4107 round_jiffies(jiffies + 2 * HZ)); 4108 4109 /* link is down, time to check for alternate media */ 4110 if (adapter->flags & IGC_FLAG_MAS_ENABLE) { 4111 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 4112 schedule_work(&adapter->reset_task); 4113 /* return immediately */ 4114 return; 4115 } 4116 } 4117 4118 /* also check for alternate media here */ 4119 } else if (!netif_carrier_ok(netdev) && 4120 (adapter->flags & IGC_FLAG_MAS_ENABLE)) { 4121 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 4122 schedule_work(&adapter->reset_task); 4123 /* return immediately */ 4124 return; 4125 } 4126 } 4127 } 4128 4129 spin_lock(&adapter->stats64_lock); 4130 igc_update_stats(adapter); 4131 spin_unlock(&adapter->stats64_lock); 4132 4133 for (i = 0; i < 
adapter->num_tx_queues; i++) { 4134 struct igc_ring *tx_ring = adapter->tx_ring[i]; 4135 4136 if (!netif_carrier_ok(netdev)) { 4137 /* We've lost link, so the controller stops DMA, 4138 * but we've got queued Tx work that's never going 4139 * to get done, so reset controller to flush Tx. 4140 * (Do the reset outside of interrupt context). 4141 */ 4142 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 4143 adapter->tx_timeout_count++; 4144 schedule_work(&adapter->reset_task); 4145 /* return immediately since reset is imminent */ 4146 return; 4147 } 4148 } 4149 4150 /* Force detection of hung controller every watchdog period */ 4151 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 4152 } 4153 4154 /* Cause software interrupt to ensure Rx ring is cleaned */ 4155 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 4156 u32 eics = 0; 4157 4158 for (i = 0; i < adapter->num_q_vectors; i++) 4159 eics |= adapter->q_vector[i]->eims_value; 4160 wr32(IGC_EICS, eics); 4161 } else { 4162 wr32(IGC_ICS, IGC_ICS_RXDMT0); 4163 } 4164 4165 igc_ptp_tx_hang(adapter); 4166 4167 /* Reset the timer */ 4168 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4169 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 4170 mod_timer(&adapter->watchdog_timer, 4171 round_jiffies(jiffies + HZ)); 4172 else 4173 mod_timer(&adapter->watchdog_timer, 4174 round_jiffies(jiffies + 2 * HZ)); 4175 } 4176 } 4177 4178 /** 4179 * igc_intr_msi - Interrupt Handler 4180 * @irq: interrupt number 4181 * @data: pointer to a network interface device structure 4182 */ 4183 static irqreturn_t igc_intr_msi(int irq, void *data) 4184 { 4185 struct igc_adapter *adapter = data; 4186 struct igc_q_vector *q_vector = adapter->q_vector[0]; 4187 struct igc_hw *hw = &adapter->hw; 4188 /* read ICR disables interrupts using IAM */ 4189 u32 icr = rd32(IGC_ICR); 4190 4191 igc_write_itr(q_vector); 4192 4193 if (icr & IGC_ICR_DRSTA) 4194 schedule_work(&adapter->reset_task); 4195 4196 if (icr & IGC_ICR_DOUTSYNC) { 4197 /* HW is reporting DMA is 
out of sync */ 4198 adapter->stats.doosync++; 4199 } 4200 4201 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 4202 hw->mac.get_link_status = 1; 4203 if (!test_bit(__IGC_DOWN, &adapter->state)) 4204 mod_timer(&adapter->watchdog_timer, jiffies + 1); 4205 } 4206 4207 napi_schedule(&q_vector->napi); 4208 4209 return IRQ_HANDLED; 4210 } 4211 4212 /** 4213 * igc_intr - Legacy Interrupt Handler 4214 * @irq: interrupt number 4215 * @data: pointer to a network interface device structure 4216 */ 4217 static irqreturn_t igc_intr(int irq, void *data) 4218 { 4219 struct igc_adapter *adapter = data; 4220 struct igc_q_vector *q_vector = adapter->q_vector[0]; 4221 struct igc_hw *hw = &adapter->hw; 4222 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 4223 * need for the IMC write 4224 */ 4225 u32 icr = rd32(IGC_ICR); 4226 4227 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 4228 * not set, then the adapter didn't send an interrupt 4229 */ 4230 if (!(icr & IGC_ICR_INT_ASSERTED)) 4231 return IRQ_NONE; 4232 4233 igc_write_itr(q_vector); 4234 4235 if (icr & IGC_ICR_DRSTA) 4236 schedule_work(&adapter->reset_task); 4237 4238 if (icr & IGC_ICR_DOUTSYNC) { 4239 /* HW is reporting DMA is out of sync */ 4240 adapter->stats.doosync++; 4241 } 4242 4243 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 4244 hw->mac.get_link_status = 1; 4245 /* guard against interrupt when we're going down */ 4246 if (!test_bit(__IGC_DOWN, &adapter->state)) 4247 mod_timer(&adapter->watchdog_timer, jiffies + 1); 4248 } 4249 4250 napi_schedule(&q_vector->napi); 4251 4252 return IRQ_HANDLED; 4253 } 4254 4255 static void igc_free_irq(struct igc_adapter *adapter) 4256 { 4257 if (adapter->msix_entries) { 4258 int vector = 0, i; 4259 4260 free_irq(adapter->msix_entries[vector++].vector, adapter); 4261 4262 for (i = 0; i < adapter->num_q_vectors; i++) 4263 free_irq(adapter->msix_entries[vector++].vector, 4264 adapter->q_vector[i]); 4265 } else { 4266 free_irq(adapter->pdev->irq, adapter); 
4267 } 4268 } 4269 4270 /** 4271 * igc_request_irq - initialize interrupts 4272 * @adapter: Pointer to adapter structure 4273 * 4274 * Attempts to configure interrupts using the best available 4275 * capabilities of the hardware and kernel. 4276 */ 4277 static int igc_request_irq(struct igc_adapter *adapter) 4278 { 4279 struct net_device *netdev = adapter->netdev; 4280 struct pci_dev *pdev = adapter->pdev; 4281 int err = 0; 4282 4283 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 4284 err = igc_request_msix(adapter); 4285 if (!err) 4286 goto request_done; 4287 /* fall back to MSI */ 4288 igc_free_all_tx_resources(adapter); 4289 igc_free_all_rx_resources(adapter); 4290 4291 igc_clear_interrupt_scheme(adapter); 4292 err = igc_init_interrupt_scheme(adapter, false); 4293 if (err) 4294 goto request_done; 4295 igc_setup_all_tx_resources(adapter); 4296 igc_setup_all_rx_resources(adapter); 4297 igc_configure(adapter); 4298 } 4299 4300 igc_assign_vector(adapter->q_vector[0], 0); 4301 4302 if (adapter->flags & IGC_FLAG_HAS_MSI) { 4303 err = request_irq(pdev->irq, &igc_intr_msi, 0, 4304 netdev->name, adapter); 4305 if (!err) 4306 goto request_done; 4307 4308 /* fall back to legacy interrupts */ 4309 igc_reset_interrupt_capability(adapter); 4310 adapter->flags &= ~IGC_FLAG_HAS_MSI; 4311 } 4312 4313 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 4314 netdev->name, adapter); 4315 4316 if (err) 4317 dev_err(&pdev->dev, "Error %d getting interrupt\n", 4318 err); 4319 4320 request_done: 4321 return err; 4322 } 4323 4324 /** 4325 * __igc_open - Called when a network interface is made active 4326 * @netdev: network interface device structure 4327 * @resuming: boolean indicating if the device is resuming 4328 * 4329 * Returns 0 on success, negative value on failure 4330 * 4331 * The open entry point is called when a network interface is made 4332 * active by the system (IFF_UP). 
At this point all resources needed 4333 * for transmit and receive operations are allocated, the interrupt 4334 * handler is registered with the OS, the watchdog timer is started, 4335 * and the stack is notified that the interface is ready. 4336 */ 4337 static int __igc_open(struct net_device *netdev, bool resuming) 4338 { 4339 struct igc_adapter *adapter = netdev_priv(netdev); 4340 struct igc_hw *hw = &adapter->hw; 4341 int err = 0; 4342 int i = 0; 4343 4344 /* disallow open during test */ 4345 4346 if (test_bit(__IGC_TESTING, &adapter->state)) { 4347 WARN_ON(resuming); 4348 return -EBUSY; 4349 } 4350 4351 netif_carrier_off(netdev); 4352 4353 /* allocate transmit descriptors */ 4354 err = igc_setup_all_tx_resources(adapter); 4355 if (err) 4356 goto err_setup_tx; 4357 4358 /* allocate receive descriptors */ 4359 err = igc_setup_all_rx_resources(adapter); 4360 if (err) 4361 goto err_setup_rx; 4362 4363 igc_power_up_link(adapter); 4364 4365 igc_configure(adapter); 4366 4367 err = igc_request_irq(adapter); 4368 if (err) 4369 goto err_req_irq; 4370 4371 /* Notify the stack of the actual queue counts. */ 4372 err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); 4373 if (err) 4374 goto err_set_queues; 4375 4376 err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); 4377 if (err) 4378 goto err_set_queues; 4379 4380 clear_bit(__IGC_DOWN, &adapter->state); 4381 4382 for (i = 0; i < adapter->num_q_vectors; i++) 4383 napi_enable(&adapter->q_vector[i]->napi); 4384 4385 /* Clear any pending interrupts. */ 4386 rd32(IGC_ICR); 4387 igc_irq_enable(adapter); 4388 4389 netif_tx_start_all_queues(netdev); 4390 4391 /* start the watchdog. 
*/ 4392 hw->mac.get_link_status = 1; 4393 schedule_work(&adapter->watchdog_task); 4394 4395 return IGC_SUCCESS; 4396 4397 err_set_queues: 4398 igc_free_irq(adapter); 4399 err_req_irq: 4400 igc_release_hw_control(adapter); 4401 igc_power_down_link(adapter); 4402 igc_free_all_rx_resources(adapter); 4403 err_setup_rx: 4404 igc_free_all_tx_resources(adapter); 4405 err_setup_tx: 4406 igc_reset(adapter); 4407 4408 return err; 4409 } 4410 4411 static int igc_open(struct net_device *netdev) 4412 { 4413 return __igc_open(netdev, false); 4414 } 4415 4416 /** 4417 * __igc_close - Disables a network interface 4418 * @netdev: network interface device structure 4419 * @suspending: boolean indicating the device is suspending 4420 * 4421 * Returns 0, this is not allowed to fail 4422 * 4423 * The close entry point is called when an interface is de-activated 4424 * by the OS. The hardware is still under the driver's control, but 4425 * needs to be disabled. A global MAC reset is issued to stop the 4426 * hardware, and all transmit and receive resources are freed. 
4427 */ 4428 static int __igc_close(struct net_device *netdev, bool suspending) 4429 { 4430 struct igc_adapter *adapter = netdev_priv(netdev); 4431 4432 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 4433 4434 igc_down(adapter); 4435 4436 igc_release_hw_control(adapter); 4437 4438 igc_free_irq(adapter); 4439 4440 igc_free_all_tx_resources(adapter); 4441 igc_free_all_rx_resources(adapter); 4442 4443 return 0; 4444 } 4445 4446 static int igc_close(struct net_device *netdev) 4447 { 4448 if (netif_device_present(netdev) || netdev->dismantle) 4449 return __igc_close(netdev, false); 4450 return 0; 4451 } 4452 4453 /** 4454 * igc_ioctl - Access the hwtstamp interface 4455 * @netdev: network interface device structure 4456 * @ifreq: interface request data 4457 * @cmd: ioctl command 4458 **/ 4459 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 4460 { 4461 switch (cmd) { 4462 case SIOCGHWTSTAMP: 4463 return igc_ptp_get_ts_config(netdev, ifr); 4464 case SIOCSHWTSTAMP: 4465 return igc_ptp_set_ts_config(netdev, ifr); 4466 default: 4467 return -EOPNOTSUPP; 4468 } 4469 } 4470 4471 static const struct net_device_ops igc_netdev_ops = { 4472 .ndo_open = igc_open, 4473 .ndo_stop = igc_close, 4474 .ndo_start_xmit = igc_xmit_frame, 4475 .ndo_set_rx_mode = igc_set_rx_mode, 4476 .ndo_set_mac_address = igc_set_mac, 4477 .ndo_change_mtu = igc_change_mtu, 4478 .ndo_get_stats = igc_get_stats, 4479 .ndo_fix_features = igc_fix_features, 4480 .ndo_set_features = igc_set_features, 4481 .ndo_features_check = igc_features_check, 4482 .ndo_do_ioctl = igc_ioctl, 4483 }; 4484 4485 /* PCIe configuration access */ 4486 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 4487 { 4488 struct igc_adapter *adapter = hw->back; 4489 4490 pci_read_config_word(adapter->pdev, reg, value); 4491 } 4492 4493 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 4494 { 4495 struct igc_adapter *adapter = hw->back; 4496 4497 pci_write_config_word(adapter->pdev, reg, 
*value); 4498 } 4499 4500 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 4501 { 4502 struct igc_adapter *adapter = hw->back; 4503 4504 if (!pci_is_pcie(adapter->pdev)) 4505 return -IGC_ERR_CONFIG; 4506 4507 pcie_capability_read_word(adapter->pdev, reg, value); 4508 4509 return IGC_SUCCESS; 4510 } 4511 4512 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 4513 { 4514 struct igc_adapter *adapter = hw->back; 4515 4516 if (!pci_is_pcie(adapter->pdev)) 4517 return -IGC_ERR_CONFIG; 4518 4519 pcie_capability_write_word(adapter->pdev, reg, *value); 4520 4521 return IGC_SUCCESS; 4522 } 4523 4524 u32 igc_rd32(struct igc_hw *hw, u32 reg) 4525 { 4526 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 4527 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 4528 u32 value = 0; 4529 4530 if (IGC_REMOVED(hw_addr)) 4531 return ~value; 4532 4533 value = readl(&hw_addr[reg]); 4534 4535 /* reads should not return all F's */ 4536 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 4537 struct net_device *netdev = igc->netdev; 4538 4539 hw->hw_addr = NULL; 4540 netif_device_detach(netdev); 4541 netdev_err(netdev, "PCIe link lost, device now detached\n"); 4542 WARN(pci_device_is_present(igc->pdev), 4543 "igc: Failed to read reg 0x%x!\n", reg); 4544 } 4545 4546 return value; 4547 } 4548 4549 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) 4550 { 4551 struct pci_dev *pdev = adapter->pdev; 4552 struct igc_mac_info *mac = &adapter->hw.mac; 4553 4554 mac->autoneg = 0; 4555 4556 /* Make sure dplx is at most 1 bit and lsb of speed is not set 4557 * for the switch() below to work 4558 */ 4559 if ((spd & 1) || (dplx & ~1)) 4560 goto err_inval; 4561 4562 switch (spd + dplx) { 4563 case SPEED_10 + DUPLEX_HALF: 4564 mac->forced_speed_duplex = ADVERTISE_10_HALF; 4565 break; 4566 case SPEED_10 + DUPLEX_FULL: 4567 mac->forced_speed_duplex = ADVERTISE_10_FULL; 4568 break; 4569 case SPEED_100 + DUPLEX_HALF: 4570 mac->forced_speed_duplex = 
ADVERTISE_100_HALF; 4571 break; 4572 case SPEED_100 + DUPLEX_FULL: 4573 mac->forced_speed_duplex = ADVERTISE_100_FULL; 4574 break; 4575 case SPEED_1000 + DUPLEX_FULL: 4576 mac->autoneg = 1; 4577 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 4578 break; 4579 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 4580 goto err_inval; 4581 case SPEED_2500 + DUPLEX_FULL: 4582 mac->autoneg = 1; 4583 adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; 4584 break; 4585 case SPEED_2500 + DUPLEX_HALF: /* not supported */ 4586 default: 4587 goto err_inval; 4588 } 4589 4590 /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ 4591 adapter->hw.phy.mdix = AUTO_ALL_MODES; 4592 4593 return 0; 4594 4595 err_inval: 4596 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); 4597 return -EINVAL; 4598 } 4599 4600 /** 4601 * igc_probe - Device Initialization Routine 4602 * @pdev: PCI device information struct 4603 * @ent: entry in igc_pci_tbl 4604 * 4605 * Returns 0 on success, negative on failure 4606 * 4607 * igc_probe initializes an adapter identified by a pci_dev structure. 4608 * The OS initialization, configuring the adapter private structure, 4609 * and a hardware reset occur. 
4610 */ 4611 static int igc_probe(struct pci_dev *pdev, 4612 const struct pci_device_id *ent) 4613 { 4614 struct igc_adapter *adapter; 4615 struct net_device *netdev; 4616 struct igc_hw *hw; 4617 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 4618 int err, pci_using_dac; 4619 4620 err = pci_enable_device_mem(pdev); 4621 if (err) 4622 return err; 4623 4624 pci_using_dac = 0; 4625 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 4626 if (!err) { 4627 pci_using_dac = 1; 4628 } else { 4629 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 4630 if (err) { 4631 dev_err(&pdev->dev, 4632 "No usable DMA configuration, aborting\n"); 4633 goto err_dma; 4634 } 4635 } 4636 4637 err = pci_request_mem_regions(pdev, igc_driver_name); 4638 if (err) 4639 goto err_pci_reg; 4640 4641 pci_enable_pcie_error_reporting(pdev); 4642 4643 pci_set_master(pdev); 4644 4645 err = -ENOMEM; 4646 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 4647 IGC_MAX_TX_QUEUES); 4648 4649 if (!netdev) 4650 goto err_alloc_etherdev; 4651 4652 SET_NETDEV_DEV(netdev, &pdev->dev); 4653 4654 pci_set_drvdata(pdev, netdev); 4655 adapter = netdev_priv(netdev); 4656 adapter->netdev = netdev; 4657 adapter->pdev = pdev; 4658 hw = &adapter->hw; 4659 hw->back = adapter; 4660 adapter->port_num = hw->bus.func; 4661 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 4662 4663 err = pci_save_state(pdev); 4664 if (err) 4665 goto err_ioremap; 4666 4667 err = -EIO; 4668 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 4669 pci_resource_len(pdev, 0)); 4670 if (!adapter->io_addr) 4671 goto err_ioremap; 4672 4673 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 4674 hw->hw_addr = adapter->io_addr; 4675 4676 netdev->netdev_ops = &igc_netdev_ops; 4677 igc_set_ethtool_ops(netdev); 4678 netdev->watchdog_timeo = 5 * HZ; 4679 4680 netdev->mem_start = pci_resource_start(pdev, 0); 4681 netdev->mem_end = pci_resource_end(pdev, 0); 4682 4683 /* PCI config 
space info */ 4684 hw->vendor_id = pdev->vendor; 4685 hw->device_id = pdev->device; 4686 hw->revision_id = pdev->revision; 4687 hw->subsystem_vendor_id = pdev->subsystem_vendor; 4688 hw->subsystem_device_id = pdev->subsystem_device; 4689 4690 /* Copy the default MAC and PHY function pointers */ 4691 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 4692 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 4693 4694 /* Initialize skew-specific constants */ 4695 err = ei->get_invariants(hw); 4696 if (err) 4697 goto err_sw_init; 4698 4699 /* Add supported features to the features list*/ 4700 netdev->features |= NETIF_F_SG; 4701 netdev->features |= NETIF_F_TSO; 4702 netdev->features |= NETIF_F_TSO6; 4703 netdev->features |= NETIF_F_RXCSUM; 4704 netdev->features |= NETIF_F_HW_CSUM; 4705 netdev->features |= NETIF_F_SCTP_CRC; 4706 4707 /* setup the private structure */ 4708 err = igc_sw_init(adapter); 4709 if (err) 4710 goto err_sw_init; 4711 4712 /* copy netdev features into list of user selectable features */ 4713 netdev->hw_features |= NETIF_F_NTUPLE; 4714 netdev->hw_features |= netdev->features; 4715 4716 if (pci_using_dac) 4717 netdev->features |= NETIF_F_HIGHDMA; 4718 4719 /* MTU range: 68 - 9216 */ 4720 netdev->min_mtu = ETH_MIN_MTU; 4721 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 4722 4723 /* before reading the NVM, reset the controller to put the device in a 4724 * known good starting state 4725 */ 4726 hw->mac.ops.reset_hw(hw); 4727 4728 if (igc_get_flash_presence_i225(hw)) { 4729 if (hw->nvm.ops.validate(hw) < 0) { 4730 dev_err(&pdev->dev, 4731 "The NVM Checksum Is Not Valid\n"); 4732 err = -EIO; 4733 goto err_eeprom; 4734 } 4735 } 4736 4737 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 4738 /* copy the MAC address out of the NVM */ 4739 if (hw->mac.ops.read_mac_addr(hw)) 4740 dev_err(&pdev->dev, "NVM Read Error\n"); 4741 } 4742 4743 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); 4744 4745 if 
(!is_valid_ether_addr(netdev->dev_addr)) { 4746 dev_err(&pdev->dev, "Invalid MAC Address\n"); 4747 err = -EIO; 4748 goto err_eeprom; 4749 } 4750 4751 /* configure RXPBSIZE and TXPBSIZE */ 4752 wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); 4753 wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); 4754 4755 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 4756 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 4757 4758 INIT_WORK(&adapter->reset_task, igc_reset_task); 4759 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 4760 4761 /* Initialize link properties that are user-changeable */ 4762 adapter->fc_autoneg = true; 4763 hw->mac.autoneg = true; 4764 hw->phy.autoneg_advertised = 0xaf; 4765 4766 hw->fc.requested_mode = igc_fc_default; 4767 hw->fc.current_mode = igc_fc_default; 4768 4769 /* reset the hardware with the new settings */ 4770 igc_reset(adapter); 4771 4772 /* let the f/w know that the h/w is now under the control of the 4773 * driver. 4774 */ 4775 igc_get_hw_control(adapter); 4776 4777 strncpy(netdev->name, "eth%d", IFNAMSIZ); 4778 err = register_netdev(netdev); 4779 if (err) 4780 goto err_register; 4781 4782 /* carrier off reporting is important to ethtool even BEFORE open */ 4783 netif_carrier_off(netdev); 4784 4785 /* do hw tstamp init after resetting */ 4786 igc_ptp_init(adapter); 4787 4788 /* Check if Media Autosense is enabled */ 4789 adapter->ei = *ei; 4790 4791 /* print pcie link status and MAC address */ 4792 pcie_print_link_status(pdev); 4793 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 4794 4795 return 0; 4796 4797 err_register: 4798 igc_release_hw_control(adapter); 4799 err_eeprom: 4800 if (!igc_check_reset_block(hw)) 4801 igc_reset_phy(hw); 4802 err_sw_init: 4803 igc_clear_interrupt_scheme(adapter); 4804 iounmap(adapter->io_addr); 4805 err_ioremap: 4806 free_netdev(netdev); 4807 err_alloc_etherdev: 4808 pci_release_mem_regions(pdev); 4809 err_pci_reg: 4810 err_dma: 4811 pci_disable_device(pdev); 4812 return err; 4813 } 4814 
4815 /** 4816 * igc_remove - Device Removal Routine 4817 * @pdev: PCI device information struct 4818 * 4819 * igc_remove is called by the PCI subsystem to alert the driver 4820 * that it should release a PCI device. This could be caused by a 4821 * Hot-Plug event, or because the driver is going to be removed from 4822 * memory. 4823 */ 4824 static void igc_remove(struct pci_dev *pdev) 4825 { 4826 struct net_device *netdev = pci_get_drvdata(pdev); 4827 struct igc_adapter *adapter = netdev_priv(netdev); 4828 4829 igc_ptp_stop(adapter); 4830 4831 set_bit(__IGC_DOWN, &adapter->state); 4832 4833 del_timer_sync(&adapter->watchdog_timer); 4834 del_timer_sync(&adapter->phy_info_timer); 4835 4836 cancel_work_sync(&adapter->reset_task); 4837 cancel_work_sync(&adapter->watchdog_task); 4838 4839 /* Release control of h/w to f/w. If f/w is AMT enabled, this 4840 * would have already happened in close and is redundant. 4841 */ 4842 igc_release_hw_control(adapter); 4843 unregister_netdev(netdev); 4844 4845 igc_clear_interrupt_scheme(adapter); 4846 pci_iounmap(pdev, adapter->io_addr); 4847 pci_release_mem_regions(pdev); 4848 4849 kfree(adapter->mac_table); 4850 free_netdev(netdev); 4851 4852 pci_disable_pcie_error_reporting(pdev); 4853 4854 pci_disable_device(pdev); 4855 } 4856 4857 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 4858 bool runtime) 4859 { 4860 struct net_device *netdev = pci_get_drvdata(pdev); 4861 struct igc_adapter *adapter = netdev_priv(netdev); 4862 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 4863 struct igc_hw *hw = &adapter->hw; 4864 u32 ctrl, rctl, status; 4865 bool wake; 4866 4867 rtnl_lock(); 4868 netif_device_detach(netdev); 4869 4870 if (netif_running(netdev)) 4871 __igc_close(netdev, true); 4872 4873 igc_clear_interrupt_scheme(adapter); 4874 rtnl_unlock(); 4875 4876 status = rd32(IGC_STATUS); 4877 if (status & IGC_STATUS_LU) 4878 wufc &= ~IGC_WUFC_LNKC; 4879 4880 if (wufc) { 4881 igc_setup_rctl(adapter); 4882 igc_set_rx_mode(netdev); 4883 4884 /* turn on all-multi mode if wake on multicast is enabled */ 4885 if (wufc & IGC_WUFC_MC) { 4886 rctl = rd32(IGC_RCTL); 4887 rctl |= IGC_RCTL_MPE; 4888 wr32(IGC_RCTL, rctl); 4889 } 4890 4891 ctrl = rd32(IGC_CTRL); 4892 ctrl |= IGC_CTRL_ADVD3WUC; 4893 wr32(IGC_CTRL, ctrl); 4894 4895 /* Allow time for pending master requests to run */ 4896 igc_disable_pcie_master(hw); 4897 4898 wr32(IGC_WUC, IGC_WUC_PME_EN); 4899 wr32(IGC_WUFC, wufc); 4900 } else { 4901 wr32(IGC_WUC, 0); 4902 wr32(IGC_WUFC, 0); 4903 } 4904 4905 wake = wufc || adapter->en_mng_pt; 4906 if (!wake) 4907 igc_power_down_link(adapter); 4908 else 4909 igc_power_up_link(adapter); 4910 4911 if (enable_wake) 4912 *enable_wake = wake; 4913 4914 /* Release control of h/w to f/w. If f/w is AMT enabled, this 4915 * would have already happened in close and is redundant. 4916 */ 4917 igc_release_hw_control(adapter); 4918 4919 pci_disable_device(pdev); 4920 4921 return 0; 4922 } 4923 4924 #ifdef CONFIG_PM 4925 static int __maybe_unused igc_runtime_suspend(struct device *dev) 4926 { 4927 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 4928 } 4929 4930 static void igc_deliver_wake_packet(struct net_device *netdev) 4931 { 4932 struct igc_adapter *adapter = netdev_priv(netdev); 4933 struct igc_hw *hw = &adapter->hw; 4934 struct sk_buff *skb; 4935 u32 wupl; 4936 4937 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 4938 4939 /* WUPM stores only the first 128 bytes of the wake packet. 4940 * Read the packet only if we have the whole thing. 
4941 */ 4942 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 4943 return; 4944 4945 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 4946 if (!skb) 4947 return; 4948 4949 skb_put(skb, wupl); 4950 4951 /* Ensure reads are 32-bit aligned */ 4952 wupl = roundup(wupl, 4); 4953 4954 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 4955 4956 skb->protocol = eth_type_trans(skb, netdev); 4957 netif_rx(skb); 4958 } 4959 4960 static int __maybe_unused igc_resume(struct device *dev) 4961 { 4962 struct pci_dev *pdev = to_pci_dev(dev); 4963 struct net_device *netdev = pci_get_drvdata(pdev); 4964 struct igc_adapter *adapter = netdev_priv(netdev); 4965 struct igc_hw *hw = &adapter->hw; 4966 u32 err, val; 4967 4968 pci_set_power_state(pdev, PCI_D0); 4969 pci_restore_state(pdev); 4970 pci_save_state(pdev); 4971 4972 if (!pci_device_is_present(pdev)) 4973 return -ENODEV; 4974 err = pci_enable_device_mem(pdev); 4975 if (err) { 4976 dev_err(&pdev->dev, 4977 "igc: Cannot enable PCI device from suspend\n"); 4978 return err; 4979 } 4980 pci_set_master(pdev); 4981 4982 pci_enable_wake(pdev, PCI_D3hot, 0); 4983 pci_enable_wake(pdev, PCI_D3cold, 0); 4984 4985 if (igc_init_interrupt_scheme(adapter, true)) { 4986 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 4987 return -ENOMEM; 4988 } 4989 4990 igc_reset(adapter); 4991 4992 /* let the f/w know that the h/w is now under the control of the 4993 * driver. 
4994 */ 4995 igc_get_hw_control(adapter); 4996 4997 val = rd32(IGC_WUS); 4998 if (val & WAKE_PKT_WUS) 4999 igc_deliver_wake_packet(netdev); 5000 5001 wr32(IGC_WUS, ~0); 5002 5003 rtnl_lock(); 5004 if (!err && netif_running(netdev)) 5005 err = __igc_open(netdev, true); 5006 5007 if (!err) 5008 netif_device_attach(netdev); 5009 rtnl_unlock(); 5010 5011 return err; 5012 } 5013 5014 static int __maybe_unused igc_runtime_resume(struct device *dev) 5015 { 5016 return igc_resume(dev); 5017 } 5018 5019 static int __maybe_unused igc_suspend(struct device *dev) 5020 { 5021 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 5022 } 5023 5024 static int __maybe_unused igc_runtime_idle(struct device *dev) 5025 { 5026 struct net_device *netdev = dev_get_drvdata(dev); 5027 struct igc_adapter *adapter = netdev_priv(netdev); 5028 5029 if (!igc_has_link(adapter)) 5030 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 5031 5032 return -EBUSY; 5033 } 5034 #endif /* CONFIG_PM */ 5035 5036 static void igc_shutdown(struct pci_dev *pdev) 5037 { 5038 bool wake; 5039 5040 __igc_shutdown(pdev, &wake, 0); 5041 5042 if (system_state == SYSTEM_POWER_OFF) { 5043 pci_wake_from_d3(pdev, wake); 5044 pci_set_power_state(pdev, PCI_D3hot); 5045 } 5046 } 5047 5048 #ifdef CONFIG_PM 5049 static const struct dev_pm_ops igc_pm_ops = { 5050 SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) 5051 SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, 5052 igc_runtime_idle) 5053 }; 5054 #endif 5055 5056 static struct pci_driver igc_driver = { 5057 .name = igc_driver_name, 5058 .id_table = igc_pci_tbl, 5059 .probe = igc_probe, 5060 .remove = igc_remove, 5061 #ifdef CONFIG_PM 5062 .driver.pm = &igc_pm_ops, 5063 #endif 5064 .shutdown = igc_shutdown, 5065 }; 5066 5067 /** 5068 * igc_reinit_queues - return error 5069 * @adapter: pointer to adapter structure 5070 */ 5071 int igc_reinit_queues(struct igc_adapter *adapter) 5072 { 5073 struct net_device *netdev = adapter->netdev; 5074 struct pci_dev *pdev = adapter->pdev; 
5075 int err = 0; 5076 5077 if (netif_running(netdev)) 5078 igc_close(netdev); 5079 5080 igc_reset_interrupt_capability(adapter); 5081 5082 if (igc_init_interrupt_scheme(adapter, true)) { 5083 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 5084 return -ENOMEM; 5085 } 5086 5087 if (netif_running(netdev)) 5088 err = igc_open(netdev); 5089 5090 return err; 5091 } 5092 5093 /** 5094 * igc_get_hw_dev - return device 5095 * @hw: pointer to hardware structure 5096 * 5097 * used by hardware layer to print debugging information 5098 */ 5099 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 5100 { 5101 struct igc_adapter *adapter = hw->back; 5102 5103 return adapter->netdev; 5104 } 5105 5106 /** 5107 * igc_init_module - Driver Registration Routine 5108 * 5109 * igc_init_module is the first routine called when the driver is 5110 * loaded. All it does is register with the PCI subsystem. 5111 */ 5112 static int __init igc_init_module(void) 5113 { 5114 int ret; 5115 5116 pr_info("%s - version %s\n", 5117 igc_driver_string, igc_driver_version); 5118 5119 pr_info("%s\n", igc_copyright); 5120 5121 ret = pci_register_driver(&igc_driver); 5122 return ret; 5123 } 5124 5125 module_init(igc_init_module); 5126 5127 /** 5128 * igc_exit_module - Driver Exit Cleanup Routine 5129 * 5130 * igc_exit_module is called just before the driver is removed 5131 * from memory. 5132 */ 5133 static void __exit igc_exit_module(void) 5134 { 5135 pci_unregister_driver(&igc_driver); 5136 } 5137 5138 module_exit(igc_exit_module); 5139 /* igc_main.c */ 5140