1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2018 Intel Corporation */ 3 4 #include <linux/module.h> 5 #include <linux/types.h> 6 #include <linux/if_vlan.h> 7 #include <linux/aer.h> 8 #include <linux/tcp.h> 9 #include <linux/udp.h> 10 #include <linux/ip.h> 11 #include <linux/pm_runtime.h> 12 #include <net/pkt_sched.h> 13 #include <linux/bpf_trace.h> 14 #include <net/xdp_sock_drv.h> 15 #include <linux/pci.h> 16 17 #include <net/ipv6.h> 18 19 #include "igc.h" 20 #include "igc_hw.h" 21 #include "igc_tsn.h" 22 #include "igc_xdp.h" 23 24 #define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver" 25 26 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) 27 28 #define IGC_XDP_PASS 0 29 #define IGC_XDP_CONSUMED BIT(0) 30 #define IGC_XDP_TX BIT(1) 31 #define IGC_XDP_REDIRECT BIT(2) 32 33 static int debug = -1; 34 35 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); 36 MODULE_DESCRIPTION(DRV_SUMMARY); 37 MODULE_LICENSE("GPL v2"); 38 module_param(debug, int, 0); 39 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 40 41 char igc_driver_name[] = "igc"; 42 static const char igc_driver_string[] = DRV_SUMMARY; 43 static const char igc_copyright[] = 44 "Copyright(c) 2018 Intel Corporation."; 45 46 static const struct igc_info *igc_info_tbl[] = { 47 [board_base] = &igc_base_info, 48 }; 49 50 static const struct pci_device_id igc_pci_tbl[] = { 51 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base }, 52 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base }, 53 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base }, 54 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base }, 55 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base }, 56 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, 57 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, 58 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, 59 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, 60 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, 61 { 
PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, 62 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base }, 63 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base }, 64 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base }, 65 { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base }, 66 /* required last entry */ 67 {0, } 68 }; 69 70 MODULE_DEVICE_TABLE(pci, igc_pci_tbl); 71 72 enum latency_range { 73 lowest_latency = 0, 74 low_latency = 1, 75 bulk_latency = 2, 76 latency_invalid = 255 77 }; 78 79 void igc_reset(struct igc_adapter *adapter) 80 { 81 struct net_device *dev = adapter->netdev; 82 struct igc_hw *hw = &adapter->hw; 83 struct igc_fc_info *fc = &hw->fc; 84 u32 pba, hwm; 85 86 /* Repartition PBA for greater than 9k MTU if required */ 87 pba = IGC_PBA_34K; 88 89 /* flow control settings 90 * The high water mark must be low enough to fit one full frame 91 * after transmitting the pause frame. As such we must have enough 92 * space to allow for us to complete our current transmit and then 93 * receive the frame that is in progress from the link partner. 94 * Set it to: 95 * - the full Rx FIFO size minus one full Tx plus one full Rx frame 96 */ 97 hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); 98 99 fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ 100 fc->low_water = fc->high_water - 16; 101 fc->pause_time = 0xFFFF; 102 fc->send_xon = 1; 103 fc->current_mode = fc->requested_mode; 104 105 hw->mac.ops.reset_hw(hw); 106 107 if (hw->mac.ops.init_hw(hw)) 108 netdev_err(dev, "Error on hardware initialization\n"); 109 110 /* Re-establish EEE setting */ 111 igc_set_eee_i225(hw, true, true, true); 112 113 if (!netif_running(adapter->netdev)) 114 igc_power_down_phy_copper_base(&adapter->hw); 115 116 /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */ 117 wr32(IGC_VET, ETH_P_8021Q); 118 119 /* Re-enable PTP, where applicable. */ 120 igc_ptp_reset(adapter); 121 122 /* Re-enable TSN offloading, where applicable. 
*/ 123 igc_tsn_offload_apply(adapter); 124 125 igc_get_phy_info(hw); 126 } 127 128 /** 129 * igc_power_up_link - Power up the phy link 130 * @adapter: address of board private structure 131 */ 132 static void igc_power_up_link(struct igc_adapter *adapter) 133 { 134 igc_reset_phy(&adapter->hw); 135 136 igc_power_up_phy_copper(&adapter->hw); 137 138 igc_setup_link(&adapter->hw); 139 } 140 141 /** 142 * igc_release_hw_control - release control of the h/w to f/w 143 * @adapter: address of board private structure 144 * 145 * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 146 * For ASF and Pass Through versions of f/w this means that the 147 * driver is no longer loaded. 148 */ 149 static void igc_release_hw_control(struct igc_adapter *adapter) 150 { 151 struct igc_hw *hw = &adapter->hw; 152 u32 ctrl_ext; 153 154 /* Let firmware take over control of h/w */ 155 ctrl_ext = rd32(IGC_CTRL_EXT); 156 wr32(IGC_CTRL_EXT, 157 ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD); 158 } 159 160 /** 161 * igc_get_hw_control - get control of the h/w from f/w 162 * @adapter: address of board private structure 163 * 164 * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 165 * For ASF and Pass Through versions of f/w this means that 166 * the driver is loaded. 
167 */ 168 static void igc_get_hw_control(struct igc_adapter *adapter) 169 { 170 struct igc_hw *hw = &adapter->hw; 171 u32 ctrl_ext; 172 173 /* Let firmware know the driver has taken over */ 174 ctrl_ext = rd32(IGC_CTRL_EXT); 175 wr32(IGC_CTRL_EXT, 176 ctrl_ext | IGC_CTRL_EXT_DRV_LOAD); 177 } 178 179 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf) 180 { 181 dma_unmap_single(dev, dma_unmap_addr(buf, dma), 182 dma_unmap_len(buf, len), DMA_TO_DEVICE); 183 184 dma_unmap_len_set(buf, len, 0); 185 } 186 187 /** 188 * igc_clean_tx_ring - Free Tx Buffers 189 * @tx_ring: ring to be cleaned 190 */ 191 static void igc_clean_tx_ring(struct igc_ring *tx_ring) 192 { 193 u16 i = tx_ring->next_to_clean; 194 struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; 195 u32 xsk_frames = 0; 196 197 while (i != tx_ring->next_to_use) { 198 union igc_adv_tx_desc *eop_desc, *tx_desc; 199 200 switch (tx_buffer->type) { 201 case IGC_TX_BUFFER_TYPE_XSK: 202 xsk_frames++; 203 break; 204 case IGC_TX_BUFFER_TYPE_XDP: 205 xdp_return_frame(tx_buffer->xdpf); 206 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 207 break; 208 case IGC_TX_BUFFER_TYPE_SKB: 209 dev_kfree_skb_any(tx_buffer->skb); 210 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 211 break; 212 default: 213 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 214 break; 215 } 216 217 /* check for eop_desc to determine the end of the packet */ 218 eop_desc = tx_buffer->next_to_watch; 219 tx_desc = IGC_TX_DESC(tx_ring, i); 220 221 /* unmap remaining buffers */ 222 while (tx_desc != eop_desc) { 223 tx_buffer++; 224 tx_desc++; 225 i++; 226 if (unlikely(i == tx_ring->count)) { 227 i = 0; 228 tx_buffer = tx_ring->tx_buffer_info; 229 tx_desc = IGC_TX_DESC(tx_ring, 0); 230 } 231 232 /* unmap any remaining paged data */ 233 if (dma_unmap_len(tx_buffer, len)) 234 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 235 } 236 237 tx_buffer->next_to_watch = NULL; 238 239 /* move us one more past the eop_desc for 
start of next pkt */ 240 tx_buffer++; 241 i++; 242 if (unlikely(i == tx_ring->count)) { 243 i = 0; 244 tx_buffer = tx_ring->tx_buffer_info; 245 } 246 } 247 248 if (tx_ring->xsk_pool && xsk_frames) 249 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 250 251 /* reset BQL for queue */ 252 netdev_tx_reset_queue(txring_txq(tx_ring)); 253 254 /* reset next_to_use and next_to_clean */ 255 tx_ring->next_to_use = 0; 256 tx_ring->next_to_clean = 0; 257 } 258 259 /** 260 * igc_free_tx_resources - Free Tx Resources per Queue 261 * @tx_ring: Tx descriptor ring for a specific queue 262 * 263 * Free all transmit software resources 264 */ 265 void igc_free_tx_resources(struct igc_ring *tx_ring) 266 { 267 igc_clean_tx_ring(tx_ring); 268 269 vfree(tx_ring->tx_buffer_info); 270 tx_ring->tx_buffer_info = NULL; 271 272 /* if not set, then don't free */ 273 if (!tx_ring->desc) 274 return; 275 276 dma_free_coherent(tx_ring->dev, tx_ring->size, 277 tx_ring->desc, tx_ring->dma); 278 279 tx_ring->desc = NULL; 280 } 281 282 /** 283 * igc_free_all_tx_resources - Free Tx Resources for All Queues 284 * @adapter: board private structure 285 * 286 * Free all transmit software resources 287 */ 288 static void igc_free_all_tx_resources(struct igc_adapter *adapter) 289 { 290 int i; 291 292 for (i = 0; i < adapter->num_tx_queues; i++) 293 igc_free_tx_resources(adapter->tx_ring[i]); 294 } 295 296 /** 297 * igc_clean_all_tx_rings - Free Tx Buffers for all queues 298 * @adapter: board private structure 299 */ 300 static void igc_clean_all_tx_rings(struct igc_adapter *adapter) 301 { 302 int i; 303 304 for (i = 0; i < adapter->num_tx_queues; i++) 305 if (adapter->tx_ring[i]) 306 igc_clean_tx_ring(adapter->tx_ring[i]); 307 } 308 309 /** 310 * igc_setup_tx_resources - allocate Tx resources (Descriptors) 311 * @tx_ring: tx descriptor ring (for a specific queue) to setup 312 * 313 * Return 0 on success, negative on failure 314 */ 315 int igc_setup_tx_resources(struct igc_ring *tx_ring) 316 { 317 struct 
net_device *ndev = tx_ring->netdev; 318 struct device *dev = tx_ring->dev; 319 int size = 0; 320 321 size = sizeof(struct igc_tx_buffer) * tx_ring->count; 322 tx_ring->tx_buffer_info = vzalloc(size); 323 if (!tx_ring->tx_buffer_info) 324 goto err; 325 326 /* round up to nearest 4K */ 327 tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc); 328 tx_ring->size = ALIGN(tx_ring->size, 4096); 329 330 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, 331 &tx_ring->dma, GFP_KERNEL); 332 333 if (!tx_ring->desc) 334 goto err; 335 336 tx_ring->next_to_use = 0; 337 tx_ring->next_to_clean = 0; 338 339 return 0; 340 341 err: 342 vfree(tx_ring->tx_buffer_info); 343 netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); 344 return -ENOMEM; 345 } 346 347 /** 348 * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues 349 * @adapter: board private structure 350 * 351 * Return 0 on success, negative on failure 352 */ 353 static int igc_setup_all_tx_resources(struct igc_adapter *adapter) 354 { 355 struct net_device *dev = adapter->netdev; 356 int i, err = 0; 357 358 for (i = 0; i < adapter->num_tx_queues; i++) { 359 err = igc_setup_tx_resources(adapter->tx_ring[i]); 360 if (err) { 361 netdev_err(dev, "Error on Tx queue %u setup\n", i); 362 for (i--; i >= 0; i--) 363 igc_free_tx_resources(adapter->tx_ring[i]); 364 break; 365 } 366 } 367 368 return err; 369 } 370 371 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring) 372 { 373 u16 i = rx_ring->next_to_clean; 374 375 dev_kfree_skb(rx_ring->skb); 376 rx_ring->skb = NULL; 377 378 /* Free all the Rx ring sk_buffs */ 379 while (i != rx_ring->next_to_alloc) { 380 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 381 382 /* Invalidate cache lines that may have been written to by 383 * device so that we avoid corrupting memory. 
384 */ 385 dma_sync_single_range_for_cpu(rx_ring->dev, 386 buffer_info->dma, 387 buffer_info->page_offset, 388 igc_rx_bufsz(rx_ring), 389 DMA_FROM_DEVICE); 390 391 /* free resources associated with mapping */ 392 dma_unmap_page_attrs(rx_ring->dev, 393 buffer_info->dma, 394 igc_rx_pg_size(rx_ring), 395 DMA_FROM_DEVICE, 396 IGC_RX_DMA_ATTR); 397 __page_frag_cache_drain(buffer_info->page, 398 buffer_info->pagecnt_bias); 399 400 i++; 401 if (i == rx_ring->count) 402 i = 0; 403 } 404 } 405 406 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring) 407 { 408 struct igc_rx_buffer *bi; 409 u16 i; 410 411 for (i = 0; i < ring->count; i++) { 412 bi = &ring->rx_buffer_info[i]; 413 if (!bi->xdp) 414 continue; 415 416 xsk_buff_free(bi->xdp); 417 bi->xdp = NULL; 418 } 419 } 420 421 /** 422 * igc_clean_rx_ring - Free Rx Buffers per Queue 423 * @ring: ring to free buffers from 424 */ 425 static void igc_clean_rx_ring(struct igc_ring *ring) 426 { 427 if (ring->xsk_pool) 428 igc_clean_rx_ring_xsk_pool(ring); 429 else 430 igc_clean_rx_ring_page_shared(ring); 431 432 clear_ring_uses_large_buffer(ring); 433 434 ring->next_to_alloc = 0; 435 ring->next_to_clean = 0; 436 ring->next_to_use = 0; 437 } 438 439 /** 440 * igc_clean_all_rx_rings - Free Rx Buffers for all queues 441 * @adapter: board private structure 442 */ 443 static void igc_clean_all_rx_rings(struct igc_adapter *adapter) 444 { 445 int i; 446 447 for (i = 0; i < adapter->num_rx_queues; i++) 448 if (adapter->rx_ring[i]) 449 igc_clean_rx_ring(adapter->rx_ring[i]); 450 } 451 452 /** 453 * igc_free_rx_resources - Free Rx Resources 454 * @rx_ring: ring to clean the resources from 455 * 456 * Free all receive software resources 457 */ 458 void igc_free_rx_resources(struct igc_ring *rx_ring) 459 { 460 igc_clean_rx_ring(rx_ring); 461 462 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 463 464 vfree(rx_ring->rx_buffer_info); 465 rx_ring->rx_buffer_info = NULL; 466 467 /* if not set, then don't free */ 468 if (!rx_ring->desc) 469 return; 
470 471 dma_free_coherent(rx_ring->dev, rx_ring->size, 472 rx_ring->desc, rx_ring->dma); 473 474 rx_ring->desc = NULL; 475 } 476 477 /** 478 * igc_free_all_rx_resources - Free Rx Resources for All Queues 479 * @adapter: board private structure 480 * 481 * Free all receive software resources 482 */ 483 static void igc_free_all_rx_resources(struct igc_adapter *adapter) 484 { 485 int i; 486 487 for (i = 0; i < adapter->num_rx_queues; i++) 488 igc_free_rx_resources(adapter->rx_ring[i]); 489 } 490 491 /** 492 * igc_setup_rx_resources - allocate Rx resources (Descriptors) 493 * @rx_ring: rx descriptor ring (for a specific queue) to setup 494 * 495 * Returns 0 on success, negative on failure 496 */ 497 int igc_setup_rx_resources(struct igc_ring *rx_ring) 498 { 499 struct net_device *ndev = rx_ring->netdev; 500 struct device *dev = rx_ring->dev; 501 u8 index = rx_ring->queue_index; 502 int size, desc_len, res; 503 504 res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index, 505 rx_ring->q_vector->napi.napi_id); 506 if (res < 0) { 507 netdev_err(ndev, "Failed to register xdp_rxq index %u\n", 508 index); 509 return res; 510 } 511 512 size = sizeof(struct igc_rx_buffer) * rx_ring->count; 513 rx_ring->rx_buffer_info = vzalloc(size); 514 if (!rx_ring->rx_buffer_info) 515 goto err; 516 517 desc_len = sizeof(union igc_adv_rx_desc); 518 519 /* Round up to nearest 4K */ 520 rx_ring->size = rx_ring->count * desc_len; 521 rx_ring->size = ALIGN(rx_ring->size, 4096); 522 523 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, 524 &rx_ring->dma, GFP_KERNEL); 525 526 if (!rx_ring->desc) 527 goto err; 528 529 rx_ring->next_to_alloc = 0; 530 rx_ring->next_to_clean = 0; 531 rx_ring->next_to_use = 0; 532 533 return 0; 534 535 err: 536 xdp_rxq_info_unreg(&rx_ring->xdp_rxq); 537 vfree(rx_ring->rx_buffer_info); 538 rx_ring->rx_buffer_info = NULL; 539 netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); 540 return -ENOMEM; 541 } 542 543 /** 544 * igc_setup_all_rx_resources - 
wrapper to allocate Rx resources 545 * (Descriptors) for all queues 546 * @adapter: board private structure 547 * 548 * Return 0 on success, negative on failure 549 */ 550 static int igc_setup_all_rx_resources(struct igc_adapter *adapter) 551 { 552 struct net_device *dev = adapter->netdev; 553 int i, err = 0; 554 555 for (i = 0; i < adapter->num_rx_queues; i++) { 556 err = igc_setup_rx_resources(adapter->rx_ring[i]); 557 if (err) { 558 netdev_err(dev, "Error on Rx queue %u setup\n", i); 559 for (i--; i >= 0; i--) 560 igc_free_rx_resources(adapter->rx_ring[i]); 561 break; 562 } 563 } 564 565 return err; 566 } 567 568 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter, 569 struct igc_ring *ring) 570 { 571 if (!igc_xdp_is_enabled(adapter) || 572 !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags)) 573 return NULL; 574 575 return xsk_get_pool_from_qid(ring->netdev, ring->queue_index); 576 } 577 578 /** 579 * igc_configure_rx_ring - Configure a receive ring after Reset 580 * @adapter: board private structure 581 * @ring: receive ring to be configured 582 * 583 * Configure the Rx unit of the MAC after a reset. 
584 */ 585 static void igc_configure_rx_ring(struct igc_adapter *adapter, 586 struct igc_ring *ring) 587 { 588 struct igc_hw *hw = &adapter->hw; 589 union igc_adv_rx_desc *rx_desc; 590 int reg_idx = ring->reg_idx; 591 u32 srrctl = 0, rxdctl = 0; 592 u64 rdba = ring->dma; 593 u32 buf_size; 594 595 xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); 596 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 597 if (ring->xsk_pool) { 598 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 599 MEM_TYPE_XSK_BUFF_POOL, 600 NULL)); 601 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq); 602 } else { 603 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, 604 MEM_TYPE_PAGE_SHARED, 605 NULL)); 606 } 607 608 if (igc_xdp_is_enabled(adapter)) 609 set_ring_uses_large_buffer(ring); 610 611 /* disable the queue */ 612 wr32(IGC_RXDCTL(reg_idx), 0); 613 614 /* Set DMA base address registers */ 615 wr32(IGC_RDBAL(reg_idx), 616 rdba & 0x00000000ffffffffULL); 617 wr32(IGC_RDBAH(reg_idx), rdba >> 32); 618 wr32(IGC_RDLEN(reg_idx), 619 ring->count * sizeof(union igc_adv_rx_desc)); 620 621 /* initialize head and tail */ 622 ring->tail = adapter->io_addr + IGC_RDT(reg_idx); 623 wr32(IGC_RDH(reg_idx), 0); 624 writel(0, ring->tail); 625 626 /* reset next-to- use/clean to place SW in sync with hardware */ 627 ring->next_to_clean = 0; 628 ring->next_to_use = 0; 629 630 if (ring->xsk_pool) 631 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool); 632 else if (ring_uses_large_buffer(ring)) 633 buf_size = IGC_RXBUFFER_3072; 634 else 635 buf_size = IGC_RXBUFFER_2048; 636 637 srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT; 638 srrctl |= buf_size >> IGC_SRRCTL_BSIZEPKT_SHIFT; 639 srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF; 640 641 wr32(IGC_SRRCTL(reg_idx), srrctl); 642 643 rxdctl |= IGC_RX_PTHRESH; 644 rxdctl |= IGC_RX_HTHRESH << 8; 645 rxdctl |= IGC_RX_WTHRESH << 16; 646 647 /* initialize rx_buffer_info */ 648 memset(ring->rx_buffer_info, 0, 649 sizeof(struct igc_rx_buffer) * ring->count); 650 651 
/* initialize Rx descriptor 0 */ 652 rx_desc = IGC_RX_DESC(ring, 0); 653 rx_desc->wb.upper.length = 0; 654 655 /* enable receive descriptor fetching */ 656 rxdctl |= IGC_RXDCTL_QUEUE_ENABLE; 657 658 wr32(IGC_RXDCTL(reg_idx), rxdctl); 659 } 660 661 /** 662 * igc_configure_rx - Configure receive Unit after Reset 663 * @adapter: board private structure 664 * 665 * Configure the Rx unit of the MAC after a reset. 666 */ 667 static void igc_configure_rx(struct igc_adapter *adapter) 668 { 669 int i; 670 671 /* Setup the HW Rx Head and Tail Descriptor Pointers and 672 * the Base and Length of the Rx Descriptor Ring 673 */ 674 for (i = 0; i < adapter->num_rx_queues; i++) 675 igc_configure_rx_ring(adapter, adapter->rx_ring[i]); 676 } 677 678 /** 679 * igc_configure_tx_ring - Configure transmit ring after Reset 680 * @adapter: board private structure 681 * @ring: tx ring to configure 682 * 683 * Configure a transmit ring after a reset. 684 */ 685 static void igc_configure_tx_ring(struct igc_adapter *adapter, 686 struct igc_ring *ring) 687 { 688 struct igc_hw *hw = &adapter->hw; 689 int reg_idx = ring->reg_idx; 690 u64 tdba = ring->dma; 691 u32 txdctl = 0; 692 693 ring->xsk_pool = igc_get_xsk_pool(adapter, ring); 694 695 /* disable the queue */ 696 wr32(IGC_TXDCTL(reg_idx), 0); 697 wrfl(); 698 mdelay(10); 699 700 wr32(IGC_TDLEN(reg_idx), 701 ring->count * sizeof(union igc_adv_tx_desc)); 702 wr32(IGC_TDBAL(reg_idx), 703 tdba & 0x00000000ffffffffULL); 704 wr32(IGC_TDBAH(reg_idx), tdba >> 32); 705 706 ring->tail = adapter->io_addr + IGC_TDT(reg_idx); 707 wr32(IGC_TDH(reg_idx), 0); 708 writel(0, ring->tail); 709 710 txdctl |= IGC_TX_PTHRESH; 711 txdctl |= IGC_TX_HTHRESH << 8; 712 txdctl |= IGC_TX_WTHRESH << 16; 713 714 txdctl |= IGC_TXDCTL_QUEUE_ENABLE; 715 wr32(IGC_TXDCTL(reg_idx), txdctl); 716 } 717 718 /** 719 * igc_configure_tx - Configure transmit Unit after Reset 720 * @adapter: board private structure 721 * 722 * Configure the Tx unit of the MAC after a reset. 
723 */ 724 static void igc_configure_tx(struct igc_adapter *adapter) 725 { 726 int i; 727 728 for (i = 0; i < adapter->num_tx_queues; i++) 729 igc_configure_tx_ring(adapter, adapter->tx_ring[i]); 730 } 731 732 /** 733 * igc_setup_mrqc - configure the multiple receive queue control registers 734 * @adapter: Board private structure 735 */ 736 static void igc_setup_mrqc(struct igc_adapter *adapter) 737 { 738 struct igc_hw *hw = &adapter->hw; 739 u32 j, num_rx_queues; 740 u32 mrqc, rxcsum; 741 u32 rss_key[10]; 742 743 netdev_rss_key_fill(rss_key, sizeof(rss_key)); 744 for (j = 0; j < 10; j++) 745 wr32(IGC_RSSRK(j), rss_key[j]); 746 747 num_rx_queues = adapter->rss_queues; 748 749 if (adapter->rss_indir_tbl_init != num_rx_queues) { 750 for (j = 0; j < IGC_RETA_SIZE; j++) 751 adapter->rss_indir_tbl[j] = 752 (j * num_rx_queues) / IGC_RETA_SIZE; 753 adapter->rss_indir_tbl_init = num_rx_queues; 754 } 755 igc_write_rss_indir_tbl(adapter); 756 757 /* Disable raw packet checksumming so that RSS hash is placed in 758 * descriptor on writeback. 
No need to enable TCP/UDP/IP checksum 759 * offloads as they are enabled by default 760 */ 761 rxcsum = rd32(IGC_RXCSUM); 762 rxcsum |= IGC_RXCSUM_PCSD; 763 764 /* Enable Receive Checksum Offload for SCTP */ 765 rxcsum |= IGC_RXCSUM_CRCOFL; 766 767 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 768 wr32(IGC_RXCSUM, rxcsum); 769 770 /* Generate RSS hash based on packet types, TCP/UDP 771 * port numbers and/or IPv4/v6 src and dst addresses 772 */ 773 mrqc = IGC_MRQC_RSS_FIELD_IPV4 | 774 IGC_MRQC_RSS_FIELD_IPV4_TCP | 775 IGC_MRQC_RSS_FIELD_IPV6 | 776 IGC_MRQC_RSS_FIELD_IPV6_TCP | 777 IGC_MRQC_RSS_FIELD_IPV6_TCP_EX; 778 779 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP) 780 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP; 781 if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP) 782 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP; 783 784 mrqc |= IGC_MRQC_ENABLE_RSS_MQ; 785 786 wr32(IGC_MRQC, mrqc); 787 } 788 789 /** 790 * igc_setup_rctl - configure the receive control registers 791 * @adapter: Board private structure 792 */ 793 static void igc_setup_rctl(struct igc_adapter *adapter) 794 { 795 struct igc_hw *hw = &adapter->hw; 796 u32 rctl; 797 798 rctl = rd32(IGC_RCTL); 799 800 rctl &= ~(3 << IGC_RCTL_MO_SHIFT); 801 rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC); 802 803 rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF | 804 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT); 805 806 /* enable stripping of CRC. Newer features require 807 * that the HW strips the CRC. 808 */ 809 rctl |= IGC_RCTL_SECRC; 810 811 /* disable store bad packets and clear size bits. */ 812 rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256); 813 814 /* enable LPE to allow for reception of jumbo frames */ 815 rctl |= IGC_RCTL_LPE; 816 817 /* disable queue 0 to prevent tail write w/o re-config */ 818 wr32(IGC_RXDCTL(0), 0); 819 820 /* This is useful for sniffing bad packets. 
*/ 821 if (adapter->netdev->features & NETIF_F_RXALL) { 822 /* UPE and MPE will be handled by normal PROMISC logic 823 * in set_rx_mode 824 */ 825 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */ 826 IGC_RCTL_BAM | /* RX All Bcast Pkts */ 827 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 828 829 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */ 830 IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */ 831 } 832 833 wr32(IGC_RCTL, rctl); 834 } 835 836 /** 837 * igc_setup_tctl - configure the transmit control registers 838 * @adapter: Board private structure 839 */ 840 static void igc_setup_tctl(struct igc_adapter *adapter) 841 { 842 struct igc_hw *hw = &adapter->hw; 843 u32 tctl; 844 845 /* disable queue 0 which icould be enabled by default */ 846 wr32(IGC_TXDCTL(0), 0); 847 848 /* Program the Transmit Control Register */ 849 tctl = rd32(IGC_TCTL); 850 tctl &= ~IGC_TCTL_CT; 851 tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC | 852 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT); 853 854 /* Enable transmits */ 855 tctl |= IGC_TCTL_EN; 856 857 wr32(IGC_TCTL, tctl); 858 } 859 860 /** 861 * igc_set_mac_filter_hw() - Set MAC address filter in hardware 862 * @adapter: Pointer to adapter where the filter should be set 863 * @index: Filter index 864 * @type: MAC address filter type (source or destination) 865 * @addr: MAC address 866 * @queue: If non-negative, queue assignment feature is enabled and frames 867 * matching the filter are enqueued onto 'queue'. Otherwise, queue 868 * assignment is disabled. 
869 */ 870 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, 871 enum igc_mac_filter_type type, 872 const u8 *addr, int queue) 873 { 874 struct net_device *dev = adapter->netdev; 875 struct igc_hw *hw = &adapter->hw; 876 u32 ral, rah; 877 878 if (WARN_ON(index >= hw->mac.rar_entry_count)) 879 return; 880 881 ral = le32_to_cpup((__le32 *)(addr)); 882 rah = le16_to_cpup((__le16 *)(addr + 4)); 883 884 if (type == IGC_MAC_FILTER_TYPE_SRC) { 885 rah &= ~IGC_RAH_ASEL_MASK; 886 rah |= IGC_RAH_ASEL_SRC_ADDR; 887 } 888 889 if (queue >= 0) { 890 rah &= ~IGC_RAH_QSEL_MASK; 891 rah |= (queue << IGC_RAH_QSEL_SHIFT); 892 rah |= IGC_RAH_QSEL_ENABLE; 893 } 894 895 rah |= IGC_RAH_AV; 896 897 wr32(IGC_RAL(index), ral); 898 wr32(IGC_RAH(index), rah); 899 900 netdev_dbg(dev, "MAC address filter set in HW: index %d", index); 901 } 902 903 /** 904 * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware 905 * @adapter: Pointer to adapter where the filter should be cleared 906 * @index: Filter index 907 */ 908 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) 909 { 910 struct net_device *dev = adapter->netdev; 911 struct igc_hw *hw = &adapter->hw; 912 913 if (WARN_ON(index >= hw->mac.rar_entry_count)) 914 return; 915 916 wr32(IGC_RAL(index), 0); 917 wr32(IGC_RAH(index), 0); 918 919 netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index); 920 } 921 922 /* Set default MAC address for the PF in the first RAR entry */ 923 static void igc_set_default_mac_filter(struct igc_adapter *adapter) 924 { 925 struct net_device *dev = adapter->netdev; 926 u8 *addr = adapter->hw.mac.addr; 927 928 netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); 929 930 igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); 931 } 932 933 /** 934 * igc_set_mac - Change the Ethernet Address of the NIC 935 * @netdev: network interface device structure 936 * @p: pointer to an address structure 937 * 938 * Returns 0 on 
success, negative on failure 939 */ 940 static int igc_set_mac(struct net_device *netdev, void *p) 941 { 942 struct igc_adapter *adapter = netdev_priv(netdev); 943 struct igc_hw *hw = &adapter->hw; 944 struct sockaddr *addr = p; 945 946 if (!is_valid_ether_addr(addr->sa_data)) 947 return -EADDRNOTAVAIL; 948 949 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 950 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 951 952 /* set the correct pool for the new PF MAC address in entry 0 */ 953 igc_set_default_mac_filter(adapter); 954 955 return 0; 956 } 957 958 /** 959 * igc_write_mc_addr_list - write multicast addresses to MTA 960 * @netdev: network interface device structure 961 * 962 * Writes multicast address list to the MTA hash table. 963 * Returns: -ENOMEM on failure 964 * 0 on no addresses written 965 * X on writing X addresses to MTA 966 **/ 967 static int igc_write_mc_addr_list(struct net_device *netdev) 968 { 969 struct igc_adapter *adapter = netdev_priv(netdev); 970 struct igc_hw *hw = &adapter->hw; 971 struct netdev_hw_addr *ha; 972 u8 *mta_list; 973 int i; 974 975 if (netdev_mc_empty(netdev)) { 976 /* nothing to program, so clear mc list */ 977 igc_update_mc_addr_list(hw, NULL, 0); 978 return 0; 979 } 980 981 mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC); 982 if (!mta_list) 983 return -ENOMEM; 984 985 /* The shared function expects a packed array of only addresses. 
*/ 986 i = 0; 987 netdev_for_each_mc_addr(ha, netdev) 988 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 989 990 igc_update_mc_addr_list(hw, mta_list, i); 991 kfree(mta_list); 992 993 return netdev_mc_count(netdev); 994 } 995 996 static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime) 997 { 998 ktime_t cycle_time = adapter->cycle_time; 999 ktime_t base_time = adapter->base_time; 1000 u32 launchtime; 1001 1002 /* FIXME: when using ETF together with taprio, we may have a 1003 * case where 'delta' is larger than the cycle_time, this may 1004 * cause problems if we don't read the current value of 1005 * IGC_BASET, as the value writen into the launchtime 1006 * descriptor field may be misinterpreted. 1007 */ 1008 div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime); 1009 1010 return cpu_to_le32(launchtime); 1011 } 1012 1013 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring, 1014 struct igc_tx_buffer *first, 1015 u32 vlan_macip_lens, u32 type_tucmd, 1016 u32 mss_l4len_idx) 1017 { 1018 struct igc_adv_tx_context_desc *context_desc; 1019 u16 i = tx_ring->next_to_use; 1020 1021 context_desc = IGC_TX_CTXTDESC(tx_ring, i); 1022 1023 i++; 1024 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 1025 1026 /* set bits to identify this as an advanced context descriptor */ 1027 type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT; 1028 1029 /* For i225, context index must be unique per ring. */ 1030 if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 1031 mss_l4len_idx |= tx_ring->reg_idx << 4; 1032 1033 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 1034 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 1035 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 1036 1037 /* We assume there is always a valid Tx time available. Invalid times 1038 * should have been handled by the upper layers. 
1039 */ 1040 if (tx_ring->launchtime_enable) { 1041 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1042 ktime_t txtime = first->skb->tstamp; 1043 1044 skb_txtime_consumed(first->skb); 1045 context_desc->launch_time = igc_tx_launchtime(adapter, 1046 txtime); 1047 } else { 1048 context_desc->launch_time = 0; 1049 } 1050 } 1051 1052 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first) 1053 { 1054 struct sk_buff *skb = first->skb; 1055 u32 vlan_macip_lens = 0; 1056 u32 type_tucmd = 0; 1057 1058 if (skb->ip_summed != CHECKSUM_PARTIAL) { 1059 csum_failed: 1060 if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) && 1061 !tx_ring->launchtime_enable) 1062 return; 1063 goto no_csum; 1064 } 1065 1066 switch (skb->csum_offset) { 1067 case offsetof(struct tcphdr, check): 1068 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1069 fallthrough; 1070 case offsetof(struct udphdr, check): 1071 break; 1072 case offsetof(struct sctphdr, checksum): 1073 /* validate that this is actually an SCTP request */ 1074 if (skb_csum_is_sctp(skb)) { 1075 type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP; 1076 break; 1077 } 1078 fallthrough; 1079 default: 1080 skb_checksum_help(skb); 1081 goto csum_failed; 1082 } 1083 1084 /* update TX checksum flag */ 1085 first->tx_flags |= IGC_TX_FLAGS_CSUM; 1086 vlan_macip_lens = skb_checksum_start_offset(skb) - 1087 skb_network_offset(skb); 1088 no_csum: 1089 vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT; 1090 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1091 1092 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0); 1093 } 1094 1095 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 1096 { 1097 struct net_device *netdev = tx_ring->netdev; 1098 1099 netif_stop_subqueue(netdev, tx_ring->queue_index); 1100 1101 /* memory barriier comment */ 1102 smp_mb(); 1103 1104 /* We need to check again in a case another CPU has just 1105 * made room available. 
1106 */ 1107 if (igc_desc_unused(tx_ring) < size) 1108 return -EBUSY; 1109 1110 /* A reprieve! */ 1111 netif_wake_subqueue(netdev, tx_ring->queue_index); 1112 1113 u64_stats_update_begin(&tx_ring->tx_syncp2); 1114 tx_ring->tx_stats.restart_queue2++; 1115 u64_stats_update_end(&tx_ring->tx_syncp2); 1116 1117 return 0; 1118 } 1119 1120 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size) 1121 { 1122 if (igc_desc_unused(tx_ring) >= size) 1123 return 0; 1124 return __igc_maybe_stop_tx(tx_ring, size); 1125 } 1126 1127 #define IGC_SET_FLAG(_input, _flag, _result) \ 1128 (((_flag) <= (_result)) ? \ 1129 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ 1130 ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) 1131 1132 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) 1133 { 1134 /* set type for advanced descriptor with frame checksum insertion */ 1135 u32 cmd_type = IGC_ADVTXD_DTYP_DATA | 1136 IGC_ADVTXD_DCMD_DEXT | 1137 IGC_ADVTXD_DCMD_IFCS; 1138 1139 /* set HW vlan bit if vlan is present */ 1140 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN, 1141 IGC_ADVTXD_DCMD_VLE); 1142 1143 /* set segmentation bits for TSO */ 1144 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO, 1145 (IGC_ADVTXD_DCMD_TSE)); 1146 1147 /* set timestamp bit if present */ 1148 cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP, 1149 (IGC_ADVTXD_MAC_TSTAMP)); 1150 1151 /* insert frame checksum */ 1152 cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS); 1153 1154 return cmd_type; 1155 } 1156 1157 static void igc_tx_olinfo_status(struct igc_ring *tx_ring, 1158 union igc_adv_tx_desc *tx_desc, 1159 u32 tx_flags, unsigned int paylen) 1160 { 1161 u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT; 1162 1163 /* insert L4 checksum */ 1164 olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) * 1165 ((IGC_TXD_POPTS_TXSM << 8) / 1166 IGC_TX_FLAGS_CSUM); 1167 1168 /* insert IPv4 checksum */ 1169 olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) * 1170 
(((IGC_TXD_POPTS_IXSM << 8)) / 1171 IGC_TX_FLAGS_IPV4); 1172 1173 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 1174 } 1175 1176 static int igc_tx_map(struct igc_ring *tx_ring, 1177 struct igc_tx_buffer *first, 1178 const u8 hdr_len) 1179 { 1180 struct sk_buff *skb = first->skb; 1181 struct igc_tx_buffer *tx_buffer; 1182 union igc_adv_tx_desc *tx_desc; 1183 u32 tx_flags = first->tx_flags; 1184 skb_frag_t *frag; 1185 u16 i = tx_ring->next_to_use; 1186 unsigned int data_len, size; 1187 dma_addr_t dma; 1188 u32 cmd_type; 1189 1190 cmd_type = igc_tx_cmd_type(skb, tx_flags); 1191 tx_desc = IGC_TX_DESC(tx_ring, i); 1192 1193 igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); 1194 1195 size = skb_headlen(skb); 1196 data_len = skb->data_len; 1197 1198 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); 1199 1200 tx_buffer = first; 1201 1202 for (frag = &skb_shinfo(skb)->frags[0];; frag++) { 1203 if (dma_mapping_error(tx_ring->dev, dma)) 1204 goto dma_error; 1205 1206 /* record length, and DMA address */ 1207 dma_unmap_len_set(tx_buffer, len, size); 1208 dma_unmap_addr_set(tx_buffer, dma, dma); 1209 1210 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1211 1212 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) { 1213 tx_desc->read.cmd_type_len = 1214 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD); 1215 1216 i++; 1217 tx_desc++; 1218 if (i == tx_ring->count) { 1219 tx_desc = IGC_TX_DESC(tx_ring, 0); 1220 i = 0; 1221 } 1222 tx_desc->read.olinfo_status = 0; 1223 1224 dma += IGC_MAX_DATA_PER_TXD; 1225 size -= IGC_MAX_DATA_PER_TXD; 1226 1227 tx_desc->read.buffer_addr = cpu_to_le64(dma); 1228 } 1229 1230 if (likely(!data_len)) 1231 break; 1232 1233 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); 1234 1235 i++; 1236 tx_desc++; 1237 if (i == tx_ring->count) { 1238 tx_desc = IGC_TX_DESC(tx_ring, 0); 1239 i = 0; 1240 } 1241 tx_desc->read.olinfo_status = 0; 1242 1243 size = skb_frag_size(frag); 1244 data_len -= size; 1245 1246 dma = 
skb_frag_dma_map(tx_ring->dev, frag, 0, 1247 size, DMA_TO_DEVICE); 1248 1249 tx_buffer = &tx_ring->tx_buffer_info[i]; 1250 } 1251 1252 /* write last descriptor with RS and EOP bits */ 1253 cmd_type |= size | IGC_TXD_DCMD; 1254 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 1255 1256 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); 1257 1258 /* set the timestamp */ 1259 first->time_stamp = jiffies; 1260 1261 skb_tx_timestamp(skb); 1262 1263 /* Force memory writes to complete before letting h/w know there 1264 * are new descriptors to fetch. (Only applicable for weak-ordered 1265 * memory model archs, such as IA-64). 1266 * 1267 * We also need this memory barrier to make certain all of the 1268 * status bits have been updated before next_to_watch is written. 1269 */ 1270 wmb(); 1271 1272 /* set next_to_watch value indicating a packet is present */ 1273 first->next_to_watch = tx_desc; 1274 1275 i++; 1276 if (i == tx_ring->count) 1277 i = 0; 1278 1279 tx_ring->next_to_use = i; 1280 1281 /* Make sure there is space in the ring for the next send. 
*/ 1282 igc_maybe_stop_tx(tx_ring, DESC_NEEDED); 1283 1284 if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) { 1285 writel(i, tx_ring->tail); 1286 } 1287 1288 return 0; 1289 dma_error: 1290 netdev_err(tx_ring->netdev, "TX DMA map failed\n"); 1291 tx_buffer = &tx_ring->tx_buffer_info[i]; 1292 1293 /* clear dma mappings for failed tx_buffer_info map */ 1294 while (tx_buffer != first) { 1295 if (dma_unmap_len(tx_buffer, len)) 1296 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1297 1298 if (i-- == 0) 1299 i += tx_ring->count; 1300 tx_buffer = &tx_ring->tx_buffer_info[i]; 1301 } 1302 1303 if (dma_unmap_len(tx_buffer, len)) 1304 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 1305 1306 dev_kfree_skb_any(tx_buffer->skb); 1307 tx_buffer->skb = NULL; 1308 1309 tx_ring->next_to_use = i; 1310 1311 return -1; 1312 } 1313 1314 static int igc_tso(struct igc_ring *tx_ring, 1315 struct igc_tx_buffer *first, 1316 u8 *hdr_len) 1317 { 1318 u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; 1319 struct sk_buff *skb = first->skb; 1320 union { 1321 struct iphdr *v4; 1322 struct ipv6hdr *v6; 1323 unsigned char *hdr; 1324 } ip; 1325 union { 1326 struct tcphdr *tcp; 1327 struct udphdr *udp; 1328 unsigned char *hdr; 1329 } l4; 1330 u32 paylen, l4_offset; 1331 int err; 1332 1333 if (skb->ip_summed != CHECKSUM_PARTIAL) 1334 return 0; 1335 1336 if (!skb_is_gso(skb)) 1337 return 0; 1338 1339 err = skb_cow_head(skb, 0); 1340 if (err < 0) 1341 return err; 1342 1343 ip.hdr = skb_network_header(skb); 1344 l4.hdr = skb_checksum_start(skb); 1345 1346 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 1347 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP; 1348 1349 /* initialize outer IP header fields */ 1350 if (ip.v4->version == 4) { 1351 unsigned char *csum_start = skb_checksum_start(skb); 1352 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4); 1353 1354 /* IP header will have to cancel out any data that 1355 * is not a part of the outer IP header 1356 */ 1357 ip.v4->check = 
csum_fold(csum_partial(trans_start, 1358 csum_start - trans_start, 1359 0)); 1360 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4; 1361 1362 ip.v4->tot_len = 0; 1363 first->tx_flags |= IGC_TX_FLAGS_TSO | 1364 IGC_TX_FLAGS_CSUM | 1365 IGC_TX_FLAGS_IPV4; 1366 } else { 1367 ip.v6->payload_len = 0; 1368 first->tx_flags |= IGC_TX_FLAGS_TSO | 1369 IGC_TX_FLAGS_CSUM; 1370 } 1371 1372 /* determine offset of inner transport header */ 1373 l4_offset = l4.hdr - skb->data; 1374 1375 /* remove payload length from inner checksum */ 1376 paylen = skb->len - l4_offset; 1377 if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) { 1378 /* compute length of segmentation header */ 1379 *hdr_len = (l4.tcp->doff * 4) + l4_offset; 1380 csum_replace_by_diff(&l4.tcp->check, 1381 (__force __wsum)htonl(paylen)); 1382 } else { 1383 /* compute length of segmentation header */ 1384 *hdr_len = sizeof(*l4.udp) + l4_offset; 1385 csum_replace_by_diff(&l4.udp->check, 1386 (__force __wsum)htonl(paylen)); 1387 } 1388 1389 /* update gso size and bytecount with header size */ 1390 first->gso_segs = skb_shinfo(skb)->gso_segs; 1391 first->bytecount += (first->gso_segs - 1) * *hdr_len; 1392 1393 /* MSS L4LEN IDX */ 1394 mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT; 1395 mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT; 1396 1397 /* VLAN MACLEN IPLEN */ 1398 vlan_macip_lens = l4.hdr - ip.hdr; 1399 vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT; 1400 vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK; 1401 1402 igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, 1403 type_tucmd, mss_l4len_idx); 1404 1405 return 1; 1406 } 1407 1408 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb, 1409 struct igc_ring *tx_ring) 1410 { 1411 u16 count = TXD_USE_COUNT(skb_headlen(skb)); 1412 __be16 protocol = vlan_get_protocol(skb); 1413 struct igc_tx_buffer *first; 1414 u32 tx_flags = 0; 1415 unsigned short f; 1416 u8 hdr_len = 0; 1417 int tso = 0; 1418 1419 /* need: 1 
descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD, 1420 * + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD, 1421 * + 2 desc gap to keep tail from touching head, 1422 * + 1 desc for context descriptor, 1423 * otherwise try next time 1424 */ 1425 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) 1426 count += TXD_USE_COUNT(skb_frag_size( 1427 &skb_shinfo(skb)->frags[f])); 1428 1429 if (igc_maybe_stop_tx(tx_ring, count + 3)) { 1430 /* this is a hard error */ 1431 return NETDEV_TX_BUSY; 1432 } 1433 1434 /* record the location of the first descriptor for this packet */ 1435 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; 1436 first->type = IGC_TX_BUFFER_TYPE_SKB; 1437 first->skb = skb; 1438 first->bytecount = skb->len; 1439 first->gso_segs = 1; 1440 1441 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { 1442 struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); 1443 1444 /* FIXME: add support for retrieving timestamps from 1445 * the other timer registers before skipping the 1446 * timestamping request. 
1447 */ 1448 if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && 1449 !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, 1450 &adapter->state)) { 1451 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; 1452 tx_flags |= IGC_TX_FLAGS_TSTAMP; 1453 1454 adapter->ptp_tx_skb = skb_get(skb); 1455 adapter->ptp_tx_start = jiffies; 1456 } else { 1457 adapter->tx_hwtstamp_skipped++; 1458 } 1459 } 1460 1461 if (skb_vlan_tag_present(skb)) { 1462 tx_flags |= IGC_TX_FLAGS_VLAN; 1463 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT); 1464 } 1465 1466 /* record initial flags and protocol */ 1467 first->tx_flags = tx_flags; 1468 first->protocol = protocol; 1469 1470 tso = igc_tso(tx_ring, first, &hdr_len); 1471 if (tso < 0) 1472 goto out_drop; 1473 else if (!tso) 1474 igc_tx_csum(tx_ring, first); 1475 1476 igc_tx_map(tx_ring, first, hdr_len); 1477 1478 return NETDEV_TX_OK; 1479 1480 out_drop: 1481 dev_kfree_skb_any(first->skb); 1482 first->skb = NULL; 1483 1484 return NETDEV_TX_OK; 1485 } 1486 1487 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter, 1488 struct sk_buff *skb) 1489 { 1490 unsigned int r_idx = skb->queue_mapping; 1491 1492 if (r_idx >= adapter->num_tx_queues) 1493 r_idx = r_idx % adapter->num_tx_queues; 1494 1495 return adapter->tx_ring[r_idx]; 1496 } 1497 1498 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb, 1499 struct net_device *netdev) 1500 { 1501 struct igc_adapter *adapter = netdev_priv(netdev); 1502 1503 /* The minimum packet size with TCTL.PSP set is 17 so pad the skb 1504 * in order to meet this minimum size requirement. 
1505 */ 1506 if (skb->len < 17) { 1507 if (skb_padto(skb, 17)) 1508 return NETDEV_TX_OK; 1509 skb->len = 17; 1510 } 1511 1512 return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb)); 1513 } 1514 1515 static void igc_rx_checksum(struct igc_ring *ring, 1516 union igc_adv_rx_desc *rx_desc, 1517 struct sk_buff *skb) 1518 { 1519 skb_checksum_none_assert(skb); 1520 1521 /* Ignore Checksum bit is set */ 1522 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM)) 1523 return; 1524 1525 /* Rx checksum disabled via ethtool */ 1526 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 1527 return; 1528 1529 /* TCP/UDP checksum error bit is set */ 1530 if (igc_test_staterr(rx_desc, 1531 IGC_RXDEXT_STATERR_L4E | 1532 IGC_RXDEXT_STATERR_IPE)) { 1533 /* work around errata with sctp packets where the TCPE aka 1534 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 1535 * packets (aka let the stack check the crc32c) 1536 */ 1537 if (!(skb->len == 60 && 1538 test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 1539 u64_stats_update_begin(&ring->rx_syncp); 1540 ring->rx_stats.csum_err++; 1541 u64_stats_update_end(&ring->rx_syncp); 1542 } 1543 /* let the stack verify checksum errors */ 1544 return; 1545 } 1546 /* It must be a TCP or UDP packet with a valid checksum */ 1547 if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS | 1548 IGC_RXD_STAT_UDPCS)) 1549 skb->ip_summed = CHECKSUM_UNNECESSARY; 1550 1551 netdev_dbg(ring->netdev, "cksum success: bits %08X\n", 1552 le32_to_cpu(rx_desc->wb.upper.status_error)); 1553 } 1554 1555 static inline void igc_rx_hash(struct igc_ring *ring, 1556 union igc_adv_rx_desc *rx_desc, 1557 struct sk_buff *skb) 1558 { 1559 if (ring->netdev->features & NETIF_F_RXHASH) 1560 skb_set_hash(skb, 1561 le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), 1562 PKT_HASH_TYPE_L3); 1563 } 1564 1565 static void igc_rx_vlan(struct igc_ring *rx_ring, 1566 union igc_adv_rx_desc *rx_desc, 1567 struct sk_buff *skb) 1568 { 1569 struct net_device *dev = rx_ring->netdev; 1570 
u16 vid; 1571 1572 if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && 1573 igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) { 1574 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) && 1575 test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) 1576 vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan); 1577 else 1578 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 1579 1580 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); 1581 } 1582 } 1583 1584 /** 1585 * igc_process_skb_fields - Populate skb header fields from Rx descriptor 1586 * @rx_ring: rx descriptor ring packet is being transacted on 1587 * @rx_desc: pointer to the EOP Rx descriptor 1588 * @skb: pointer to current skb being populated 1589 * 1590 * This function checks the ring, descriptor, and packet information in order 1591 * to populate the hash, checksum, VLAN, protocol, and other fields within the 1592 * skb. 1593 */ 1594 static void igc_process_skb_fields(struct igc_ring *rx_ring, 1595 union igc_adv_rx_desc *rx_desc, 1596 struct sk_buff *skb) 1597 { 1598 igc_rx_hash(rx_ring, rx_desc, skb); 1599 1600 igc_rx_checksum(rx_ring, rx_desc, skb); 1601 1602 igc_rx_vlan(rx_ring, rx_desc, skb); 1603 1604 skb_record_rx_queue(skb, rx_ring->queue_index); 1605 1606 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 1607 } 1608 1609 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features) 1610 { 1611 bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); 1612 struct igc_adapter *adapter = netdev_priv(netdev); 1613 struct igc_hw *hw = &adapter->hw; 1614 u32 ctrl; 1615 1616 ctrl = rd32(IGC_CTRL); 1617 1618 if (enable) { 1619 /* enable VLAN tag insert/strip */ 1620 ctrl |= IGC_CTRL_VME; 1621 } else { 1622 /* disable VLAN tag insert/strip */ 1623 ctrl &= ~IGC_CTRL_VME; 1624 } 1625 wr32(IGC_CTRL, ctrl); 1626 } 1627 1628 static void igc_restore_vlan(struct igc_adapter *adapter) 1629 { 1630 igc_vlan_mode(adapter->netdev, adapter->netdev->features); 1631 } 1632 1633 static struct igc_rx_buffer 
*igc_get_rx_buffer(struct igc_ring *rx_ring, 1634 const unsigned int size, 1635 int *rx_buffer_pgcnt) 1636 { 1637 struct igc_rx_buffer *rx_buffer; 1638 1639 rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; 1640 *rx_buffer_pgcnt = 1641 #if (PAGE_SIZE < 8192) 1642 page_count(rx_buffer->page); 1643 #else 1644 0; 1645 #endif 1646 prefetchw(rx_buffer->page); 1647 1648 /* we are reusing so sync this buffer for CPU use */ 1649 dma_sync_single_range_for_cpu(rx_ring->dev, 1650 rx_buffer->dma, 1651 rx_buffer->page_offset, 1652 size, 1653 DMA_FROM_DEVICE); 1654 1655 rx_buffer->pagecnt_bias--; 1656 1657 return rx_buffer; 1658 } 1659 1660 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer, 1661 unsigned int truesize) 1662 { 1663 #if (PAGE_SIZE < 8192) 1664 buffer->page_offset ^= truesize; 1665 #else 1666 buffer->page_offset += truesize; 1667 #endif 1668 } 1669 1670 static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring, 1671 unsigned int size) 1672 { 1673 unsigned int truesize; 1674 1675 #if (PAGE_SIZE < 8192) 1676 truesize = igc_rx_pg_size(ring) / 2; 1677 #else 1678 truesize = ring_uses_build_skb(ring) ? 1679 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + 1680 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1681 SKB_DATA_ALIGN(size); 1682 #endif 1683 return truesize; 1684 } 1685 1686 /** 1687 * igc_add_rx_frag - Add contents of Rx buffer to sk_buff 1688 * @rx_ring: rx descriptor ring to transact packets on 1689 * @rx_buffer: buffer containing page to add 1690 * @skb: sk_buff to place the data into 1691 * @size: size of buffer to be added 1692 * 1693 * This function will add the data contained in rx_buffer->page to the skb. 1694 */ 1695 static void igc_add_rx_frag(struct igc_ring *rx_ring, 1696 struct igc_rx_buffer *rx_buffer, 1697 struct sk_buff *skb, 1698 unsigned int size) 1699 { 1700 unsigned int truesize; 1701 1702 #if (PAGE_SIZE < 8192) 1703 truesize = igc_rx_pg_size(rx_ring) / 2; 1704 #else 1705 truesize = ring_uses_build_skb(rx_ring) ? 
1706 SKB_DATA_ALIGN(IGC_SKB_PAD + size) : 1707 SKB_DATA_ALIGN(size); 1708 #endif 1709 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, 1710 rx_buffer->page_offset, size, truesize); 1711 1712 igc_rx_buffer_flip(rx_buffer, truesize); 1713 } 1714 1715 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring, 1716 struct igc_rx_buffer *rx_buffer, 1717 union igc_adv_rx_desc *rx_desc, 1718 unsigned int size) 1719 { 1720 void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; 1721 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1722 struct sk_buff *skb; 1723 1724 /* prefetch first cache line of first page */ 1725 net_prefetch(va); 1726 1727 /* build an skb around the page buffer */ 1728 skb = build_skb(va - IGC_SKB_PAD, truesize); 1729 if (unlikely(!skb)) 1730 return NULL; 1731 1732 /* update pointers within the skb to store the data */ 1733 skb_reserve(skb, IGC_SKB_PAD); 1734 __skb_put(skb, size); 1735 1736 igc_rx_buffer_flip(rx_buffer, truesize); 1737 return skb; 1738 } 1739 1740 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, 1741 struct igc_rx_buffer *rx_buffer, 1742 struct xdp_buff *xdp, 1743 ktime_t timestamp) 1744 { 1745 unsigned int size = xdp->data_end - xdp->data; 1746 unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size); 1747 void *va = xdp->data; 1748 unsigned int headlen; 1749 struct sk_buff *skb; 1750 1751 /* prefetch first cache line of first page */ 1752 net_prefetch(va); 1753 1754 /* allocate a skb to store the frags */ 1755 skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN); 1756 if (unlikely(!skb)) 1757 return NULL; 1758 1759 if (timestamp) 1760 skb_hwtstamps(skb)->hwtstamp = timestamp; 1761 1762 /* Determine available headroom for copy */ 1763 headlen = size; 1764 if (headlen > IGC_RX_HDR_LEN) 1765 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); 1766 1767 /* align pull length to size of long to optimize memcpy performance */ 1768 memcpy(__skb_put(skb, 
headlen), va, ALIGN(headlen, sizeof(long))); 1769 1770 /* update all of the pointers */ 1771 size -= headlen; 1772 if (size) { 1773 skb_add_rx_frag(skb, 0, rx_buffer->page, 1774 (va + headlen) - page_address(rx_buffer->page), 1775 size, truesize); 1776 igc_rx_buffer_flip(rx_buffer, truesize); 1777 } else { 1778 rx_buffer->pagecnt_bias++; 1779 } 1780 1781 return skb; 1782 } 1783 1784 /** 1785 * igc_reuse_rx_page - page flip buffer and store it back on the ring 1786 * @rx_ring: rx descriptor ring to store buffers on 1787 * @old_buff: donor buffer to have page reused 1788 * 1789 * Synchronizes page for reuse by the adapter 1790 */ 1791 static void igc_reuse_rx_page(struct igc_ring *rx_ring, 1792 struct igc_rx_buffer *old_buff) 1793 { 1794 u16 nta = rx_ring->next_to_alloc; 1795 struct igc_rx_buffer *new_buff; 1796 1797 new_buff = &rx_ring->rx_buffer_info[nta]; 1798 1799 /* update, and store next to alloc */ 1800 nta++; 1801 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; 1802 1803 /* Transfer page from old buffer to new buffer. 1804 * Move each member individually to avoid possible store 1805 * forwarding stalls. 
1806 */ 1807 new_buff->dma = old_buff->dma; 1808 new_buff->page = old_buff->page; 1809 new_buff->page_offset = old_buff->page_offset; 1810 new_buff->pagecnt_bias = old_buff->pagecnt_bias; 1811 } 1812 1813 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer, 1814 int rx_buffer_pgcnt) 1815 { 1816 unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; 1817 struct page *page = rx_buffer->page; 1818 1819 /* avoid re-using remote and pfmemalloc pages */ 1820 if (!dev_page_is_reusable(page)) 1821 return false; 1822 1823 #if (PAGE_SIZE < 8192) 1824 /* if we are only owner of page we can reuse it */ 1825 if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) 1826 return false; 1827 #else 1828 #define IGC_LAST_OFFSET \ 1829 (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048) 1830 1831 if (rx_buffer->page_offset > IGC_LAST_OFFSET) 1832 return false; 1833 #endif 1834 1835 /* If we have drained the page fragment pool we need to update 1836 * the pagecnt_bias and page count so that we fully restock the 1837 * number of references the driver holds. 1838 */ 1839 if (unlikely(pagecnt_bias == 1)) { 1840 page_ref_add(page, USHRT_MAX - 1); 1841 rx_buffer->pagecnt_bias = USHRT_MAX; 1842 } 1843 1844 return true; 1845 } 1846 1847 /** 1848 * igc_is_non_eop - process handling of non-EOP buffers 1849 * @rx_ring: Rx ring being processed 1850 * @rx_desc: Rx descriptor for current buffer 1851 * 1852 * This function updates next to clean. If the buffer is an EOP buffer 1853 * this function exits returning false, otherwise it will place the 1854 * sk_buff in the next buffer to be chained and return true indicating 1855 * that this is in fact a non-EOP buffer. 1856 */ 1857 static bool igc_is_non_eop(struct igc_ring *rx_ring, 1858 union igc_adv_rx_desc *rx_desc) 1859 { 1860 u32 ntc = rx_ring->next_to_clean + 1; 1861 1862 /* fetch, update, and store next to clean */ 1863 ntc = (ntc < rx_ring->count) ? 
ntc : 0; 1864 rx_ring->next_to_clean = ntc; 1865 1866 prefetch(IGC_RX_DESC(rx_ring, ntc)); 1867 1868 if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP))) 1869 return false; 1870 1871 return true; 1872 } 1873 1874 /** 1875 * igc_cleanup_headers - Correct corrupted or empty headers 1876 * @rx_ring: rx descriptor ring packet is being transacted on 1877 * @rx_desc: pointer to the EOP Rx descriptor 1878 * @skb: pointer to current skb being fixed 1879 * 1880 * Address the case where we are pulling data in on pages only 1881 * and as such no data is present in the skb header. 1882 * 1883 * In addition if skb is not at least 60 bytes we need to pad it so that 1884 * it is large enough to qualify as a valid Ethernet frame. 1885 * 1886 * Returns true if an error was encountered and skb was freed. 1887 */ 1888 static bool igc_cleanup_headers(struct igc_ring *rx_ring, 1889 union igc_adv_rx_desc *rx_desc, 1890 struct sk_buff *skb) 1891 { 1892 /* XDP packets use error pointer so abort at this point */ 1893 if (IS_ERR(skb)) 1894 return true; 1895 1896 if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) { 1897 struct net_device *netdev = rx_ring->netdev; 1898 1899 if (!(netdev->features & NETIF_F_RXALL)) { 1900 dev_kfree_skb_any(skb); 1901 return true; 1902 } 1903 } 1904 1905 /* if eth_skb_pad returns an error the skb was freed */ 1906 if (eth_skb_pad(skb)) 1907 return true; 1908 1909 return false; 1910 } 1911 1912 static void igc_put_rx_buffer(struct igc_ring *rx_ring, 1913 struct igc_rx_buffer *rx_buffer, 1914 int rx_buffer_pgcnt) 1915 { 1916 if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { 1917 /* hand second half of page back to the ring */ 1918 igc_reuse_rx_page(rx_ring, rx_buffer); 1919 } else { 1920 /* We are not reusing the buffer so unmap it and free 1921 * any references we are holding to it 1922 */ 1923 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, 1924 igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE, 1925 IGC_RX_DMA_ATTR); 1926 
__page_frag_cache_drain(rx_buffer->page, 1927 rx_buffer->pagecnt_bias); 1928 } 1929 1930 /* clear contents of rx_buffer */ 1931 rx_buffer->page = NULL; 1932 } 1933 1934 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring) 1935 { 1936 struct igc_adapter *adapter = rx_ring->q_vector->adapter; 1937 1938 if (ring_uses_build_skb(rx_ring)) 1939 return IGC_SKB_PAD; 1940 if (igc_xdp_is_enabled(adapter)) 1941 return XDP_PACKET_HEADROOM; 1942 1943 return 0; 1944 } 1945 1946 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring, 1947 struct igc_rx_buffer *bi) 1948 { 1949 struct page *page = bi->page; 1950 dma_addr_t dma; 1951 1952 /* since we are recycling buffers we should seldom need to alloc */ 1953 if (likely(page)) 1954 return true; 1955 1956 /* alloc new page for storage */ 1957 page = dev_alloc_pages(igc_rx_pg_order(rx_ring)); 1958 if (unlikely(!page)) { 1959 rx_ring->rx_stats.alloc_failed++; 1960 return false; 1961 } 1962 1963 /* map page for use */ 1964 dma = dma_map_page_attrs(rx_ring->dev, page, 0, 1965 igc_rx_pg_size(rx_ring), 1966 DMA_FROM_DEVICE, 1967 IGC_RX_DMA_ATTR); 1968 1969 /* if mapping failed free memory back to system since 1970 * there isn't much point in holding memory we can't use 1971 */ 1972 if (dma_mapping_error(rx_ring->dev, dma)) { 1973 __free_page(page); 1974 1975 rx_ring->rx_stats.alloc_failed++; 1976 return false; 1977 } 1978 1979 bi->dma = dma; 1980 bi->page = page; 1981 bi->page_offset = igc_rx_offset(rx_ring); 1982 page_ref_add(page, USHRT_MAX - 1); 1983 bi->pagecnt_bias = USHRT_MAX; 1984 1985 return true; 1986 } 1987 1988 /** 1989 * igc_alloc_rx_buffers - Replace used receive buffers; packet split 1990 * @rx_ring: rx descriptor ring 1991 * @cleaned_count: number of buffers to clean 1992 */ 1993 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count) 1994 { 1995 union igc_adv_rx_desc *rx_desc; 1996 u16 i = rx_ring->next_to_use; 1997 struct igc_rx_buffer *bi; 1998 u16 bufsz; 1999 2000 /* nothing to do 
*/ 2001 if (!cleaned_count) 2002 return; 2003 2004 rx_desc = IGC_RX_DESC(rx_ring, i); 2005 bi = &rx_ring->rx_buffer_info[i]; 2006 i -= rx_ring->count; 2007 2008 bufsz = igc_rx_bufsz(rx_ring); 2009 2010 do { 2011 if (!igc_alloc_mapped_page(rx_ring, bi)) 2012 break; 2013 2014 /* sync the buffer for use by the device */ 2015 dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 2016 bi->page_offset, bufsz, 2017 DMA_FROM_DEVICE); 2018 2019 /* Refresh the desc even if buffer_addrs didn't change 2020 * because each write-back erases this info. 2021 */ 2022 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); 2023 2024 rx_desc++; 2025 bi++; 2026 i++; 2027 if (unlikely(!i)) { 2028 rx_desc = IGC_RX_DESC(rx_ring, 0); 2029 bi = rx_ring->rx_buffer_info; 2030 i -= rx_ring->count; 2031 } 2032 2033 /* clear the length for the next_to_use descriptor */ 2034 rx_desc->wb.upper.length = 0; 2035 2036 cleaned_count--; 2037 } while (cleaned_count); 2038 2039 i += rx_ring->count; 2040 2041 if (rx_ring->next_to_use != i) { 2042 /* record the next descriptor to use */ 2043 rx_ring->next_to_use = i; 2044 2045 /* update next to alloc since we have filled the ring */ 2046 rx_ring->next_to_alloc = i; 2047 2048 /* Force memory writes to complete before letting h/w 2049 * know there are new descriptors to fetch. (Only 2050 * applicable for weak-ordered memory model archs, 2051 * such as IA-64). 
2052 */ 2053 wmb(); 2054 writel(i, rx_ring->tail); 2055 } 2056 } 2057 2058 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) 2059 { 2060 union igc_adv_rx_desc *desc; 2061 u16 i = ring->next_to_use; 2062 struct igc_rx_buffer *bi; 2063 dma_addr_t dma; 2064 bool ok = true; 2065 2066 if (!count) 2067 return ok; 2068 2069 desc = IGC_RX_DESC(ring, i); 2070 bi = &ring->rx_buffer_info[i]; 2071 i -= ring->count; 2072 2073 do { 2074 bi->xdp = xsk_buff_alloc(ring->xsk_pool); 2075 if (!bi->xdp) { 2076 ok = false; 2077 break; 2078 } 2079 2080 dma = xsk_buff_xdp_get_dma(bi->xdp); 2081 desc->read.pkt_addr = cpu_to_le64(dma); 2082 2083 desc++; 2084 bi++; 2085 i++; 2086 if (unlikely(!i)) { 2087 desc = IGC_RX_DESC(ring, 0); 2088 bi = ring->rx_buffer_info; 2089 i -= ring->count; 2090 } 2091 2092 /* Clear the length for the next_to_use descriptor. */ 2093 desc->wb.upper.length = 0; 2094 2095 count--; 2096 } while (count); 2097 2098 i += ring->count; 2099 2100 if (ring->next_to_use != i) { 2101 ring->next_to_use = i; 2102 2103 /* Force memory writes to complete before letting h/w 2104 * know there are new descriptors to fetch. (Only 2105 * applicable for weak-ordered memory model archs, 2106 * such as IA-64). 
2107 */ 2108 wmb(); 2109 writel(i, ring->tail); 2110 } 2111 2112 return ok; 2113 } 2114 2115 static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer, 2116 struct xdp_frame *xdpf, 2117 struct igc_ring *ring) 2118 { 2119 dma_addr_t dma; 2120 2121 dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE); 2122 if (dma_mapping_error(ring->dev, dma)) { 2123 netdev_err_once(ring->netdev, "Failed to map DMA for TX\n"); 2124 return -ENOMEM; 2125 } 2126 2127 buffer->type = IGC_TX_BUFFER_TYPE_XDP; 2128 buffer->xdpf = xdpf; 2129 buffer->protocol = 0; 2130 buffer->bytecount = xdpf->len; 2131 buffer->gso_segs = 1; 2132 buffer->time_stamp = jiffies; 2133 dma_unmap_len_set(buffer, len, xdpf->len); 2134 dma_unmap_addr_set(buffer, dma, dma); 2135 return 0; 2136 } 2137 2138 /* This function requires __netif_tx_lock is held by the caller. */ 2139 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, 2140 struct xdp_frame *xdpf) 2141 { 2142 struct igc_tx_buffer *buffer; 2143 union igc_adv_tx_desc *desc; 2144 u32 cmd_type, olinfo_status; 2145 int err; 2146 2147 if (!igc_desc_unused(ring)) 2148 return -EBUSY; 2149 2150 buffer = &ring->tx_buffer_info[ring->next_to_use]; 2151 err = igc_xdp_init_tx_buffer(buffer, xdpf, ring); 2152 if (err) 2153 return err; 2154 2155 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2156 IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | 2157 buffer->bytecount; 2158 olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT; 2159 2160 desc = IGC_TX_DESC(ring, ring->next_to_use); 2161 desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2162 desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2163 desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma)); 2164 2165 netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount); 2166 2167 buffer->next_to_watch = desc; 2168 2169 ring->next_to_use++; 2170 if (ring->next_to_use == ring->count) 2171 ring->next_to_use = 0; 2172 2173 return 0; 2174 } 2175 2176 static struct igc_ring 
*igc_xdp_get_tx_ring(struct igc_adapter *adapter, 2177 int cpu) 2178 { 2179 int index = cpu; 2180 2181 if (unlikely(index < 0)) 2182 index = 0; 2183 2184 while (index >= adapter->num_tx_queues) 2185 index -= adapter->num_tx_queues; 2186 2187 return adapter->tx_ring[index]; 2188 } 2189 2190 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) 2191 { 2192 struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); 2193 int cpu = smp_processor_id(); 2194 struct netdev_queue *nq; 2195 struct igc_ring *ring; 2196 int res; 2197 2198 if (unlikely(!xdpf)) 2199 return -EFAULT; 2200 2201 ring = igc_xdp_get_tx_ring(adapter, cpu); 2202 nq = txring_txq(ring); 2203 2204 __netif_tx_lock(nq, cpu); 2205 res = igc_xdp_init_tx_descriptor(ring, xdpf); 2206 __netif_tx_unlock(nq); 2207 return res; 2208 } 2209 2210 /* This function assumes rcu_read_lock() is held by the caller. */ 2211 static int __igc_xdp_run_prog(struct igc_adapter *adapter, 2212 struct bpf_prog *prog, 2213 struct xdp_buff *xdp) 2214 { 2215 u32 act = bpf_prog_run_xdp(prog, xdp); 2216 2217 switch (act) { 2218 case XDP_PASS: 2219 return IGC_XDP_PASS; 2220 case XDP_TX: 2221 if (igc_xdp_xmit_back(adapter, xdp) < 0) 2222 goto out_failure; 2223 return IGC_XDP_TX; 2224 case XDP_REDIRECT: 2225 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0) 2226 goto out_failure; 2227 return IGC_XDP_REDIRECT; 2228 break; 2229 default: 2230 bpf_warn_invalid_xdp_action(act); 2231 fallthrough; 2232 case XDP_ABORTED: 2233 out_failure: 2234 trace_xdp_exception(adapter->netdev, prog, act); 2235 fallthrough; 2236 case XDP_DROP: 2237 return IGC_XDP_CONSUMED; 2238 } 2239 } 2240 2241 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter, 2242 struct xdp_buff *xdp) 2243 { 2244 struct bpf_prog *prog; 2245 int res; 2246 2247 prog = READ_ONCE(adapter->xdp_prog); 2248 if (!prog) { 2249 res = IGC_XDP_PASS; 2250 goto out; 2251 } 2252 2253 res = __igc_xdp_run_prog(adapter, prog, xdp); 2254 2255 out: 2256 return 
ERR_PTR(-res); 2257 } 2258 2259 /* This function assumes __netif_tx_lock is held by the caller. */ 2260 static void igc_flush_tx_descriptors(struct igc_ring *ring) 2261 { 2262 /* Once tail pointer is updated, hardware can fetch the descriptors 2263 * any time so we issue a write membar here to ensure all memory 2264 * writes are complete before the tail pointer is updated. 2265 */ 2266 wmb(); 2267 writel(ring->next_to_use, ring->tail); 2268 } 2269 2270 static void igc_finalize_xdp(struct igc_adapter *adapter, int status) 2271 { 2272 int cpu = smp_processor_id(); 2273 struct netdev_queue *nq; 2274 struct igc_ring *ring; 2275 2276 if (status & IGC_XDP_TX) { 2277 ring = igc_xdp_get_tx_ring(adapter, cpu); 2278 nq = txring_txq(ring); 2279 2280 __netif_tx_lock(nq, cpu); 2281 igc_flush_tx_descriptors(ring); 2282 __netif_tx_unlock(nq); 2283 } 2284 2285 if (status & IGC_XDP_REDIRECT) 2286 xdp_do_flush(); 2287 } 2288 2289 static void igc_update_rx_stats(struct igc_q_vector *q_vector, 2290 unsigned int packets, unsigned int bytes) 2291 { 2292 struct igc_ring *ring = q_vector->rx.ring; 2293 2294 u64_stats_update_begin(&ring->rx_syncp); 2295 ring->rx_stats.packets += packets; 2296 ring->rx_stats.bytes += bytes; 2297 u64_stats_update_end(&ring->rx_syncp); 2298 2299 q_vector->rx.total_packets += packets; 2300 q_vector->rx.total_bytes += bytes; 2301 } 2302 2303 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) 2304 { 2305 unsigned int total_bytes = 0, total_packets = 0; 2306 struct igc_adapter *adapter = q_vector->adapter; 2307 struct igc_ring *rx_ring = q_vector->rx.ring; 2308 struct sk_buff *skb = rx_ring->skb; 2309 u16 cleaned_count = igc_desc_unused(rx_ring); 2310 int xdp_status = 0, rx_buffer_pgcnt; 2311 2312 while (likely(total_packets < budget)) { 2313 union igc_adv_rx_desc *rx_desc; 2314 struct igc_rx_buffer *rx_buffer; 2315 unsigned int size, truesize; 2316 ktime_t timestamp = 0; 2317 struct xdp_buff xdp; 2318 int pkt_offset = 0; 2319 void 
*pktbuf; 2320 2321 /* return some buffers to hardware, one at a time is too slow */ 2322 if (cleaned_count >= IGC_RX_BUFFER_WRITE) { 2323 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2324 cleaned_count = 0; 2325 } 2326 2327 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean); 2328 size = le16_to_cpu(rx_desc->wb.upper.length); 2329 if (!size) 2330 break; 2331 2332 /* This memory barrier is needed to keep us from reading 2333 * any other fields out of the rx_desc until we know the 2334 * descriptor has been written back 2335 */ 2336 dma_rmb(); 2337 2338 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); 2339 truesize = igc_get_rx_frame_truesize(rx_ring, size); 2340 2341 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset; 2342 2343 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { 2344 timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, 2345 pktbuf); 2346 pkt_offset = IGC_TS_HDR_LEN; 2347 size -= IGC_TS_HDR_LEN; 2348 } 2349 2350 if (!skb) { 2351 xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); 2352 xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), 2353 igc_rx_offset(rx_ring) + pkt_offset, size, false); 2354 2355 skb = igc_xdp_run_prog(adapter, &xdp); 2356 } 2357 2358 if (IS_ERR(skb)) { 2359 unsigned int xdp_res = -PTR_ERR(skb); 2360 2361 switch (xdp_res) { 2362 case IGC_XDP_CONSUMED: 2363 rx_buffer->pagecnt_bias++; 2364 break; 2365 case IGC_XDP_TX: 2366 case IGC_XDP_REDIRECT: 2367 igc_rx_buffer_flip(rx_buffer, truesize); 2368 xdp_status |= xdp_res; 2369 break; 2370 } 2371 2372 total_packets++; 2373 total_bytes += size; 2374 } else if (skb) 2375 igc_add_rx_frag(rx_ring, rx_buffer, skb, size); 2376 else if (ring_uses_build_skb(rx_ring)) 2377 skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size); 2378 else 2379 skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, 2380 timestamp); 2381 2382 /* exit if we failed to retrieve a buffer */ 2383 if (!skb) { 2384 rx_ring->rx_stats.alloc_failed++; 2385 rx_buffer->pagecnt_bias++; 2386 break; 
2387 } 2388 2389 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); 2390 cleaned_count++; 2391 2392 /* fetch next buffer in frame if non-eop */ 2393 if (igc_is_non_eop(rx_ring, rx_desc)) 2394 continue; 2395 2396 /* verify the packet layout is correct */ 2397 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) { 2398 skb = NULL; 2399 continue; 2400 } 2401 2402 /* probably a little skewed due to removing CRC */ 2403 total_bytes += skb->len; 2404 2405 /* populate checksum, VLAN, and protocol */ 2406 igc_process_skb_fields(rx_ring, rx_desc, skb); 2407 2408 napi_gro_receive(&q_vector->napi, skb); 2409 2410 /* reset skb pointer */ 2411 skb = NULL; 2412 2413 /* update budget accounting */ 2414 total_packets++; 2415 } 2416 2417 if (xdp_status) 2418 igc_finalize_xdp(adapter, xdp_status); 2419 2420 /* place incomplete frames back on ring for completion */ 2421 rx_ring->skb = skb; 2422 2423 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2424 2425 if (cleaned_count) 2426 igc_alloc_rx_buffers(rx_ring, cleaned_count); 2427 2428 return total_packets; 2429 } 2430 2431 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, 2432 struct xdp_buff *xdp) 2433 { 2434 unsigned int metasize = xdp->data - xdp->data_meta; 2435 unsigned int datasize = xdp->data_end - xdp->data; 2436 unsigned int totalsize = metasize + datasize; 2437 struct sk_buff *skb; 2438 2439 skb = __napi_alloc_skb(&ring->q_vector->napi, 2440 xdp->data_end - xdp->data_hard_start, 2441 GFP_ATOMIC | __GFP_NOWARN); 2442 if (unlikely(!skb)) 2443 return NULL; 2444 2445 skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); 2446 memcpy(__skb_put(skb, totalsize), xdp->data_meta, totalsize); 2447 if (metasize) 2448 skb_metadata_set(skb, metasize); 2449 2450 return skb; 2451 } 2452 2453 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, 2454 union igc_adv_rx_desc *desc, 2455 struct xdp_buff *xdp, 2456 ktime_t timestamp) 2457 { 2458 struct igc_ring *ring = q_vector->rx.ring; 2459 struct 
sk_buff *skb; 2460 2461 skb = igc_construct_skb_zc(ring, xdp); 2462 if (!skb) { 2463 ring->rx_stats.alloc_failed++; 2464 return; 2465 } 2466 2467 if (timestamp) 2468 skb_hwtstamps(skb)->hwtstamp = timestamp; 2469 2470 if (igc_cleanup_headers(ring, desc, skb)) 2471 return; 2472 2473 igc_process_skb_fields(ring, desc, skb); 2474 napi_gro_receive(&q_vector->napi, skb); 2475 } 2476 2477 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) 2478 { 2479 struct igc_adapter *adapter = q_vector->adapter; 2480 struct igc_ring *ring = q_vector->rx.ring; 2481 u16 cleaned_count = igc_desc_unused(ring); 2482 int total_bytes = 0, total_packets = 0; 2483 u16 ntc = ring->next_to_clean; 2484 struct bpf_prog *prog; 2485 bool failure = false; 2486 int xdp_status = 0; 2487 2488 rcu_read_lock(); 2489 2490 prog = READ_ONCE(adapter->xdp_prog); 2491 2492 while (likely(total_packets < budget)) { 2493 union igc_adv_rx_desc *desc; 2494 struct igc_rx_buffer *bi; 2495 ktime_t timestamp = 0; 2496 unsigned int size; 2497 int res; 2498 2499 desc = IGC_RX_DESC(ring, ntc); 2500 size = le16_to_cpu(desc->wb.upper.length); 2501 if (!size) 2502 break; 2503 2504 /* This memory barrier is needed to keep us from reading 2505 * any other fields out of the rx_desc until we know the 2506 * descriptor has been written back 2507 */ 2508 dma_rmb(); 2509 2510 bi = &ring->rx_buffer_info[ntc]; 2511 2512 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { 2513 timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, 2514 bi->xdp->data); 2515 2516 bi->xdp->data += IGC_TS_HDR_LEN; 2517 2518 /* HW timestamp has been copied into local variable. Metadata 2519 * length when XDP program is called should be 0. 
2520 */ 2521 bi->xdp->data_meta += IGC_TS_HDR_LEN; 2522 size -= IGC_TS_HDR_LEN; 2523 } 2524 2525 bi->xdp->data_end = bi->xdp->data + size; 2526 xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool); 2527 2528 res = __igc_xdp_run_prog(adapter, prog, bi->xdp); 2529 switch (res) { 2530 case IGC_XDP_PASS: 2531 igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); 2532 fallthrough; 2533 case IGC_XDP_CONSUMED: 2534 xsk_buff_free(bi->xdp); 2535 break; 2536 case IGC_XDP_TX: 2537 case IGC_XDP_REDIRECT: 2538 xdp_status |= res; 2539 break; 2540 } 2541 2542 bi->xdp = NULL; 2543 total_bytes += size; 2544 total_packets++; 2545 cleaned_count++; 2546 ntc++; 2547 if (ntc == ring->count) 2548 ntc = 0; 2549 } 2550 2551 ring->next_to_clean = ntc; 2552 rcu_read_unlock(); 2553 2554 if (cleaned_count >= IGC_RX_BUFFER_WRITE) 2555 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count); 2556 2557 if (xdp_status) 2558 igc_finalize_xdp(adapter, xdp_status); 2559 2560 igc_update_rx_stats(q_vector, total_packets, total_bytes); 2561 2562 if (xsk_uses_need_wakeup(ring->xsk_pool)) { 2563 if (failure || ring->next_to_clean == ring->next_to_use) 2564 xsk_set_rx_need_wakeup(ring->xsk_pool); 2565 else 2566 xsk_clear_rx_need_wakeup(ring->xsk_pool); 2567 return total_packets; 2568 } 2569 2570 return failure ? 
budget : total_packets; 2571 } 2572 2573 static void igc_update_tx_stats(struct igc_q_vector *q_vector, 2574 unsigned int packets, unsigned int bytes) 2575 { 2576 struct igc_ring *ring = q_vector->tx.ring; 2577 2578 u64_stats_update_begin(&ring->tx_syncp); 2579 ring->tx_stats.bytes += bytes; 2580 ring->tx_stats.packets += packets; 2581 u64_stats_update_end(&ring->tx_syncp); 2582 2583 q_vector->tx.total_bytes += bytes; 2584 q_vector->tx.total_packets += packets; 2585 } 2586 2587 static void igc_xdp_xmit_zc(struct igc_ring *ring) 2588 { 2589 struct xsk_buff_pool *pool = ring->xsk_pool; 2590 struct netdev_queue *nq = txring_txq(ring); 2591 union igc_adv_tx_desc *tx_desc = NULL; 2592 int cpu = smp_processor_id(); 2593 u16 ntu = ring->next_to_use; 2594 struct xdp_desc xdp_desc; 2595 u16 budget; 2596 2597 if (!netif_carrier_ok(ring->netdev)) 2598 return; 2599 2600 __netif_tx_lock(nq, cpu); 2601 2602 budget = igc_desc_unused(ring); 2603 2604 while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { 2605 u32 cmd_type, olinfo_status; 2606 struct igc_tx_buffer *bi; 2607 dma_addr_t dma; 2608 2609 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT | 2610 IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD | 2611 xdp_desc.len; 2612 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT; 2613 2614 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr); 2615 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len); 2616 2617 tx_desc = IGC_TX_DESC(ring, ntu); 2618 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); 2619 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 2620 tx_desc->read.buffer_addr = cpu_to_le64(dma); 2621 2622 bi = &ring->tx_buffer_info[ntu]; 2623 bi->type = IGC_TX_BUFFER_TYPE_XSK; 2624 bi->protocol = 0; 2625 bi->bytecount = xdp_desc.len; 2626 bi->gso_segs = 1; 2627 bi->time_stamp = jiffies; 2628 bi->next_to_watch = tx_desc; 2629 2630 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len); 2631 2632 ntu++; 2633 if (ntu == ring->count) 2634 ntu = 0; 2635 } 2636 2637 
ring->next_to_use = ntu; 2638 if (tx_desc) { 2639 igc_flush_tx_descriptors(ring); 2640 xsk_tx_release(pool); 2641 } 2642 2643 __netif_tx_unlock(nq); 2644 } 2645 2646 /** 2647 * igc_clean_tx_irq - Reclaim resources after transmit completes 2648 * @q_vector: pointer to q_vector containing needed info 2649 * @napi_budget: Used to determine if we are in netpoll 2650 * 2651 * returns true if ring is completely cleaned 2652 */ 2653 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) 2654 { 2655 struct igc_adapter *adapter = q_vector->adapter; 2656 unsigned int total_bytes = 0, total_packets = 0; 2657 unsigned int budget = q_vector->tx.work_limit; 2658 struct igc_ring *tx_ring = q_vector->tx.ring; 2659 unsigned int i = tx_ring->next_to_clean; 2660 struct igc_tx_buffer *tx_buffer; 2661 union igc_adv_tx_desc *tx_desc; 2662 u32 xsk_frames = 0; 2663 2664 if (test_bit(__IGC_DOWN, &adapter->state)) 2665 return true; 2666 2667 tx_buffer = &tx_ring->tx_buffer_info[i]; 2668 tx_desc = IGC_TX_DESC(tx_ring, i); 2669 i -= tx_ring->count; 2670 2671 do { 2672 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; 2673 2674 /* if next_to_watch is not set then there is no work pending */ 2675 if (!eop_desc) 2676 break; 2677 2678 /* prevent any other reads prior to eop_desc */ 2679 smp_rmb(); 2680 2681 /* if DD is not set pending work has not been completed */ 2682 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD))) 2683 break; 2684 2685 /* clear next_to_watch to prevent false hangs */ 2686 tx_buffer->next_to_watch = NULL; 2687 2688 /* update the statistics for this packet */ 2689 total_bytes += tx_buffer->bytecount; 2690 total_packets += tx_buffer->gso_segs; 2691 2692 switch (tx_buffer->type) { 2693 case IGC_TX_BUFFER_TYPE_XSK: 2694 xsk_frames++; 2695 break; 2696 case IGC_TX_BUFFER_TYPE_XDP: 2697 xdp_return_frame(tx_buffer->xdpf); 2698 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2699 break; 2700 case IGC_TX_BUFFER_TYPE_SKB: 2701 
napi_consume_skb(tx_buffer->skb, napi_budget); 2702 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2703 break; 2704 default: 2705 netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n"); 2706 break; 2707 } 2708 2709 /* clear last DMA location and unmap remaining buffers */ 2710 while (tx_desc != eop_desc) { 2711 tx_buffer++; 2712 tx_desc++; 2713 i++; 2714 if (unlikely(!i)) { 2715 i -= tx_ring->count; 2716 tx_buffer = tx_ring->tx_buffer_info; 2717 tx_desc = IGC_TX_DESC(tx_ring, 0); 2718 } 2719 2720 /* unmap any remaining paged data */ 2721 if (dma_unmap_len(tx_buffer, len)) 2722 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer); 2723 } 2724 2725 /* move us one more past the eop_desc for start of next pkt */ 2726 tx_buffer++; 2727 tx_desc++; 2728 i++; 2729 if (unlikely(!i)) { 2730 i -= tx_ring->count; 2731 tx_buffer = tx_ring->tx_buffer_info; 2732 tx_desc = IGC_TX_DESC(tx_ring, 0); 2733 } 2734 2735 /* issue prefetch for next Tx descriptor */ 2736 prefetch(tx_desc); 2737 2738 /* update budget accounting */ 2739 budget--; 2740 } while (likely(budget)); 2741 2742 netdev_tx_completed_queue(txring_txq(tx_ring), 2743 total_packets, total_bytes); 2744 2745 i += tx_ring->count; 2746 tx_ring->next_to_clean = i; 2747 2748 igc_update_tx_stats(q_vector, total_packets, total_bytes); 2749 2750 if (tx_ring->xsk_pool) { 2751 if (xsk_frames) 2752 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames); 2753 if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) 2754 xsk_set_tx_need_wakeup(tx_ring->xsk_pool); 2755 igc_xdp_xmit_zc(tx_ring); 2756 } 2757 2758 if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 2759 struct igc_hw *hw = &adapter->hw; 2760 2761 /* Detect a transmit hang in hardware, this serializes the 2762 * check with the clearing of time_stamp and movement of i 2763 */ 2764 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 2765 if (tx_buffer->next_to_watch && 2766 time_after(jiffies, tx_buffer->time_stamp + 2767 (adapter->tx_timeout_factor * HZ)) && 2768 
!(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { 2769 /* detected Tx unit hang */ 2770 netdev_err(tx_ring->netdev, 2771 "Detected Tx Unit Hang\n" 2772 " Tx Queue <%d>\n" 2773 " TDH <%x>\n" 2774 " TDT <%x>\n" 2775 " next_to_use <%x>\n" 2776 " next_to_clean <%x>\n" 2777 "buffer_info[next_to_clean]\n" 2778 " time_stamp <%lx>\n" 2779 " next_to_watch <%p>\n" 2780 " jiffies <%lx>\n" 2781 " desc.status <%x>\n", 2782 tx_ring->queue_index, 2783 rd32(IGC_TDH(tx_ring->reg_idx)), 2784 readl(tx_ring->tail), 2785 tx_ring->next_to_use, 2786 tx_ring->next_to_clean, 2787 tx_buffer->time_stamp, 2788 tx_buffer->next_to_watch, 2789 jiffies, 2790 tx_buffer->next_to_watch->wb.status); 2791 netif_stop_subqueue(tx_ring->netdev, 2792 tx_ring->queue_index); 2793 2794 /* we are about to reset, no point in enabling stuff */ 2795 return true; 2796 } 2797 } 2798 2799 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) 2800 if (unlikely(total_packets && 2801 netif_carrier_ok(tx_ring->netdev) && 2802 igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { 2803 /* Make sure that anybody stopping the queue after this 2804 * sees the new next_to_clean. 
2805 */ 2806 smp_mb(); 2807 if (__netif_subqueue_stopped(tx_ring->netdev, 2808 tx_ring->queue_index) && 2809 !(test_bit(__IGC_DOWN, &adapter->state))) { 2810 netif_wake_subqueue(tx_ring->netdev, 2811 tx_ring->queue_index); 2812 2813 u64_stats_update_begin(&tx_ring->tx_syncp); 2814 tx_ring->tx_stats.restart_queue++; 2815 u64_stats_update_end(&tx_ring->tx_syncp); 2816 } 2817 } 2818 2819 return !!budget; 2820 } 2821 2822 static int igc_find_mac_filter(struct igc_adapter *adapter, 2823 enum igc_mac_filter_type type, const u8 *addr) 2824 { 2825 struct igc_hw *hw = &adapter->hw; 2826 int max_entries = hw->mac.rar_entry_count; 2827 u32 ral, rah; 2828 int i; 2829 2830 for (i = 0; i < max_entries; i++) { 2831 ral = rd32(IGC_RAL(i)); 2832 rah = rd32(IGC_RAH(i)); 2833 2834 if (!(rah & IGC_RAH_AV)) 2835 continue; 2836 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) 2837 continue; 2838 if ((rah & IGC_RAH_RAH_MASK) != 2839 le16_to_cpup((__le16 *)(addr + 4))) 2840 continue; 2841 if (ral != le32_to_cpup((__le32 *)(addr))) 2842 continue; 2843 2844 return i; 2845 } 2846 2847 return -1; 2848 } 2849 2850 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) 2851 { 2852 struct igc_hw *hw = &adapter->hw; 2853 int max_entries = hw->mac.rar_entry_count; 2854 u32 rah; 2855 int i; 2856 2857 for (i = 0; i < max_entries; i++) { 2858 rah = rd32(IGC_RAH(i)); 2859 2860 if (!(rah & IGC_RAH_AV)) 2861 return i; 2862 } 2863 2864 return -1; 2865 } 2866 2867 /** 2868 * igc_add_mac_filter() - Add MAC address filter 2869 * @adapter: Pointer to adapter where the filter should be added 2870 * @type: MAC address filter type (source or destination) 2871 * @addr: MAC address 2872 * @queue: If non-negative, queue assignment feature is enabled and frames 2873 * matching the filter are enqueued onto 'queue'. Otherwise, queue 2874 * assignment is disabled. 2875 * 2876 * Return: 0 in case of success, negative errno code otherwise. 
2877 */ 2878 static int igc_add_mac_filter(struct igc_adapter *adapter, 2879 enum igc_mac_filter_type type, const u8 *addr, 2880 int queue) 2881 { 2882 struct net_device *dev = adapter->netdev; 2883 int index; 2884 2885 index = igc_find_mac_filter(adapter, type, addr); 2886 if (index >= 0) 2887 goto update_filter; 2888 2889 index = igc_get_avail_mac_filter_slot(adapter); 2890 if (index < 0) 2891 return -ENOSPC; 2892 2893 netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", 2894 index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", 2895 addr, queue); 2896 2897 update_filter: 2898 igc_set_mac_filter_hw(adapter, index, type, addr, queue); 2899 return 0; 2900 } 2901 2902 /** 2903 * igc_del_mac_filter() - Delete MAC address filter 2904 * @adapter: Pointer to adapter where the filter should be deleted from 2905 * @type: MAC address filter type (source or destination) 2906 * @addr: MAC address 2907 */ 2908 static void igc_del_mac_filter(struct igc_adapter *adapter, 2909 enum igc_mac_filter_type type, const u8 *addr) 2910 { 2911 struct net_device *dev = adapter->netdev; 2912 int index; 2913 2914 index = igc_find_mac_filter(adapter, type, addr); 2915 if (index < 0) 2916 return; 2917 2918 if (index == 0) { 2919 /* If this is the default filter, we don't actually delete it. 2920 * We just reset to its default value i.e. disable queue 2921 * assignment. 2922 */ 2923 netdev_dbg(dev, "Disable default MAC filter queue assignment"); 2924 2925 igc_set_mac_filter_hw(adapter, 0, type, addr, -1); 2926 } else { 2927 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", 2928 index, 2929 type == IGC_MAC_FILTER_TYPE_DST ? 
"dst" : "src", 2930 addr); 2931 2932 igc_clear_mac_filter_hw(adapter, index); 2933 } 2934 } 2935 2936 /** 2937 * igc_add_vlan_prio_filter() - Add VLAN priority filter 2938 * @adapter: Pointer to adapter where the filter should be added 2939 * @prio: VLAN priority value 2940 * @queue: Queue number which matching frames are assigned to 2941 * 2942 * Return: 0 in case of success, negative errno code otherwise. 2943 */ 2944 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, 2945 int queue) 2946 { 2947 struct net_device *dev = adapter->netdev; 2948 struct igc_hw *hw = &adapter->hw; 2949 u32 vlanpqf; 2950 2951 vlanpqf = rd32(IGC_VLANPQF); 2952 2953 if (vlanpqf & IGC_VLANPQF_VALID(prio)) { 2954 netdev_dbg(dev, "VLAN priority filter already in use\n"); 2955 return -EEXIST; 2956 } 2957 2958 vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); 2959 vlanpqf |= IGC_VLANPQF_VALID(prio); 2960 2961 wr32(IGC_VLANPQF, vlanpqf); 2962 2963 netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", 2964 prio, queue); 2965 return 0; 2966 } 2967 2968 /** 2969 * igc_del_vlan_prio_filter() - Delete VLAN priority filter 2970 * @adapter: Pointer to adapter where the filter should be deleted from 2971 * @prio: VLAN priority value 2972 */ 2973 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) 2974 { 2975 struct igc_hw *hw = &adapter->hw; 2976 u32 vlanpqf; 2977 2978 vlanpqf = rd32(IGC_VLANPQF); 2979 2980 vlanpqf &= ~IGC_VLANPQF_VALID(prio); 2981 vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); 2982 2983 wr32(IGC_VLANPQF, vlanpqf); 2984 2985 netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", 2986 prio); 2987 } 2988 2989 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) 2990 { 2991 struct igc_hw *hw = &adapter->hw; 2992 int i; 2993 2994 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 2995 u32 etqf = rd32(IGC_ETQF(i)); 2996 2997 if (!(etqf & IGC_ETQF_FILTER_ENABLE)) 2998 return i; 2999 } 3000 3001 return -1; 
3002 } 3003 3004 /** 3005 * igc_add_etype_filter() - Add ethertype filter 3006 * @adapter: Pointer to adapter where the filter should be added 3007 * @etype: Ethertype value 3008 * @queue: If non-negative, queue assignment feature is enabled and frames 3009 * matching the filter are enqueued onto 'queue'. Otherwise, queue 3010 * assignment is disabled. 3011 * 3012 * Return: 0 in case of success, negative errno code otherwise. 3013 */ 3014 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, 3015 int queue) 3016 { 3017 struct igc_hw *hw = &adapter->hw; 3018 int index; 3019 u32 etqf; 3020 3021 index = igc_get_avail_etype_filter_slot(adapter); 3022 if (index < 0) 3023 return -ENOSPC; 3024 3025 etqf = rd32(IGC_ETQF(index)); 3026 3027 etqf &= ~IGC_ETQF_ETYPE_MASK; 3028 etqf |= etype; 3029 3030 if (queue >= 0) { 3031 etqf &= ~IGC_ETQF_QUEUE_MASK; 3032 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); 3033 etqf |= IGC_ETQF_QUEUE_ENABLE; 3034 } 3035 3036 etqf |= IGC_ETQF_FILTER_ENABLE; 3037 3038 wr32(IGC_ETQF(index), etqf); 3039 3040 netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", 3041 etype, queue); 3042 return 0; 3043 } 3044 3045 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) 3046 { 3047 struct igc_hw *hw = &adapter->hw; 3048 int i; 3049 3050 for (i = 0; i < MAX_ETYPE_FILTER; i++) { 3051 u32 etqf = rd32(IGC_ETQF(i)); 3052 3053 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) 3054 return i; 3055 } 3056 3057 return -1; 3058 } 3059 3060 /** 3061 * igc_del_etype_filter() - Delete ethertype filter 3062 * @adapter: Pointer to adapter where the filter should be deleted from 3063 * @etype: Ethertype value 3064 */ 3065 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) 3066 { 3067 struct igc_hw *hw = &adapter->hw; 3068 int index; 3069 3070 index = igc_find_etype_filter(adapter, etype); 3071 if (index < 0) 3072 return; 3073 3074 wr32(IGC_ETQF(index), 0); 3075 3076 netdev_dbg(adapter->netdev, "Delete 
ethertype filter: etype %04x\n", 3077 etype); 3078 } 3079 3080 static int igc_flex_filter_select(struct igc_adapter *adapter, 3081 struct igc_flex_filter *input, 3082 u32 *fhft) 3083 { 3084 struct igc_hw *hw = &adapter->hw; 3085 u8 fhft_index; 3086 u32 fhftsl; 3087 3088 if (input->index >= MAX_FLEX_FILTER) { 3089 dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n"); 3090 return -EINVAL; 3091 } 3092 3093 /* Indirect table select register */ 3094 fhftsl = rd32(IGC_FHFTSL); 3095 fhftsl &= ~IGC_FHFTSL_FTSL_MASK; 3096 switch (input->index) { 3097 case 0 ... 7: 3098 fhftsl |= 0x00; 3099 break; 3100 case 8 ... 15: 3101 fhftsl |= 0x01; 3102 break; 3103 case 16 ... 23: 3104 fhftsl |= 0x02; 3105 break; 3106 case 24 ... 31: 3107 fhftsl |= 0x03; 3108 break; 3109 } 3110 wr32(IGC_FHFTSL, fhftsl); 3111 3112 /* Normalize index down to host table register */ 3113 fhft_index = input->index % 8; 3114 3115 *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) : 3116 IGC_FHFT_EXT(fhft_index - 4); 3117 3118 return 0; 3119 } 3120 3121 static int igc_write_flex_filter_ll(struct igc_adapter *adapter, 3122 struct igc_flex_filter *input) 3123 { 3124 struct device *dev = &adapter->pdev->dev; 3125 struct igc_hw *hw = &adapter->hw; 3126 u8 *data = input->data; 3127 u8 *mask = input->mask; 3128 u32 queuing; 3129 u32 fhft; 3130 u32 wufc; 3131 int ret; 3132 int i; 3133 3134 /* Length has to be aligned to 8. Otherwise the filter will fail. Bail 3135 * out early to avoid surprises later. 3136 */ 3137 if (input->length % 8 != 0) { 3138 dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n"); 3139 return -EINVAL; 3140 } 3141 3142 /* Select corresponding flex filter register and get base for host table. */ 3143 ret = igc_flex_filter_select(adapter, input, &fhft); 3144 if (ret) 3145 return ret; 3146 3147 /* When adding a filter globally disable flex filter feature. That is 3148 * recommended within the datasheet. 
3149 */ 3150 wufc = rd32(IGC_WUFC); 3151 wufc &= ~IGC_WUFC_FLEX_HQ; 3152 wr32(IGC_WUFC, wufc); 3153 3154 /* Configure filter */ 3155 queuing = input->length & IGC_FHFT_LENGTH_MASK; 3156 queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK; 3157 queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK; 3158 3159 if (input->immediate_irq) 3160 queuing |= IGC_FHFT_IMM_INT; 3161 3162 if (input->drop) 3163 queuing |= IGC_FHFT_DROP; 3164 3165 wr32(fhft + 0xFC, queuing); 3166 3167 /* Write data (128 byte) and mask (128 bit) */ 3168 for (i = 0; i < 16; ++i) { 3169 const size_t data_idx = i * 8; 3170 const size_t row_idx = i * 16; 3171 u32 dw0 = 3172 (data[data_idx + 0] << 0) | 3173 (data[data_idx + 1] << 8) | 3174 (data[data_idx + 2] << 16) | 3175 (data[data_idx + 3] << 24); 3176 u32 dw1 = 3177 (data[data_idx + 4] << 0) | 3178 (data[data_idx + 5] << 8) | 3179 (data[data_idx + 6] << 16) | 3180 (data[data_idx + 7] << 24); 3181 u32 tmp; 3182 3183 /* Write row: dw0, dw1 and mask */ 3184 wr32(fhft + row_idx, dw0); 3185 wr32(fhft + row_idx + 4, dw1); 3186 3187 /* mask is only valid for MASK(7, 0) */ 3188 tmp = rd32(fhft + row_idx + 8); 3189 tmp &= ~GENMASK(7, 0); 3190 tmp |= mask[i]; 3191 wr32(fhft + row_idx + 8, tmp); 3192 } 3193 3194 /* Enable filter. */ 3195 wufc |= IGC_WUFC_FLEX_HQ; 3196 if (input->index > 8) { 3197 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. 
*/ 3198 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3199 3200 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8)); 3201 3202 wr32(IGC_WUFC_EXT, wufc_ext); 3203 } else { 3204 wufc |= (IGC_WUFC_FLX0 << input->index); 3205 } 3206 wr32(IGC_WUFC, wufc); 3207 3208 dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n", 3209 input->index); 3210 3211 return 0; 3212 } 3213 3214 static void igc_flex_filter_add_field(struct igc_flex_filter *flex, 3215 const void *src, unsigned int offset, 3216 size_t len, const void *mask) 3217 { 3218 int i; 3219 3220 /* data */ 3221 memcpy(&flex->data[offset], src, len); 3222 3223 /* mask */ 3224 for (i = 0; i < len; ++i) { 3225 const unsigned int idx = i + offset; 3226 const u8 *ptr = mask; 3227 3228 if (mask) { 3229 if (ptr[i] & 0xff) 3230 flex->mask[idx / 8] |= BIT(idx % 8); 3231 3232 continue; 3233 } 3234 3235 flex->mask[idx / 8] |= BIT(idx % 8); 3236 } 3237 } 3238 3239 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter) 3240 { 3241 struct igc_hw *hw = &adapter->hw; 3242 u32 wufc, wufc_ext; 3243 int i; 3244 3245 wufc = rd32(IGC_WUFC); 3246 wufc_ext = rd32(IGC_WUFC_EXT); 3247 3248 for (i = 0; i < MAX_FLEX_FILTER; i++) { 3249 if (i < 8) { 3250 if (!(wufc & (IGC_WUFC_FLX0 << i))) 3251 return i; 3252 } else { 3253 if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8)))) 3254 return i; 3255 } 3256 } 3257 3258 return -ENOSPC; 3259 } 3260 3261 static bool igc_flex_filter_in_use(struct igc_adapter *adapter) 3262 { 3263 struct igc_hw *hw = &adapter->hw; 3264 u32 wufc, wufc_ext; 3265 3266 wufc = rd32(IGC_WUFC); 3267 wufc_ext = rd32(IGC_WUFC_EXT); 3268 3269 if (wufc & IGC_WUFC_FILTER_MASK) 3270 return true; 3271 3272 if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK) 3273 return true; 3274 3275 return false; 3276 } 3277 3278 static int igc_add_flex_filter(struct igc_adapter *adapter, 3279 struct igc_nfc_rule *rule) 3280 { 3281 struct igc_flex_filter flex = { }; 3282 struct igc_nfc_filter *filter = &rule->filter; 3283 unsigned int eth_offset, 
user_offset; 3284 int ret, index; 3285 bool vlan; 3286 3287 index = igc_find_avail_flex_filter_slot(adapter); 3288 if (index < 0) 3289 return -ENOSPC; 3290 3291 /* Construct the flex filter: 3292 * -> dest_mac [6] 3293 * -> src_mac [6] 3294 * -> tpid [2] 3295 * -> vlan tci [2] 3296 * -> ether type [2] 3297 * -> user data [8] 3298 * -> = 26 bytes => 32 length 3299 */ 3300 flex.index = index; 3301 flex.length = 32; 3302 flex.rx_queue = rule->action; 3303 3304 vlan = rule->filter.vlan_tci || rule->filter.vlan_etype; 3305 eth_offset = vlan ? 16 : 12; 3306 user_offset = vlan ? 18 : 14; 3307 3308 /* Add destination MAC */ 3309 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3310 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0, 3311 ETH_ALEN, NULL); 3312 3313 /* Add source MAC */ 3314 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3315 igc_flex_filter_add_field(&flex, &filter->src_addr, 6, 3316 ETH_ALEN, NULL); 3317 3318 /* Add VLAN etype */ 3319 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) 3320 igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12, 3321 sizeof(filter->vlan_etype), 3322 NULL); 3323 3324 /* Add VLAN TCI */ 3325 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) 3326 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14, 3327 sizeof(filter->vlan_tci), NULL); 3328 3329 /* Add Ether type */ 3330 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3331 __be16 etype = cpu_to_be16(filter->etype); 3332 3333 igc_flex_filter_add_field(&flex, &etype, eth_offset, 3334 sizeof(etype), NULL); 3335 } 3336 3337 /* Add user data */ 3338 if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) 3339 igc_flex_filter_add_field(&flex, &filter->user_data, 3340 user_offset, 3341 sizeof(filter->user_data), 3342 filter->user_mask); 3343 3344 /* Add it down to the hardware and enable it. 
*/ 3345 ret = igc_write_flex_filter_ll(adapter, &flex); 3346 if (ret) 3347 return ret; 3348 3349 filter->flex_index = index; 3350 3351 return 0; 3352 } 3353 3354 static void igc_del_flex_filter(struct igc_adapter *adapter, 3355 u16 reg_index) 3356 { 3357 struct igc_hw *hw = &adapter->hw; 3358 u32 wufc; 3359 3360 /* Just disable the filter. The filter table itself is kept 3361 * intact. Another flex_filter_add() should override the "old" data 3362 * then. 3363 */ 3364 if (reg_index > 8) { 3365 u32 wufc_ext = rd32(IGC_WUFC_EXT); 3366 3367 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8)); 3368 wr32(IGC_WUFC_EXT, wufc_ext); 3369 } else { 3370 wufc = rd32(IGC_WUFC); 3371 3372 wufc &= ~(IGC_WUFC_FLX0 << reg_index); 3373 wr32(IGC_WUFC, wufc); 3374 } 3375 3376 if (igc_flex_filter_in_use(adapter)) 3377 return; 3378 3379 /* No filters are in use, we may disable flex filters */ 3380 wufc = rd32(IGC_WUFC); 3381 wufc &= ~IGC_WUFC_FLEX_HQ; 3382 wr32(IGC_WUFC, wufc); 3383 } 3384 3385 static int igc_enable_nfc_rule(struct igc_adapter *adapter, 3386 struct igc_nfc_rule *rule) 3387 { 3388 int err; 3389 3390 if (rule->flex) { 3391 return igc_add_flex_filter(adapter, rule); 3392 } 3393 3394 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { 3395 err = igc_add_etype_filter(adapter, rule->filter.etype, 3396 rule->action); 3397 if (err) 3398 return err; 3399 } 3400 3401 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { 3402 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3403 rule->filter.src_addr, rule->action); 3404 if (err) 3405 return err; 3406 } 3407 3408 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { 3409 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3410 rule->filter.dst_addr, rule->action); 3411 if (err) 3412 return err; 3413 } 3414 3415 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3416 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> 3417 VLAN_PRIO_SHIFT; 3418 3419 err = 
igc_add_vlan_prio_filter(adapter, prio, rule->action); 3420 if (err) 3421 return err; 3422 } 3423 3424 return 0; 3425 } 3426 3427 static void igc_disable_nfc_rule(struct igc_adapter *adapter, 3428 const struct igc_nfc_rule *rule) 3429 { 3430 if (rule->flex) { 3431 igc_del_flex_filter(adapter, rule->filter.flex_index); 3432 return; 3433 } 3434 3435 if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) 3436 igc_del_etype_filter(adapter, rule->filter.etype); 3437 3438 if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { 3439 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> 3440 VLAN_PRIO_SHIFT; 3441 3442 igc_del_vlan_prio_filter(adapter, prio); 3443 } 3444 3445 if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) 3446 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, 3447 rule->filter.src_addr); 3448 3449 if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) 3450 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, 3451 rule->filter.dst_addr); 3452 } 3453 3454 /** 3455 * igc_get_nfc_rule() - Get NFC rule 3456 * @adapter: Pointer to adapter 3457 * @location: Rule location 3458 * 3459 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3460 * 3461 * Return: Pointer to NFC rule at @location. If not found, NULL. 3462 */ 3463 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, 3464 u32 location) 3465 { 3466 struct igc_nfc_rule *rule; 3467 3468 list_for_each_entry(rule, &adapter->nfc_rule_list, list) { 3469 if (rule->location == location) 3470 return rule; 3471 if (rule->location > location) 3472 break; 3473 } 3474 3475 return NULL; 3476 } 3477 3478 /** 3479 * igc_del_nfc_rule() - Delete NFC rule 3480 * @adapter: Pointer to adapter 3481 * @rule: Pointer to rule to be deleted 3482 * 3483 * Disable NFC rule in hardware and delete it from adapter. 3484 * 3485 * Context: Expects adapter->nfc_rule_lock to be held by caller. 
3486 */ 3487 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3488 { 3489 igc_disable_nfc_rule(adapter, rule); 3490 3491 list_del(&rule->list); 3492 adapter->nfc_rule_count--; 3493 3494 kfree(rule); 3495 } 3496 3497 static void igc_flush_nfc_rules(struct igc_adapter *adapter) 3498 { 3499 struct igc_nfc_rule *rule, *tmp; 3500 3501 mutex_lock(&adapter->nfc_rule_lock); 3502 3503 list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list) 3504 igc_del_nfc_rule(adapter, rule); 3505 3506 mutex_unlock(&adapter->nfc_rule_lock); 3507 } 3508 3509 /** 3510 * igc_add_nfc_rule() - Add NFC rule 3511 * @adapter: Pointer to adapter 3512 * @rule: Pointer to rule to be added 3513 * 3514 * Enable NFC rule in hardware and add it to adapter. 3515 * 3516 * Context: Expects adapter->nfc_rule_lock to be held by caller. 3517 * 3518 * Return: 0 on success, negative errno on failure. 3519 */ 3520 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule) 3521 { 3522 struct igc_nfc_rule *pred, *cur; 3523 int err; 3524 3525 err = igc_enable_nfc_rule(adapter, rule); 3526 if (err) 3527 return err; 3528 3529 pred = NULL; 3530 list_for_each_entry(cur, &adapter->nfc_rule_list, list) { 3531 if (cur->location >= rule->location) 3532 break; 3533 pred = cur; 3534 } 3535 3536 list_add(&rule->list, pred ? 
&pred->list : &adapter->nfc_rule_list); 3537 adapter->nfc_rule_count++; 3538 return 0; 3539 } 3540 3541 static void igc_restore_nfc_rules(struct igc_adapter *adapter) 3542 { 3543 struct igc_nfc_rule *rule; 3544 3545 mutex_lock(&adapter->nfc_rule_lock); 3546 3547 list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list) 3548 igc_enable_nfc_rule(adapter, rule); 3549 3550 mutex_unlock(&adapter->nfc_rule_lock); 3551 } 3552 3553 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) 3554 { 3555 struct igc_adapter *adapter = netdev_priv(netdev); 3556 3557 return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); 3558 } 3559 3560 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) 3561 { 3562 struct igc_adapter *adapter = netdev_priv(netdev); 3563 3564 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); 3565 return 0; 3566 } 3567 3568 /** 3569 * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3570 * @netdev: network interface device structure 3571 * 3572 * The set_rx_mode entry point is called whenever the unicast or multicast 3573 * address lists or the network interface flags are updated. This routine is 3574 * responsible for configuring the hardware for proper unicast, multicast, 3575 * promiscuous mode, and all-multi behavior. 
3576 */ 3577 static void igc_set_rx_mode(struct net_device *netdev) 3578 { 3579 struct igc_adapter *adapter = netdev_priv(netdev); 3580 struct igc_hw *hw = &adapter->hw; 3581 u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE; 3582 int count; 3583 3584 /* Check for Promiscuous and All Multicast modes */ 3585 if (netdev->flags & IFF_PROMISC) { 3586 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE; 3587 } else { 3588 if (netdev->flags & IFF_ALLMULTI) { 3589 rctl |= IGC_RCTL_MPE; 3590 } else { 3591 /* Write addresses to the MTA, if the attempt fails 3592 * then we should just turn on promiscuous mode so 3593 * that we can at least receive multicast traffic 3594 */ 3595 count = igc_write_mc_addr_list(netdev); 3596 if (count < 0) 3597 rctl |= IGC_RCTL_MPE; 3598 } 3599 } 3600 3601 /* Write addresses to available RAR registers, if there is not 3602 * sufficient space to store all the addresses then enable 3603 * unicast promiscuous mode 3604 */ 3605 if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync)) 3606 rctl |= IGC_RCTL_UPE; 3607 3608 /* update state of unicast and multicast */ 3609 rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE); 3610 wr32(IGC_RCTL, rctl); 3611 3612 #if (PAGE_SIZE < 8192) 3613 if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB) 3614 rlpml = IGC_MAX_FRAME_BUILD_SKB; 3615 #endif 3616 wr32(IGC_RLPML, rlpml); 3617 } 3618 3619 /** 3620 * igc_configure - configure the hardware for RX and TX 3621 * @adapter: private board structure 3622 */ 3623 static void igc_configure(struct igc_adapter *adapter) 3624 { 3625 struct net_device *netdev = adapter->netdev; 3626 int i = 0; 3627 3628 igc_get_hw_control(adapter); 3629 igc_set_rx_mode(netdev); 3630 3631 igc_restore_vlan(adapter); 3632 3633 igc_setup_tctl(adapter); 3634 igc_setup_mrqc(adapter); 3635 igc_setup_rctl(adapter); 3636 3637 igc_set_default_mac_filter(adapter); 3638 igc_restore_nfc_rules(adapter); 3639 3640 igc_configure_tx(adapter); 3641 igc_configure_rx(adapter); 3642 3643 igc_rx_fifo_flush_base(&adapter->hw); 
3644 3645 /* call igc_desc_unused which always leaves 3646 * at least 1 descriptor unused to make sure 3647 * next_to_use != next_to_clean 3648 */ 3649 for (i = 0; i < adapter->num_rx_queues; i++) { 3650 struct igc_ring *ring = adapter->rx_ring[i]; 3651 3652 if (ring->xsk_pool) 3653 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 3654 else 3655 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 3656 } 3657 } 3658 3659 /** 3660 * igc_write_ivar - configure ivar for given MSI-X vector 3661 * @hw: pointer to the HW structure 3662 * @msix_vector: vector number we are allocating to a given ring 3663 * @index: row index of IVAR register to write within IVAR table 3664 * @offset: column offset of in IVAR, should be multiple of 8 3665 * 3666 * The IVAR table consists of 2 columns, 3667 * each containing an cause allocation for an Rx and Tx ring, and a 3668 * variable number of rows depending on the number of queues supported. 3669 */ 3670 static void igc_write_ivar(struct igc_hw *hw, int msix_vector, 3671 int index, int offset) 3672 { 3673 u32 ivar = array_rd32(IGC_IVAR0, index); 3674 3675 /* clear any bits that are currently set */ 3676 ivar &= ~((u32)0xFF << offset); 3677 3678 /* write vector and valid bit */ 3679 ivar |= (msix_vector | IGC_IVAR_VALID) << offset; 3680 3681 array_wr32(IGC_IVAR0, index, ivar); 3682 } 3683 3684 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector) 3685 { 3686 struct igc_adapter *adapter = q_vector->adapter; 3687 struct igc_hw *hw = &adapter->hw; 3688 int rx_queue = IGC_N0_QUEUE; 3689 int tx_queue = IGC_N0_QUEUE; 3690 3691 if (q_vector->rx.ring) 3692 rx_queue = q_vector->rx.ring->reg_idx; 3693 if (q_vector->tx.ring) 3694 tx_queue = q_vector->tx.ring->reg_idx; 3695 3696 switch (hw->mac.type) { 3697 case igc_i225: 3698 if (rx_queue > IGC_N0_QUEUE) 3699 igc_write_ivar(hw, msix_vector, 3700 rx_queue >> 1, 3701 (rx_queue & 0x1) << 4); 3702 if (tx_queue > IGC_N0_QUEUE) 3703 igc_write_ivar(hw, msix_vector, 3704 
tx_queue >> 1, 3705 ((tx_queue & 0x1) << 4) + 8); 3706 q_vector->eims_value = BIT(msix_vector); 3707 break; 3708 default: 3709 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n"); 3710 break; 3711 } 3712 3713 /* add q_vector eims value to global eims_enable_mask */ 3714 adapter->eims_enable_mask |= q_vector->eims_value; 3715 3716 /* configure q_vector to set itr on first interrupt */ 3717 q_vector->set_itr = 1; 3718 } 3719 3720 /** 3721 * igc_configure_msix - Configure MSI-X hardware 3722 * @adapter: Pointer to adapter structure 3723 * 3724 * igc_configure_msix sets up the hardware to properly 3725 * generate MSI-X interrupts. 3726 */ 3727 static void igc_configure_msix(struct igc_adapter *adapter) 3728 { 3729 struct igc_hw *hw = &adapter->hw; 3730 int i, vector = 0; 3731 u32 tmp; 3732 3733 adapter->eims_enable_mask = 0; 3734 3735 /* set vector for other causes, i.e. link changes */ 3736 switch (hw->mac.type) { 3737 case igc_i225: 3738 /* Turn on MSI-X capability first, or our settings 3739 * won't stick. And it will take days to debug. 
3740 */ 3741 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE | 3742 IGC_GPIE_PBA | IGC_GPIE_EIAME | 3743 IGC_GPIE_NSICR); 3744 3745 /* enable msix_other interrupt */ 3746 adapter->eims_other = BIT(vector); 3747 tmp = (vector++ | IGC_IVAR_VALID) << 8; 3748 3749 wr32(IGC_IVAR_MISC, tmp); 3750 break; 3751 default: 3752 /* do nothing, since nothing else supports MSI-X */ 3753 break; 3754 } /* switch (hw->mac.type) */ 3755 3756 adapter->eims_enable_mask |= adapter->eims_other; 3757 3758 for (i = 0; i < adapter->num_q_vectors; i++) 3759 igc_assign_vector(adapter->q_vector[i], vector++); 3760 3761 wrfl(); 3762 } 3763 3764 /** 3765 * igc_irq_enable - Enable default interrupt generation settings 3766 * @adapter: board private structure 3767 */ 3768 static void igc_irq_enable(struct igc_adapter *adapter) 3769 { 3770 struct igc_hw *hw = &adapter->hw; 3771 3772 if (adapter->msix_entries) { 3773 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA; 3774 u32 regval = rd32(IGC_EIAC); 3775 3776 wr32(IGC_EIAC, regval | adapter->eims_enable_mask); 3777 regval = rd32(IGC_EIAM); 3778 wr32(IGC_EIAM, regval | adapter->eims_enable_mask); 3779 wr32(IGC_EIMS, adapter->eims_enable_mask); 3780 wr32(IGC_IMS, ims); 3781 } else { 3782 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 3783 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA); 3784 } 3785 } 3786 3787 /** 3788 * igc_irq_disable - Mask off interrupt generation on the NIC 3789 * @adapter: board private structure 3790 */ 3791 static void igc_irq_disable(struct igc_adapter *adapter) 3792 { 3793 struct igc_hw *hw = &adapter->hw; 3794 3795 if (adapter->msix_entries) { 3796 u32 regval = rd32(IGC_EIAM); 3797 3798 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask); 3799 wr32(IGC_EIMC, adapter->eims_enable_mask); 3800 regval = rd32(IGC_EIAC); 3801 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask); 3802 } 3803 3804 wr32(IGC_IAM, 0); 3805 wr32(IGC_IMC, ~0); 3806 wrfl(); 3807 3808 if (adapter->msix_entries) { 3809 int vector = 0, i; 3810 3811 
synchronize_irq(adapter->msix_entries[vector++].vector); 3812 3813 for (i = 0; i < adapter->num_q_vectors; i++) 3814 synchronize_irq(adapter->msix_entries[vector++].vector); 3815 } else { 3816 synchronize_irq(adapter->pdev->irq); 3817 } 3818 } 3819 3820 void igc_set_flag_queue_pairs(struct igc_adapter *adapter, 3821 const u32 max_rss_queues) 3822 { 3823 /* Determine if we need to pair queues. */ 3824 /* If rss_queues > half of max_rss_queues, pair the queues in 3825 * order to conserve interrupts due to limited supply. 3826 */ 3827 if (adapter->rss_queues > (max_rss_queues / 2)) 3828 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 3829 else 3830 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS; 3831 } 3832 3833 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) 3834 { 3835 return IGC_MAX_RX_QUEUES; 3836 } 3837 3838 static void igc_init_queue_configuration(struct igc_adapter *adapter) 3839 { 3840 u32 max_rss_queues; 3841 3842 max_rss_queues = igc_get_max_rss_queues(adapter); 3843 adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); 3844 3845 igc_set_flag_queue_pairs(adapter, max_rss_queues); 3846 } 3847 3848 /** 3849 * igc_reset_q_vector - Reset config for interrupt vector 3850 * @adapter: board private structure to initialize 3851 * @v_idx: Index of vector to be reset 3852 * 3853 * If NAPI is enabled it will delete any references to the 3854 * NAPI struct. This is preparation for igc_free_q_vector. 
3855 */ 3856 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx) 3857 { 3858 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 3859 3860 /* if we're coming from igc_set_interrupt_capability, the vectors are 3861 * not yet allocated 3862 */ 3863 if (!q_vector) 3864 return; 3865 3866 if (q_vector->tx.ring) 3867 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; 3868 3869 if (q_vector->rx.ring) 3870 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL; 3871 3872 netif_napi_del(&q_vector->napi); 3873 } 3874 3875 /** 3876 * igc_free_q_vector - Free memory allocated for specific interrupt vector 3877 * @adapter: board private structure to initialize 3878 * @v_idx: Index of vector to be freed 3879 * 3880 * This function frees the memory allocated to the q_vector. 3881 */ 3882 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx) 3883 { 3884 struct igc_q_vector *q_vector = adapter->q_vector[v_idx]; 3885 3886 adapter->q_vector[v_idx] = NULL; 3887 3888 /* igc_get_stats64() might access the rings on this vector, 3889 * we must wait a grace period before freeing it. 3890 */ 3891 if (q_vector) 3892 kfree_rcu(q_vector, rcu); 3893 } 3894 3895 /** 3896 * igc_free_q_vectors - Free memory allocated for interrupt vectors 3897 * @adapter: board private structure to initialize 3898 * 3899 * This function frees the memory allocated to the q_vectors. In addition if 3900 * NAPI is enabled it will delete any references to the NAPI struct prior 3901 * to freeing the q_vector. 
3902 */ 3903 static void igc_free_q_vectors(struct igc_adapter *adapter) 3904 { 3905 int v_idx = adapter->num_q_vectors; 3906 3907 adapter->num_tx_queues = 0; 3908 adapter->num_rx_queues = 0; 3909 adapter->num_q_vectors = 0; 3910 3911 while (v_idx--) { 3912 igc_reset_q_vector(adapter, v_idx); 3913 igc_free_q_vector(adapter, v_idx); 3914 } 3915 } 3916 3917 /** 3918 * igc_update_itr - update the dynamic ITR value based on statistics 3919 * @q_vector: pointer to q_vector 3920 * @ring_container: ring info to update the itr for 3921 * 3922 * Stores a new ITR value based on packets and byte 3923 * counts during the last interrupt. The advantage of per interrupt 3924 * computation is faster updates and more accurate ITR for the current 3925 * traffic pattern. Constants in this function were computed 3926 * based on theoretical maximum wire speed and thresholds were set based 3927 * on testing data as well as attempting to minimize response time 3928 * while increasing bulk throughput. 3929 * NOTE: These calculations are only valid when operating in a single- 3930 * queue environment. 
3931 */ 3932 static void igc_update_itr(struct igc_q_vector *q_vector, 3933 struct igc_ring_container *ring_container) 3934 { 3935 unsigned int packets = ring_container->total_packets; 3936 unsigned int bytes = ring_container->total_bytes; 3937 u8 itrval = ring_container->itr; 3938 3939 /* no packets, exit with status unchanged */ 3940 if (packets == 0) 3941 return; 3942 3943 switch (itrval) { 3944 case lowest_latency: 3945 /* handle TSO and jumbo frames */ 3946 if (bytes / packets > 8000) 3947 itrval = bulk_latency; 3948 else if ((packets < 5) && (bytes > 512)) 3949 itrval = low_latency; 3950 break; 3951 case low_latency: /* 50 usec aka 20000 ints/s */ 3952 if (bytes > 10000) { 3953 /* this if handles the TSO accounting */ 3954 if (bytes / packets > 8000) 3955 itrval = bulk_latency; 3956 else if ((packets < 10) || ((bytes / packets) > 1200)) 3957 itrval = bulk_latency; 3958 else if ((packets > 35)) 3959 itrval = lowest_latency; 3960 } else if (bytes / packets > 2000) { 3961 itrval = bulk_latency; 3962 } else if (packets <= 2 && bytes < 512) { 3963 itrval = lowest_latency; 3964 } 3965 break; 3966 case bulk_latency: /* 250 usec aka 4000 ints/s */ 3967 if (bytes > 25000) { 3968 if (packets > 35) 3969 itrval = low_latency; 3970 } else if (bytes < 1500) { 3971 itrval = low_latency; 3972 } 3973 break; 3974 } 3975 3976 /* clear work counters since we have the values we need */ 3977 ring_container->total_bytes = 0; 3978 ring_container->total_packets = 0; 3979 3980 /* write updated itr to ring container */ 3981 ring_container->itr = itrval; 3982 } 3983 3984 static void igc_set_itr(struct igc_q_vector *q_vector) 3985 { 3986 struct igc_adapter *adapter = q_vector->adapter; 3987 u32 new_itr = q_vector->itr_val; 3988 u8 current_itr = 0; 3989 3990 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 3991 switch (adapter->link_speed) { 3992 case SPEED_10: 3993 case SPEED_100: 3994 current_itr = 0; 3995 new_itr = IGC_4K_ITR; 3996 goto set_itr_now; 3997 default: 3998 
break; 3999 } 4000 4001 igc_update_itr(q_vector, &q_vector->tx); 4002 igc_update_itr(q_vector, &q_vector->rx); 4003 4004 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 4005 4006 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4007 if (current_itr == lowest_latency && 4008 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4009 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4010 current_itr = low_latency; 4011 4012 switch (current_itr) { 4013 /* counts and packets in update_itr are dependent on these numbers */ 4014 case lowest_latency: 4015 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */ 4016 break; 4017 case low_latency: 4018 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */ 4019 break; 4020 case bulk_latency: 4021 new_itr = IGC_4K_ITR; /* 4,000 ints/sec */ 4022 break; 4023 default: 4024 break; 4025 } 4026 4027 set_itr_now: 4028 if (new_itr != q_vector->itr_val) { 4029 /* this attempts to bias the interrupt rate towards Bulk 4030 * by adding intermediate steps when interrupt rate is 4031 * increasing 4032 */ 4033 new_itr = new_itr > q_vector->itr_val ? 4034 max((new_itr * q_vector->itr_val) / 4035 (new_itr + (q_vector->itr_val >> 2)), 4036 new_itr) : new_itr; 4037 /* Don't write the value here; it resets the adapter's 4038 * internal timer, and causes us to delay far longer than 4039 * we should between interrupts. Instead, we write the ITR 4040 * value at the beginning of the next interrupt so the timing 4041 * ends up being correct. 
4042 */ 4043 q_vector->itr_val = new_itr; 4044 q_vector->set_itr = 1; 4045 } 4046 } 4047 4048 static void igc_reset_interrupt_capability(struct igc_adapter *adapter) 4049 { 4050 int v_idx = adapter->num_q_vectors; 4051 4052 if (adapter->msix_entries) { 4053 pci_disable_msix(adapter->pdev); 4054 kfree(adapter->msix_entries); 4055 adapter->msix_entries = NULL; 4056 } else if (adapter->flags & IGC_FLAG_HAS_MSI) { 4057 pci_disable_msi(adapter->pdev); 4058 } 4059 4060 while (v_idx--) 4061 igc_reset_q_vector(adapter, v_idx); 4062 } 4063 4064 /** 4065 * igc_set_interrupt_capability - set MSI or MSI-X if supported 4066 * @adapter: Pointer to adapter structure 4067 * @msix: boolean value for MSI-X capability 4068 * 4069 * Attempt to configure interrupts using the best available 4070 * capabilities of the hardware and kernel. 4071 */ 4072 static void igc_set_interrupt_capability(struct igc_adapter *adapter, 4073 bool msix) 4074 { 4075 int numvecs, i; 4076 int err; 4077 4078 if (!msix) 4079 goto msi_only; 4080 adapter->flags |= IGC_FLAG_HAS_MSIX; 4081 4082 /* Number of supported queues. 
*/ 4083 adapter->num_rx_queues = adapter->rss_queues; 4084 4085 adapter->num_tx_queues = adapter->rss_queues; 4086 4087 /* start with one vector for every Rx queue */ 4088 numvecs = adapter->num_rx_queues; 4089 4090 /* if Tx handler is separate add 1 for every Tx queue */ 4091 if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS)) 4092 numvecs += adapter->num_tx_queues; 4093 4094 /* store the number of vectors reserved for queues */ 4095 adapter->num_q_vectors = numvecs; 4096 4097 /* add 1 vector for link status interrupts */ 4098 numvecs++; 4099 4100 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 4101 GFP_KERNEL); 4102 4103 if (!adapter->msix_entries) 4104 return; 4105 4106 /* populate entry values */ 4107 for (i = 0; i < numvecs; i++) 4108 adapter->msix_entries[i].entry = i; 4109 4110 err = pci_enable_msix_range(adapter->pdev, 4111 adapter->msix_entries, 4112 numvecs, 4113 numvecs); 4114 if (err > 0) 4115 return; 4116 4117 kfree(adapter->msix_entries); 4118 adapter->msix_entries = NULL; 4119 4120 igc_reset_interrupt_capability(adapter); 4121 4122 msi_only: 4123 adapter->flags &= ~IGC_FLAG_HAS_MSIX; 4124 4125 adapter->rss_queues = 1; 4126 adapter->flags |= IGC_FLAG_QUEUE_PAIRS; 4127 adapter->num_rx_queues = 1; 4128 adapter->num_tx_queues = 1; 4129 adapter->num_q_vectors = 1; 4130 if (!pci_enable_msi(adapter->pdev)) 4131 adapter->flags |= IGC_FLAG_HAS_MSI; 4132 } 4133 4134 /** 4135 * igc_update_ring_itr - update the dynamic ITR value based on packet size 4136 * @q_vector: pointer to q_vector 4137 * 4138 * Stores a new ITR value based on strictly on packet size. This 4139 * algorithm is less sophisticated than that used in igc_update_itr, 4140 * due to the difficulty of synchronizing statistics across multiple 4141 * receive rings. The divisors and thresholds used by this function 4142 * were determined based on theoretical maximum wire speed and testing 4143 * data, in order to minimize response time while increasing bulk 4144 * throughput. 
4145 * NOTE: This function is called only when operating in a multiqueue 4146 * receive environment. 4147 */ 4148 static void igc_update_ring_itr(struct igc_q_vector *q_vector) 4149 { 4150 struct igc_adapter *adapter = q_vector->adapter; 4151 int new_val = q_vector->itr_val; 4152 int avg_wire_size = 0; 4153 unsigned int packets; 4154 4155 /* For non-gigabit speeds, just fix the interrupt rate at 4000 4156 * ints/sec - ITR timer value of 120 ticks. 4157 */ 4158 switch (adapter->link_speed) { 4159 case SPEED_10: 4160 case SPEED_100: 4161 new_val = IGC_4K_ITR; 4162 goto set_itr_val; 4163 default: 4164 break; 4165 } 4166 4167 packets = q_vector->rx.total_packets; 4168 if (packets) 4169 avg_wire_size = q_vector->rx.total_bytes / packets; 4170 4171 packets = q_vector->tx.total_packets; 4172 if (packets) 4173 avg_wire_size = max_t(u32, avg_wire_size, 4174 q_vector->tx.total_bytes / packets); 4175 4176 /* if avg_wire_size isn't set no work was done */ 4177 if (!avg_wire_size) 4178 goto clear_counts; 4179 4180 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 4181 avg_wire_size += 24; 4182 4183 /* Don't starve jumbo frames */ 4184 avg_wire_size = min(avg_wire_size, 3000); 4185 4186 /* Give a little boost to mid-size frames */ 4187 if (avg_wire_size > 300 && avg_wire_size < 1200) 4188 new_val = avg_wire_size / 3; 4189 else 4190 new_val = avg_wire_size / 2; 4191 4192 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 4193 if (new_val < IGC_20K_ITR && 4194 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 4195 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 4196 new_val = IGC_20K_ITR; 4197 4198 set_itr_val: 4199 if (new_val != q_vector->itr_val) { 4200 q_vector->itr_val = new_val; 4201 q_vector->set_itr = 1; 4202 } 4203 clear_counts: 4204 q_vector->rx.total_bytes = 0; 4205 q_vector->rx.total_packets = 0; 4206 q_vector->tx.total_bytes = 0; 4207 q_vector->tx.total_packets = 0; 4208 } 4209 4210 static void igc_ring_irq_enable(struct 
igc_q_vector *q_vector) 4211 { 4212 struct igc_adapter *adapter = q_vector->adapter; 4213 struct igc_hw *hw = &adapter->hw; 4214 4215 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 4216 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 4217 if (adapter->num_q_vectors == 1) 4218 igc_set_itr(q_vector); 4219 else 4220 igc_update_ring_itr(q_vector); 4221 } 4222 4223 if (!test_bit(__IGC_DOWN, &adapter->state)) { 4224 if (adapter->msix_entries) 4225 wr32(IGC_EIMS, q_vector->eims_value); 4226 else 4227 igc_irq_enable(adapter); 4228 } 4229 } 4230 4231 static void igc_add_ring(struct igc_ring *ring, 4232 struct igc_ring_container *head) 4233 { 4234 head->ring = ring; 4235 head->count++; 4236 } 4237 4238 /** 4239 * igc_cache_ring_register - Descriptor ring to register mapping 4240 * @adapter: board private structure to initialize 4241 * 4242 * Once we know the feature-set enabled for the device, we'll cache 4243 * the register offset the descriptor ring is assigned to. 4244 */ 4245 static void igc_cache_ring_register(struct igc_adapter *adapter) 4246 { 4247 int i = 0, j = 0; 4248 4249 switch (adapter->hw.mac.type) { 4250 case igc_i225: 4251 default: 4252 for (; i < adapter->num_rx_queues; i++) 4253 adapter->rx_ring[i]->reg_idx = i; 4254 for (; j < adapter->num_tx_queues; j++) 4255 adapter->tx_ring[j]->reg_idx = j; 4256 break; 4257 } 4258 } 4259 4260 /** 4261 * igc_poll - NAPI Rx polling callback 4262 * @napi: napi polling structure 4263 * @budget: count of how many packets we should handle 4264 */ 4265 static int igc_poll(struct napi_struct *napi, int budget) 4266 { 4267 struct igc_q_vector *q_vector = container_of(napi, 4268 struct igc_q_vector, 4269 napi); 4270 struct igc_ring *rx_ring = q_vector->rx.ring; 4271 bool clean_complete = true; 4272 int work_done = 0; 4273 4274 if (q_vector->tx.ring) 4275 clean_complete = igc_clean_tx_irq(q_vector, budget); 4276 4277 if (rx_ring) { 4278 int cleaned = rx_ring->xsk_pool ? 
4279 igc_clean_rx_irq_zc(q_vector, budget) : 4280 igc_clean_rx_irq(q_vector, budget); 4281 4282 work_done += cleaned; 4283 if (cleaned >= budget) 4284 clean_complete = false; 4285 } 4286 4287 /* If all work not completed, return budget and keep polling */ 4288 if (!clean_complete) 4289 return budget; 4290 4291 /* Exit the polling mode, but don't re-enable interrupts if stack might 4292 * poll us due to busy-polling 4293 */ 4294 if (likely(napi_complete_done(napi, work_done))) 4295 igc_ring_irq_enable(q_vector); 4296 4297 return min(work_done, budget - 1); 4298 } 4299 4300 /** 4301 * igc_alloc_q_vector - Allocate memory for a single interrupt vector 4302 * @adapter: board private structure to initialize 4303 * @v_count: q_vectors allocated on adapter, used for ring interleaving 4304 * @v_idx: index of vector in adapter struct 4305 * @txr_count: total number of Tx rings to allocate 4306 * @txr_idx: index of first Tx ring to allocate 4307 * @rxr_count: total number of Rx rings to allocate 4308 * @rxr_idx: index of first Rx ring to allocate 4309 * 4310 * We allocate one q_vector. If allocation fails we return -ENOMEM. 
4311 */ 4312 static int igc_alloc_q_vector(struct igc_adapter *adapter, 4313 unsigned int v_count, unsigned int v_idx, 4314 unsigned int txr_count, unsigned int txr_idx, 4315 unsigned int rxr_count, unsigned int rxr_idx) 4316 { 4317 struct igc_q_vector *q_vector; 4318 struct igc_ring *ring; 4319 int ring_count; 4320 4321 /* igc only supports 1 Tx and/or 1 Rx queue per vector */ 4322 if (txr_count > 1 || rxr_count > 1) 4323 return -ENOMEM; 4324 4325 ring_count = txr_count + rxr_count; 4326 4327 /* allocate q_vector and rings */ 4328 q_vector = adapter->q_vector[v_idx]; 4329 if (!q_vector) 4330 q_vector = kzalloc(struct_size(q_vector, ring, ring_count), 4331 GFP_KERNEL); 4332 else 4333 memset(q_vector, 0, struct_size(q_vector, ring, ring_count)); 4334 if (!q_vector) 4335 return -ENOMEM; 4336 4337 /* initialize NAPI */ 4338 netif_napi_add(adapter->netdev, &q_vector->napi, 4339 igc_poll, 64); 4340 4341 /* tie q_vector and adapter together */ 4342 adapter->q_vector[v_idx] = q_vector; 4343 q_vector->adapter = adapter; 4344 4345 /* initialize work limits */ 4346 q_vector->tx.work_limit = adapter->tx_work_limit; 4347 4348 /* initialize ITR configuration */ 4349 q_vector->itr_register = adapter->io_addr + IGC_EITR(0); 4350 q_vector->itr_val = IGC_START_ITR; 4351 4352 /* initialize pointer to rings */ 4353 ring = q_vector->ring; 4354 4355 /* initialize ITR */ 4356 if (rxr_count) { 4357 /* rx or rx/tx vector */ 4358 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) 4359 q_vector->itr_val = adapter->rx_itr_setting; 4360 } else { 4361 /* tx only vector */ 4362 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) 4363 q_vector->itr_val = adapter->tx_itr_setting; 4364 } 4365 4366 if (txr_count) { 4367 /* assign generic ring traits */ 4368 ring->dev = &adapter->pdev->dev; 4369 ring->netdev = adapter->netdev; 4370 4371 /* configure backlink on ring */ 4372 ring->q_vector = q_vector; 4373 4374 /* update q_vector Tx values */ 4375 igc_add_ring(ring, &q_vector->tx); 
4376 4377 /* apply Tx specific ring traits */ 4378 ring->count = adapter->tx_ring_count; 4379 ring->queue_index = txr_idx; 4380 4381 /* assign ring to adapter */ 4382 adapter->tx_ring[txr_idx] = ring; 4383 4384 /* push pointer to next ring */ 4385 ring++; 4386 } 4387 4388 if (rxr_count) { 4389 /* assign generic ring traits */ 4390 ring->dev = &adapter->pdev->dev; 4391 ring->netdev = adapter->netdev; 4392 4393 /* configure backlink on ring */ 4394 ring->q_vector = q_vector; 4395 4396 /* update q_vector Rx values */ 4397 igc_add_ring(ring, &q_vector->rx); 4398 4399 /* apply Rx specific ring traits */ 4400 ring->count = adapter->rx_ring_count; 4401 ring->queue_index = rxr_idx; 4402 4403 /* assign ring to adapter */ 4404 adapter->rx_ring[rxr_idx] = ring; 4405 } 4406 4407 return 0; 4408 } 4409 4410 /** 4411 * igc_alloc_q_vectors - Allocate memory for interrupt vectors 4412 * @adapter: board private structure to initialize 4413 * 4414 * We allocate one q_vector per queue interrupt. If allocation fails we 4415 * return -ENOMEM. 
4416 */ 4417 static int igc_alloc_q_vectors(struct igc_adapter *adapter) 4418 { 4419 int rxr_remaining = adapter->num_rx_queues; 4420 int txr_remaining = adapter->num_tx_queues; 4421 int rxr_idx = 0, txr_idx = 0, v_idx = 0; 4422 int q_vectors = adapter->num_q_vectors; 4423 int err; 4424 4425 if (q_vectors >= (rxr_remaining + txr_remaining)) { 4426 for (; rxr_remaining; v_idx++) { 4427 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4428 0, 0, 1, rxr_idx); 4429 4430 if (err) 4431 goto err_out; 4432 4433 /* update counts and index */ 4434 rxr_remaining--; 4435 rxr_idx++; 4436 } 4437 } 4438 4439 for (; v_idx < q_vectors; v_idx++) { 4440 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); 4441 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); 4442 4443 err = igc_alloc_q_vector(adapter, q_vectors, v_idx, 4444 tqpv, txr_idx, rqpv, rxr_idx); 4445 4446 if (err) 4447 goto err_out; 4448 4449 /* update counts and index */ 4450 rxr_remaining -= rqpv; 4451 txr_remaining -= tqpv; 4452 rxr_idx++; 4453 txr_idx++; 4454 } 4455 4456 return 0; 4457 4458 err_out: 4459 adapter->num_tx_queues = 0; 4460 adapter->num_rx_queues = 0; 4461 adapter->num_q_vectors = 0; 4462 4463 while (v_idx--) 4464 igc_free_q_vector(adapter, v_idx); 4465 4466 return -ENOMEM; 4467 } 4468 4469 /** 4470 * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 4471 * @adapter: Pointer to adapter structure 4472 * @msix: boolean for MSI-X capability 4473 * 4474 * This function initializes the interrupts and allocates all of the queues. 
4475 */ 4476 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) 4477 { 4478 struct net_device *dev = adapter->netdev; 4479 int err = 0; 4480 4481 igc_set_interrupt_capability(adapter, msix); 4482 4483 err = igc_alloc_q_vectors(adapter); 4484 if (err) { 4485 netdev_err(dev, "Unable to allocate memory for vectors\n"); 4486 goto err_alloc_q_vectors; 4487 } 4488 4489 igc_cache_ring_register(adapter); 4490 4491 return 0; 4492 4493 err_alloc_q_vectors: 4494 igc_reset_interrupt_capability(adapter); 4495 return err; 4496 } 4497 4498 /** 4499 * igc_sw_init - Initialize general software structures (struct igc_adapter) 4500 * @adapter: board private structure to initialize 4501 * 4502 * igc_sw_init initializes the Adapter private data structure. 4503 * Fields are initialized based on PCI device information and 4504 * OS network device settings (MTU size). 4505 */ 4506 static int igc_sw_init(struct igc_adapter *adapter) 4507 { 4508 struct net_device *netdev = adapter->netdev; 4509 struct pci_dev *pdev = adapter->pdev; 4510 struct igc_hw *hw = &adapter->hw; 4511 4512 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 4513 4514 /* set default ring sizes */ 4515 adapter->tx_ring_count = IGC_DEFAULT_TXD; 4516 adapter->rx_ring_count = IGC_DEFAULT_RXD; 4517 4518 /* set default ITR values */ 4519 adapter->rx_itr_setting = IGC_DEFAULT_ITR; 4520 adapter->tx_itr_setting = IGC_DEFAULT_ITR; 4521 4522 /* set default work limits */ 4523 adapter->tx_work_limit = IGC_DEFAULT_TX_WORK; 4524 4525 /* adjust max frame to be at least the size of a standard frame */ 4526 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 4527 VLAN_HLEN; 4528 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 4529 4530 mutex_init(&adapter->nfc_rule_lock); 4531 INIT_LIST_HEAD(&adapter->nfc_rule_list); 4532 adapter->nfc_rule_count = 0; 4533 4534 spin_lock_init(&adapter->stats64_lock); 4535 /* Assume MSI-X interrupts, will be checked during IRQ allocation */ 4536 
adapter->flags |= IGC_FLAG_HAS_MSIX; 4537 4538 igc_init_queue_configuration(adapter); 4539 4540 /* This call may decrease the number of queues */ 4541 if (igc_init_interrupt_scheme(adapter, true)) { 4542 netdev_err(netdev, "Unable to allocate memory for queues\n"); 4543 return -ENOMEM; 4544 } 4545 4546 /* Explicitly disable IRQ since the NIC can be in any state. */ 4547 igc_irq_disable(adapter); 4548 4549 set_bit(__IGC_DOWN, &adapter->state); 4550 4551 return 0; 4552 } 4553 4554 /** 4555 * igc_up - Open the interface and prepare it to handle traffic 4556 * @adapter: board private structure 4557 */ 4558 void igc_up(struct igc_adapter *adapter) 4559 { 4560 struct igc_hw *hw = &adapter->hw; 4561 int i = 0; 4562 4563 /* hardware has been reset, we need to reload some things */ 4564 igc_configure(adapter); 4565 4566 clear_bit(__IGC_DOWN, &adapter->state); 4567 4568 for (i = 0; i < adapter->num_q_vectors; i++) 4569 napi_enable(&adapter->q_vector[i]->napi); 4570 4571 if (adapter->msix_entries) 4572 igc_configure_msix(adapter); 4573 else 4574 igc_assign_vector(adapter->q_vector[0], 0); 4575 4576 /* Clear any pending interrupts. */ 4577 rd32(IGC_ICR); 4578 igc_irq_enable(adapter); 4579 4580 netif_tx_start_all_queues(adapter->netdev); 4581 4582 /* start the watchdog. */ 4583 hw->mac.get_link_status = true; 4584 schedule_work(&adapter->watchdog_task); 4585 } 4586 4587 /** 4588 * igc_update_stats - Update the board statistics counters 4589 * @adapter: board private structure 4590 */ 4591 void igc_update_stats(struct igc_adapter *adapter) 4592 { 4593 struct rtnl_link_stats64 *net_stats = &adapter->stats64; 4594 struct pci_dev *pdev = adapter->pdev; 4595 struct igc_hw *hw = &adapter->hw; 4596 u64 _bytes, _packets; 4597 u64 bytes, packets; 4598 unsigned int start; 4599 u32 mpc; 4600 int i; 4601 4602 /* Prevent stats update while adapter is being reset, or if the pci 4603 * connection is down. 
4604 */ 4605 if (adapter->link_speed == 0) 4606 return; 4607 if (pci_channel_offline(pdev)) 4608 return; 4609 4610 packets = 0; 4611 bytes = 0; 4612 4613 rcu_read_lock(); 4614 for (i = 0; i < adapter->num_rx_queues; i++) { 4615 struct igc_ring *ring = adapter->rx_ring[i]; 4616 u32 rqdpc = rd32(IGC_RQDPC(i)); 4617 4618 if (hw->mac.type >= igc_i225) 4619 wr32(IGC_RQDPC(i), 0); 4620 4621 if (rqdpc) { 4622 ring->rx_stats.drops += rqdpc; 4623 net_stats->rx_fifo_errors += rqdpc; 4624 } 4625 4626 do { 4627 start = u64_stats_fetch_begin_irq(&ring->rx_syncp); 4628 _bytes = ring->rx_stats.bytes; 4629 _packets = ring->rx_stats.packets; 4630 } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); 4631 bytes += _bytes; 4632 packets += _packets; 4633 } 4634 4635 net_stats->rx_bytes = bytes; 4636 net_stats->rx_packets = packets; 4637 4638 packets = 0; 4639 bytes = 0; 4640 for (i = 0; i < adapter->num_tx_queues; i++) { 4641 struct igc_ring *ring = adapter->tx_ring[i]; 4642 4643 do { 4644 start = u64_stats_fetch_begin_irq(&ring->tx_syncp); 4645 _bytes = ring->tx_stats.bytes; 4646 _packets = ring->tx_stats.packets; 4647 } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); 4648 bytes += _bytes; 4649 packets += _packets; 4650 } 4651 net_stats->tx_bytes = bytes; 4652 net_stats->tx_packets = packets; 4653 rcu_read_unlock(); 4654 4655 /* read stats registers */ 4656 adapter->stats.crcerrs += rd32(IGC_CRCERRS); 4657 adapter->stats.gprc += rd32(IGC_GPRC); 4658 adapter->stats.gorc += rd32(IGC_GORCL); 4659 rd32(IGC_GORCH); /* clear GORCL */ 4660 adapter->stats.bprc += rd32(IGC_BPRC); 4661 adapter->stats.mprc += rd32(IGC_MPRC); 4662 adapter->stats.roc += rd32(IGC_ROC); 4663 4664 adapter->stats.prc64 += rd32(IGC_PRC64); 4665 adapter->stats.prc127 += rd32(IGC_PRC127); 4666 adapter->stats.prc255 += rd32(IGC_PRC255); 4667 adapter->stats.prc511 += rd32(IGC_PRC511); 4668 adapter->stats.prc1023 += rd32(IGC_PRC1023); 4669 adapter->stats.prc1522 += rd32(IGC_PRC1522); 4670 
adapter->stats.tlpic += rd32(IGC_TLPIC); 4671 adapter->stats.rlpic += rd32(IGC_RLPIC); 4672 adapter->stats.hgptc += rd32(IGC_HGPTC); 4673 4674 mpc = rd32(IGC_MPC); 4675 adapter->stats.mpc += mpc; 4676 net_stats->rx_fifo_errors += mpc; 4677 adapter->stats.scc += rd32(IGC_SCC); 4678 adapter->stats.ecol += rd32(IGC_ECOL); 4679 adapter->stats.mcc += rd32(IGC_MCC); 4680 adapter->stats.latecol += rd32(IGC_LATECOL); 4681 adapter->stats.dc += rd32(IGC_DC); 4682 adapter->stats.rlec += rd32(IGC_RLEC); 4683 adapter->stats.xonrxc += rd32(IGC_XONRXC); 4684 adapter->stats.xontxc += rd32(IGC_XONTXC); 4685 adapter->stats.xoffrxc += rd32(IGC_XOFFRXC); 4686 adapter->stats.xofftxc += rd32(IGC_XOFFTXC); 4687 adapter->stats.fcruc += rd32(IGC_FCRUC); 4688 adapter->stats.gptc += rd32(IGC_GPTC); 4689 adapter->stats.gotc += rd32(IGC_GOTCL); 4690 rd32(IGC_GOTCH); /* clear GOTCL */ 4691 adapter->stats.rnbc += rd32(IGC_RNBC); 4692 adapter->stats.ruc += rd32(IGC_RUC); 4693 adapter->stats.rfc += rd32(IGC_RFC); 4694 adapter->stats.rjc += rd32(IGC_RJC); 4695 adapter->stats.tor += rd32(IGC_TORH); 4696 adapter->stats.tot += rd32(IGC_TOTH); 4697 adapter->stats.tpr += rd32(IGC_TPR); 4698 4699 adapter->stats.ptc64 += rd32(IGC_PTC64); 4700 adapter->stats.ptc127 += rd32(IGC_PTC127); 4701 adapter->stats.ptc255 += rd32(IGC_PTC255); 4702 adapter->stats.ptc511 += rd32(IGC_PTC511); 4703 adapter->stats.ptc1023 += rd32(IGC_PTC1023); 4704 adapter->stats.ptc1522 += rd32(IGC_PTC1522); 4705 4706 adapter->stats.mptc += rd32(IGC_MPTC); 4707 adapter->stats.bptc += rd32(IGC_BPTC); 4708 4709 adapter->stats.tpt += rd32(IGC_TPT); 4710 adapter->stats.colc += rd32(IGC_COLC); 4711 adapter->stats.colc += rd32(IGC_RERC); 4712 4713 adapter->stats.algnerrc += rd32(IGC_ALGNERRC); 4714 4715 adapter->stats.tsctc += rd32(IGC_TSCTC); 4716 4717 adapter->stats.iac += rd32(IGC_IAC); 4718 4719 /* Fill out the OS statistics structure */ 4720 net_stats->multicast = adapter->stats.mprc; 4721 net_stats->collisions = adapter->stats.colc; 
4722 4723 /* Rx Errors */ 4724 4725 /* RLEC on some newer hardware can be incorrect so build 4726 * our own version based on RUC and ROC 4727 */ 4728 net_stats->rx_errors = adapter->stats.rxerrc + 4729 adapter->stats.crcerrs + adapter->stats.algnerrc + 4730 adapter->stats.ruc + adapter->stats.roc + 4731 adapter->stats.cexterr; 4732 net_stats->rx_length_errors = adapter->stats.ruc + 4733 adapter->stats.roc; 4734 net_stats->rx_crc_errors = adapter->stats.crcerrs; 4735 net_stats->rx_frame_errors = adapter->stats.algnerrc; 4736 net_stats->rx_missed_errors = adapter->stats.mpc; 4737 4738 /* Tx Errors */ 4739 net_stats->tx_errors = adapter->stats.ecol + 4740 adapter->stats.latecol; 4741 net_stats->tx_aborted_errors = adapter->stats.ecol; 4742 net_stats->tx_window_errors = adapter->stats.latecol; 4743 net_stats->tx_carrier_errors = adapter->stats.tncrs; 4744 4745 /* Tx Dropped needs to be maintained elsewhere */ 4746 4747 /* Management Stats */ 4748 adapter->stats.mgptc += rd32(IGC_MGTPTC); 4749 adapter->stats.mgprc += rd32(IGC_MGTPRC); 4750 adapter->stats.mgpdc += rd32(IGC_MGTPDC); 4751 } 4752 4753 /** 4754 * igc_down - Close the interface 4755 * @adapter: board private structure 4756 */ 4757 void igc_down(struct igc_adapter *adapter) 4758 { 4759 struct net_device *netdev = adapter->netdev; 4760 struct igc_hw *hw = &adapter->hw; 4761 u32 tctl, rctl; 4762 int i = 0; 4763 4764 set_bit(__IGC_DOWN, &adapter->state); 4765 4766 igc_ptp_suspend(adapter); 4767 4768 /* disable receives in the hardware */ 4769 rctl = rd32(IGC_RCTL); 4770 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); 4771 /* flush and sleep below */ 4772 4773 /* set trans_start so we don't get spurious watchdogs during reset */ 4774 netif_trans_update(netdev); 4775 4776 netif_carrier_off(netdev); 4777 netif_tx_stop_all_queues(netdev); 4778 4779 /* disable transmits in the hardware */ 4780 tctl = rd32(IGC_TCTL); 4781 tctl &= ~IGC_TCTL_EN; 4782 wr32(IGC_TCTL, tctl); 4783 /* flush both disables and wait for them to finish */ 
4784 wrfl(); 4785 usleep_range(10000, 20000); 4786 4787 igc_irq_disable(adapter); 4788 4789 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 4790 4791 for (i = 0; i < adapter->num_q_vectors; i++) { 4792 if (adapter->q_vector[i]) { 4793 napi_synchronize(&adapter->q_vector[i]->napi); 4794 napi_disable(&adapter->q_vector[i]->napi); 4795 } 4796 } 4797 4798 del_timer_sync(&adapter->watchdog_timer); 4799 del_timer_sync(&adapter->phy_info_timer); 4800 4801 /* record the stats before reset*/ 4802 spin_lock(&adapter->stats64_lock); 4803 igc_update_stats(adapter); 4804 spin_unlock(&adapter->stats64_lock); 4805 4806 adapter->link_speed = 0; 4807 adapter->link_duplex = 0; 4808 4809 if (!pci_channel_offline(adapter->pdev)) 4810 igc_reset(adapter); 4811 4812 /* clear VLAN promisc flag so VFTA will be updated if necessary */ 4813 adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; 4814 4815 igc_clean_all_tx_rings(adapter); 4816 igc_clean_all_rx_rings(adapter); 4817 } 4818 4819 void igc_reinit_locked(struct igc_adapter *adapter) 4820 { 4821 while (test_and_set_bit(__IGC_RESETTING, &adapter->state)) 4822 usleep_range(1000, 2000); 4823 igc_down(adapter); 4824 igc_up(adapter); 4825 clear_bit(__IGC_RESETTING, &adapter->state); 4826 } 4827 4828 static void igc_reset_task(struct work_struct *work) 4829 { 4830 struct igc_adapter *adapter; 4831 4832 adapter = container_of(work, struct igc_adapter, reset_task); 4833 4834 rtnl_lock(); 4835 /* If we're already down or resetting, just bail */ 4836 if (test_bit(__IGC_DOWN, &adapter->state) || 4837 test_bit(__IGC_RESETTING, &adapter->state)) { 4838 rtnl_unlock(); 4839 return; 4840 } 4841 4842 igc_rings_dump(adapter); 4843 igc_regs_dump(adapter); 4844 netdev_err(adapter->netdev, "Reset adapter\n"); 4845 igc_reinit_locked(adapter); 4846 rtnl_unlock(); 4847 } 4848 4849 /** 4850 * igc_change_mtu - Change the Maximum Transfer Unit 4851 * @netdev: network interface device structure 4852 * @new_mtu: new value for maximum frame size 4853 * 4854 * Returns 0 on 
success, negative on failure
 */
static int igc_change_mtu(struct net_device *netdev, int new_mtu)
{
	/* frame size on the wire: MTU plus Ethernet header, FCS and VLAN tag */
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
	struct igc_adapter *adapter = netdev_priv(netdev);

	/* XDP is limited to standard-size frames */
	if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
		netdev_dbg(netdev, "Jumbo frames not supported with XDP\n");
		return -EINVAL;
	}

	/* adjust max frame to be at least the size of a standard frame */
	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;

	/* serialize against any other reset in flight */
	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
		usleep_range(1000, 2000);

	/* igc_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;

	if (netif_running(netdev))
		igc_down(adapter);

	netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	/* a running interface is brought back up; otherwise only the
	 * hardware is reset
	 */
	if (netif_running(netdev))
		igc_up(adapter);
	else
		igc_reset(adapter);

	clear_bit(__IGC_RESETTING, &adapter->state);

	return 0;
}

/**
 * igc_get_stats64 - Get System Network Statistics
 * @netdev: network interface device structure
 * @stats: rtnl_link_stats64 pointer
 *
 * Copies the driver's statistics snapshot into @stats.
 * The statistics are updated here and also from the timer callback.
4899 */ 4900 static void igc_get_stats64(struct net_device *netdev, 4901 struct rtnl_link_stats64 *stats) 4902 { 4903 struct igc_adapter *adapter = netdev_priv(netdev); 4904 4905 spin_lock(&adapter->stats64_lock); 4906 if (!test_bit(__IGC_RESETTING, &adapter->state)) 4907 igc_update_stats(adapter); 4908 memcpy(stats, &adapter->stats64, sizeof(*stats)); 4909 spin_unlock(&adapter->stats64_lock); 4910 } 4911 4912 static netdev_features_t igc_fix_features(struct net_device *netdev, 4913 netdev_features_t features) 4914 { 4915 /* Since there is no support for separate Rx/Tx vlan accel 4916 * enable/disable make sure Tx flag is always in same state as Rx. 4917 */ 4918 if (features & NETIF_F_HW_VLAN_CTAG_RX) 4919 features |= NETIF_F_HW_VLAN_CTAG_TX; 4920 else 4921 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 4922 4923 return features; 4924 } 4925 4926 static int igc_set_features(struct net_device *netdev, 4927 netdev_features_t features) 4928 { 4929 netdev_features_t changed = netdev->features ^ features; 4930 struct igc_adapter *adapter = netdev_priv(netdev); 4931 4932 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 4933 igc_vlan_mode(netdev, features); 4934 4935 /* Add VLAN support */ 4936 if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) 4937 return 0; 4938 4939 if (!(features & NETIF_F_NTUPLE)) 4940 igc_flush_nfc_rules(adapter); 4941 4942 netdev->features = features; 4943 4944 if (netif_running(netdev)) 4945 igc_reinit_locked(adapter); 4946 else 4947 igc_reset(adapter); 4948 4949 return 1; 4950 } 4951 4952 static netdev_features_t 4953 igc_features_check(struct sk_buff *skb, struct net_device *dev, 4954 netdev_features_t features) 4955 { 4956 unsigned int network_hdr_len, mac_hdr_len; 4957 4958 /* Make certain the headers can be described by a context descriptor */ 4959 mac_hdr_len = skb_network_header(skb) - skb->data; 4960 if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN)) 4961 return features & ~(NETIF_F_HW_CSUM | 4962 NETIF_F_SCTP_CRC | 4963 NETIF_F_HW_VLAN_CTAG_TX | 4964 
NETIF_F_TSO | 4965 NETIF_F_TSO6); 4966 4967 network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb); 4968 if (unlikely(network_hdr_len > IGC_MAX_NETWORK_HDR_LEN)) 4969 return features & ~(NETIF_F_HW_CSUM | 4970 NETIF_F_SCTP_CRC | 4971 NETIF_F_TSO | 4972 NETIF_F_TSO6); 4973 4974 /* We can only support IPv4 TSO in tunnels if we can mangle the 4975 * inner IP ID field, so strip TSO if MANGLEID is not supported. 4976 */ 4977 if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) 4978 features &= ~NETIF_F_TSO; 4979 4980 return features; 4981 } 4982 4983 static void igc_tsync_interrupt(struct igc_adapter *adapter) 4984 { 4985 u32 ack, tsauxc, sec, nsec, tsicr; 4986 struct igc_hw *hw = &adapter->hw; 4987 struct ptp_clock_event event; 4988 struct timespec64 ts; 4989 4990 tsicr = rd32(IGC_TSICR); 4991 ack = 0; 4992 4993 if (tsicr & IGC_TSICR_SYS_WRAP) { 4994 event.type = PTP_CLOCK_PPS; 4995 if (adapter->ptp_caps.pps) 4996 ptp_clock_event(adapter->ptp_clock, &event); 4997 ack |= IGC_TSICR_SYS_WRAP; 4998 } 4999 5000 if (tsicr & IGC_TSICR_TXTS) { 5001 /* retrieve hardware timestamp */ 5002 schedule_work(&adapter->ptp_tx_work); 5003 ack |= IGC_TSICR_TXTS; 5004 } 5005 5006 if (tsicr & IGC_TSICR_TT0) { 5007 spin_lock(&adapter->tmreg_lock); 5008 ts = timespec64_add(adapter->perout[0].start, 5009 adapter->perout[0].period); 5010 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5011 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec); 5012 tsauxc = rd32(IGC_TSAUXC); 5013 tsauxc |= IGC_TSAUXC_EN_TT0; 5014 wr32(IGC_TSAUXC, tsauxc); 5015 adapter->perout[0].start = ts; 5016 spin_unlock(&adapter->tmreg_lock); 5017 ack |= IGC_TSICR_TT0; 5018 } 5019 5020 if (tsicr & IGC_TSICR_TT1) { 5021 spin_lock(&adapter->tmreg_lock); 5022 ts = timespec64_add(adapter->perout[1].start, 5023 adapter->perout[1].period); 5024 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); 5025 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec); 5026 tsauxc = rd32(IGC_TSAUXC); 5027 tsauxc |= 
IGC_TSAUXC_EN_TT1; 5028 wr32(IGC_TSAUXC, tsauxc); 5029 adapter->perout[1].start = ts; 5030 spin_unlock(&adapter->tmreg_lock); 5031 ack |= IGC_TSICR_TT1; 5032 } 5033 5034 if (tsicr & IGC_TSICR_AUTT0) { 5035 nsec = rd32(IGC_AUXSTMPL0); 5036 sec = rd32(IGC_AUXSTMPH0); 5037 event.type = PTP_CLOCK_EXTTS; 5038 event.index = 0; 5039 event.timestamp = sec * NSEC_PER_SEC + nsec; 5040 ptp_clock_event(adapter->ptp_clock, &event); 5041 ack |= IGC_TSICR_AUTT0; 5042 } 5043 5044 if (tsicr & IGC_TSICR_AUTT1) { 5045 nsec = rd32(IGC_AUXSTMPL1); 5046 sec = rd32(IGC_AUXSTMPH1); 5047 event.type = PTP_CLOCK_EXTTS; 5048 event.index = 1; 5049 event.timestamp = sec * NSEC_PER_SEC + nsec; 5050 ptp_clock_event(adapter->ptp_clock, &event); 5051 ack |= IGC_TSICR_AUTT1; 5052 } 5053 5054 /* acknowledge the interrupts */ 5055 wr32(IGC_TSICR, ack); 5056 } 5057 5058 /** 5059 * igc_msix_other - msix other interrupt handler 5060 * @irq: interrupt number 5061 * @data: pointer to a q_vector 5062 */ 5063 static irqreturn_t igc_msix_other(int irq, void *data) 5064 { 5065 struct igc_adapter *adapter = data; 5066 struct igc_hw *hw = &adapter->hw; 5067 u32 icr = rd32(IGC_ICR); 5068 5069 /* reading ICR causes bit 31 of EICR to be cleared */ 5070 if (icr & IGC_ICR_DRSTA) 5071 schedule_work(&adapter->reset_task); 5072 5073 if (icr & IGC_ICR_DOUTSYNC) { 5074 /* HW is reporting DMA is out of sync */ 5075 adapter->stats.doosync++; 5076 } 5077 5078 if (icr & IGC_ICR_LSC) { 5079 hw->mac.get_link_status = true; 5080 /* guard against interrupt when we're going down */ 5081 if (!test_bit(__IGC_DOWN, &adapter->state)) 5082 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5083 } 5084 5085 if (icr & IGC_ICR_TS) 5086 igc_tsync_interrupt(adapter); 5087 5088 wr32(IGC_EIMS, adapter->eims_other); 5089 5090 return IRQ_HANDLED; 5091 } 5092 5093 static void igc_write_itr(struct igc_q_vector *q_vector) 5094 { 5095 u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK; 5096 5097 if (!q_vector->set_itr) 5098 return; 5099 5100 if 
(!itr_val) 5101 itr_val = IGC_ITR_VAL_MASK; 5102 5103 itr_val |= IGC_EITR_CNT_IGNR; 5104 5105 writel(itr_val, q_vector->itr_register); 5106 q_vector->set_itr = 0; 5107 } 5108 5109 static irqreturn_t igc_msix_ring(int irq, void *data) 5110 { 5111 struct igc_q_vector *q_vector = data; 5112 5113 /* Write the ITR value calculated from the previous interrupt. */ 5114 igc_write_itr(q_vector); 5115 5116 napi_schedule(&q_vector->napi); 5117 5118 return IRQ_HANDLED; 5119 } 5120 5121 /** 5122 * igc_request_msix - Initialize MSI-X interrupts 5123 * @adapter: Pointer to adapter structure 5124 * 5125 * igc_request_msix allocates MSI-X vectors and requests interrupts from the 5126 * kernel. 5127 */ 5128 static int igc_request_msix(struct igc_adapter *adapter) 5129 { 5130 unsigned int num_q_vectors = adapter->num_q_vectors; 5131 int i = 0, err = 0, vector = 0, free_vector = 0; 5132 struct net_device *netdev = adapter->netdev; 5133 5134 err = request_irq(adapter->msix_entries[vector].vector, 5135 &igc_msix_other, 0, netdev->name, adapter); 5136 if (err) 5137 goto err_out; 5138 5139 if (num_q_vectors > MAX_Q_VECTORS) { 5140 num_q_vectors = MAX_Q_VECTORS; 5141 dev_warn(&adapter->pdev->dev, 5142 "The number of queue vectors (%d) is higher than max allowed (%d)\n", 5143 adapter->num_q_vectors, MAX_Q_VECTORS); 5144 } 5145 for (i = 0; i < num_q_vectors; i++) { 5146 struct igc_q_vector *q_vector = adapter->q_vector[i]; 5147 5148 vector++; 5149 5150 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector); 5151 5152 if (q_vector->rx.ring && q_vector->tx.ring) 5153 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 5154 q_vector->rx.ring->queue_index); 5155 else if (q_vector->tx.ring) 5156 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 5157 q_vector->tx.ring->queue_index); 5158 else if (q_vector->rx.ring) 5159 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 5160 q_vector->rx.ring->queue_index); 5161 else 5162 sprintf(q_vector->name, "%s-unused", netdev->name); 5163 5164 err = 
request_irq(adapter->msix_entries[vector].vector, 5165 igc_msix_ring, 0, q_vector->name, 5166 q_vector); 5167 if (err) 5168 goto err_free; 5169 } 5170 5171 igc_configure_msix(adapter); 5172 return 0; 5173 5174 err_free: 5175 /* free already assigned IRQs */ 5176 free_irq(adapter->msix_entries[free_vector++].vector, adapter); 5177 5178 vector--; 5179 for (i = 0; i < vector; i++) { 5180 free_irq(adapter->msix_entries[free_vector++].vector, 5181 adapter->q_vector[i]); 5182 } 5183 err_out: 5184 return err; 5185 } 5186 5187 /** 5188 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts 5189 * @adapter: Pointer to adapter structure 5190 * 5191 * This function resets the device so that it has 0 rx queues, tx queues, and 5192 * MSI-X interrupts allocated. 5193 */ 5194 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter) 5195 { 5196 igc_free_q_vectors(adapter); 5197 igc_reset_interrupt_capability(adapter); 5198 } 5199 5200 /* Need to wait a few seconds after link up to get diagnostic information from 5201 * the phy 5202 */ 5203 static void igc_update_phy_info(struct timer_list *t) 5204 { 5205 struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer); 5206 5207 igc_get_phy_info(&adapter->hw); 5208 } 5209 5210 /** 5211 * igc_has_link - check shared code for link and determine up/down 5212 * @adapter: pointer to driver private info 5213 */ 5214 bool igc_has_link(struct igc_adapter *adapter) 5215 { 5216 struct igc_hw *hw = &adapter->hw; 5217 bool link_active = false; 5218 5219 /* get_link_status is set on LSC (link status) interrupt or 5220 * rx sequence error interrupt. 
get_link_status will stay 5221 * false until the igc_check_for_link establishes link 5222 * for copper adapters ONLY 5223 */ 5224 if (!hw->mac.get_link_status) 5225 return true; 5226 hw->mac.ops.check_for_link(hw); 5227 link_active = !hw->mac.get_link_status; 5228 5229 if (hw->mac.type == igc_i225) { 5230 if (!netif_carrier_ok(adapter->netdev)) { 5231 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5232 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) { 5233 adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE; 5234 adapter->link_check_timeout = jiffies; 5235 } 5236 } 5237 5238 return link_active; 5239 } 5240 5241 /** 5242 * igc_watchdog - Timer Call-back 5243 * @t: timer for the watchdog 5244 */ 5245 static void igc_watchdog(struct timer_list *t) 5246 { 5247 struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer); 5248 /* Do the rest outside of interrupt context */ 5249 schedule_work(&adapter->watchdog_task); 5250 } 5251 5252 static void igc_watchdog_task(struct work_struct *work) 5253 { 5254 struct igc_adapter *adapter = container_of(work, 5255 struct igc_adapter, 5256 watchdog_task); 5257 struct net_device *netdev = adapter->netdev; 5258 struct igc_hw *hw = &adapter->hw; 5259 struct igc_phy_info *phy = &hw->phy; 5260 u16 phy_data, retry_count = 20; 5261 u32 link; 5262 int i; 5263 5264 link = igc_has_link(adapter); 5265 5266 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) { 5267 if (time_after(jiffies, (adapter->link_check_timeout + HZ))) 5268 adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; 5269 else 5270 link = false; 5271 } 5272 5273 if (link) { 5274 /* Cancel scheduled suspend requests. 
*/ 5275 pm_runtime_resume(netdev->dev.parent); 5276 5277 if (!netif_carrier_ok(netdev)) { 5278 u32 ctrl; 5279 5280 hw->mac.ops.get_speed_and_duplex(hw, 5281 &adapter->link_speed, 5282 &adapter->link_duplex); 5283 5284 ctrl = rd32(IGC_CTRL); 5285 /* Link status message must follow this format */ 5286 netdev_info(netdev, 5287 "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", 5288 adapter->link_speed, 5289 adapter->link_duplex == FULL_DUPLEX ? 5290 "Full" : "Half", 5291 (ctrl & IGC_CTRL_TFCE) && 5292 (ctrl & IGC_CTRL_RFCE) ? "RX/TX" : 5293 (ctrl & IGC_CTRL_RFCE) ? "RX" : 5294 (ctrl & IGC_CTRL_TFCE) ? "TX" : "None"); 5295 5296 /* disable EEE if enabled */ 5297 if ((adapter->flags & IGC_FLAG_EEE) && 5298 adapter->link_duplex == HALF_DUPLEX) { 5299 netdev_info(netdev, 5300 "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n"); 5301 adapter->hw.dev_spec._base.eee_enable = false; 5302 adapter->flags &= ~IGC_FLAG_EEE; 5303 } 5304 5305 /* check if SmartSpeed worked */ 5306 igc_check_downshift(hw); 5307 if (phy->speed_downgraded) 5308 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); 5309 5310 /* adjust timeout factor according to speed/duplex */ 5311 adapter->tx_timeout_factor = 1; 5312 switch (adapter->link_speed) { 5313 case SPEED_10: 5314 adapter->tx_timeout_factor = 14; 5315 break; 5316 case SPEED_100: 5317 case SPEED_1000: 5318 case SPEED_2500: 5319 adapter->tx_timeout_factor = 7; 5320 break; 5321 } 5322 5323 if (adapter->link_speed != SPEED_1000) 5324 goto no_wait; 5325 5326 /* wait for Remote receiver status OK */ 5327 retry_read_status: 5328 if (!igc_read_phy_reg(hw, PHY_1000T_STATUS, 5329 &phy_data)) { 5330 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) && 5331 retry_count) { 5332 msleep(100); 5333 retry_count--; 5334 goto retry_read_status; 5335 } else if (!retry_count) { 5336 netdev_err(netdev, "exceed max 2 second\n"); 5337 } 5338 } else { 5339 netdev_err(netdev, "read 1000Base-T Status Reg\n"); 5340 } 5341 
no_wait: 5342 netif_carrier_on(netdev); 5343 5344 /* link state has changed, schedule phy info update */ 5345 if (!test_bit(__IGC_DOWN, &adapter->state)) 5346 mod_timer(&adapter->phy_info_timer, 5347 round_jiffies(jiffies + 2 * HZ)); 5348 } 5349 } else { 5350 if (netif_carrier_ok(netdev)) { 5351 adapter->link_speed = 0; 5352 adapter->link_duplex = 0; 5353 5354 /* Links status message must follow this format */ 5355 netdev_info(netdev, "NIC Link is Down\n"); 5356 netif_carrier_off(netdev); 5357 5358 /* link state has changed, schedule phy info update */ 5359 if (!test_bit(__IGC_DOWN, &adapter->state)) 5360 mod_timer(&adapter->phy_info_timer, 5361 round_jiffies(jiffies + 2 * HZ)); 5362 5363 /* link is down, time to check for alternate media */ 5364 if (adapter->flags & IGC_FLAG_MAS_ENABLE) { 5365 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 5366 schedule_work(&adapter->reset_task); 5367 /* return immediately */ 5368 return; 5369 } 5370 } 5371 pm_schedule_suspend(netdev->dev.parent, 5372 MSEC_PER_SEC * 5); 5373 5374 /* also check for alternate media here */ 5375 } else if (!netif_carrier_ok(netdev) && 5376 (adapter->flags & IGC_FLAG_MAS_ENABLE)) { 5377 if (adapter->flags & IGC_FLAG_MEDIA_RESET) { 5378 schedule_work(&adapter->reset_task); 5379 /* return immediately */ 5380 return; 5381 } 5382 } 5383 } 5384 5385 spin_lock(&adapter->stats64_lock); 5386 igc_update_stats(adapter); 5387 spin_unlock(&adapter->stats64_lock); 5388 5389 for (i = 0; i < adapter->num_tx_queues; i++) { 5390 struct igc_ring *tx_ring = adapter->tx_ring[i]; 5391 5392 if (!netif_carrier_ok(netdev)) { 5393 /* We've lost link, so the controller stops DMA, 5394 * but we've got queued Tx work that's never going 5395 * to get done, so reset controller to flush Tx. 5396 * (Do the reset outside of interrupt context). 
5397 */ 5398 if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) { 5399 adapter->tx_timeout_count++; 5400 schedule_work(&adapter->reset_task); 5401 /* return immediately since reset is imminent */ 5402 return; 5403 } 5404 } 5405 5406 /* Force detection of hung controller every watchdog period */ 5407 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5408 } 5409 5410 /* Cause software interrupt to ensure Rx ring is cleaned */ 5411 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5412 u32 eics = 0; 5413 5414 for (i = 0; i < adapter->num_q_vectors; i++) 5415 eics |= adapter->q_vector[i]->eims_value; 5416 wr32(IGC_EICS, eics); 5417 } else { 5418 wr32(IGC_ICS, IGC_ICS_RXDMT0); 5419 } 5420 5421 igc_ptp_tx_hang(adapter); 5422 5423 /* Reset the timer */ 5424 if (!test_bit(__IGC_DOWN, &adapter->state)) { 5425 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) 5426 mod_timer(&adapter->watchdog_timer, 5427 round_jiffies(jiffies + HZ)); 5428 else 5429 mod_timer(&adapter->watchdog_timer, 5430 round_jiffies(jiffies + 2 * HZ)); 5431 } 5432 } 5433 5434 /** 5435 * igc_intr_msi - Interrupt Handler 5436 * @irq: interrupt number 5437 * @data: pointer to a network interface device structure 5438 */ 5439 static irqreturn_t igc_intr_msi(int irq, void *data) 5440 { 5441 struct igc_adapter *adapter = data; 5442 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5443 struct igc_hw *hw = &adapter->hw; 5444 /* read ICR disables interrupts using IAM */ 5445 u32 icr = rd32(IGC_ICR); 5446 5447 igc_write_itr(q_vector); 5448 5449 if (icr & IGC_ICR_DRSTA) 5450 schedule_work(&adapter->reset_task); 5451 5452 if (icr & IGC_ICR_DOUTSYNC) { 5453 /* HW is reporting DMA is out of sync */ 5454 adapter->stats.doosync++; 5455 } 5456 5457 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5458 hw->mac.get_link_status = true; 5459 if (!test_bit(__IGC_DOWN, &adapter->state)) 5460 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5461 } 5462 5463 napi_schedule(&q_vector->napi); 5464 5465 return IRQ_HANDLED; 5466 } 5467 
5468 /** 5469 * igc_intr - Legacy Interrupt Handler 5470 * @irq: interrupt number 5471 * @data: pointer to a network interface device structure 5472 */ 5473 static irqreturn_t igc_intr(int irq, void *data) 5474 { 5475 struct igc_adapter *adapter = data; 5476 struct igc_q_vector *q_vector = adapter->q_vector[0]; 5477 struct igc_hw *hw = &adapter->hw; 5478 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 5479 * need for the IMC write 5480 */ 5481 u32 icr = rd32(IGC_ICR); 5482 5483 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5484 * not set, then the adapter didn't send an interrupt 5485 */ 5486 if (!(icr & IGC_ICR_INT_ASSERTED)) 5487 return IRQ_NONE; 5488 5489 igc_write_itr(q_vector); 5490 5491 if (icr & IGC_ICR_DRSTA) 5492 schedule_work(&adapter->reset_task); 5493 5494 if (icr & IGC_ICR_DOUTSYNC) { 5495 /* HW is reporting DMA is out of sync */ 5496 adapter->stats.doosync++; 5497 } 5498 5499 if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { 5500 hw->mac.get_link_status = true; 5501 /* guard against interrupt when we're going down */ 5502 if (!test_bit(__IGC_DOWN, &adapter->state)) 5503 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5504 } 5505 5506 napi_schedule(&q_vector->napi); 5507 5508 return IRQ_HANDLED; 5509 } 5510 5511 static void igc_free_irq(struct igc_adapter *adapter) 5512 { 5513 if (adapter->msix_entries) { 5514 int vector = 0, i; 5515 5516 free_irq(adapter->msix_entries[vector++].vector, adapter); 5517 5518 for (i = 0; i < adapter->num_q_vectors; i++) 5519 free_irq(adapter->msix_entries[vector++].vector, 5520 adapter->q_vector[i]); 5521 } else { 5522 free_irq(adapter->pdev->irq, adapter); 5523 } 5524 } 5525 5526 /** 5527 * igc_request_irq - initialize interrupts 5528 * @adapter: Pointer to adapter structure 5529 * 5530 * Attempts to configure interrupts using the best available 5531 * capabilities of the hardware and kernel. 
5532 */ 5533 static int igc_request_irq(struct igc_adapter *adapter) 5534 { 5535 struct net_device *netdev = adapter->netdev; 5536 struct pci_dev *pdev = adapter->pdev; 5537 int err = 0; 5538 5539 if (adapter->flags & IGC_FLAG_HAS_MSIX) { 5540 err = igc_request_msix(adapter); 5541 if (!err) 5542 goto request_done; 5543 /* fall back to MSI */ 5544 igc_free_all_tx_resources(adapter); 5545 igc_free_all_rx_resources(adapter); 5546 5547 igc_clear_interrupt_scheme(adapter); 5548 err = igc_init_interrupt_scheme(adapter, false); 5549 if (err) 5550 goto request_done; 5551 igc_setup_all_tx_resources(adapter); 5552 igc_setup_all_rx_resources(adapter); 5553 igc_configure(adapter); 5554 } 5555 5556 igc_assign_vector(adapter->q_vector[0], 0); 5557 5558 if (adapter->flags & IGC_FLAG_HAS_MSI) { 5559 err = request_irq(pdev->irq, &igc_intr_msi, 0, 5560 netdev->name, adapter); 5561 if (!err) 5562 goto request_done; 5563 5564 /* fall back to legacy interrupts */ 5565 igc_reset_interrupt_capability(adapter); 5566 adapter->flags &= ~IGC_FLAG_HAS_MSI; 5567 } 5568 5569 err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED, 5570 netdev->name, adapter); 5571 5572 if (err) 5573 netdev_err(netdev, "Error %d getting interrupt\n", err); 5574 5575 request_done: 5576 return err; 5577 } 5578 5579 /** 5580 * __igc_open - Called when a network interface is made active 5581 * @netdev: network interface device structure 5582 * @resuming: boolean indicating if the device is resuming 5583 * 5584 * Returns 0 on success, negative value on failure 5585 * 5586 * The open entry point is called when a network interface is made 5587 * active by the system (IFF_UP). At this point all resources needed 5588 * for transmit and receive operations are allocated, the interrupt 5589 * handler is registered with the OS, the watchdog timer is started, 5590 * and the stack is notified that the interface is ready. 
5591 */ 5592 static int __igc_open(struct net_device *netdev, bool resuming) 5593 { 5594 struct igc_adapter *adapter = netdev_priv(netdev); 5595 struct pci_dev *pdev = adapter->pdev; 5596 struct igc_hw *hw = &adapter->hw; 5597 int err = 0; 5598 int i = 0; 5599 5600 /* disallow open during test */ 5601 5602 if (test_bit(__IGC_TESTING, &adapter->state)) { 5603 WARN_ON(resuming); 5604 return -EBUSY; 5605 } 5606 5607 if (!resuming) 5608 pm_runtime_get_sync(&pdev->dev); 5609 5610 netif_carrier_off(netdev); 5611 5612 /* allocate transmit descriptors */ 5613 err = igc_setup_all_tx_resources(adapter); 5614 if (err) 5615 goto err_setup_tx; 5616 5617 /* allocate receive descriptors */ 5618 err = igc_setup_all_rx_resources(adapter); 5619 if (err) 5620 goto err_setup_rx; 5621 5622 igc_power_up_link(adapter); 5623 5624 igc_configure(adapter); 5625 5626 err = igc_request_irq(adapter); 5627 if (err) 5628 goto err_req_irq; 5629 5630 /* Notify the stack of the actual queue counts. */ 5631 err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); 5632 if (err) 5633 goto err_set_queues; 5634 5635 err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); 5636 if (err) 5637 goto err_set_queues; 5638 5639 clear_bit(__IGC_DOWN, &adapter->state); 5640 5641 for (i = 0; i < adapter->num_q_vectors; i++) 5642 napi_enable(&adapter->q_vector[i]->napi); 5643 5644 /* Clear any pending interrupts. */ 5645 rd32(IGC_ICR); 5646 igc_irq_enable(adapter); 5647 5648 if (!resuming) 5649 pm_runtime_put(&pdev->dev); 5650 5651 netif_tx_start_all_queues(netdev); 5652 5653 /* start the watchdog. 
*/ 5654 hw->mac.get_link_status = true; 5655 schedule_work(&adapter->watchdog_task); 5656 5657 return IGC_SUCCESS; 5658 5659 err_set_queues: 5660 igc_free_irq(adapter); 5661 err_req_irq: 5662 igc_release_hw_control(adapter); 5663 igc_power_down_phy_copper_base(&adapter->hw); 5664 igc_free_all_rx_resources(adapter); 5665 err_setup_rx: 5666 igc_free_all_tx_resources(adapter); 5667 err_setup_tx: 5668 igc_reset(adapter); 5669 if (!resuming) 5670 pm_runtime_put(&pdev->dev); 5671 5672 return err; 5673 } 5674 5675 int igc_open(struct net_device *netdev) 5676 { 5677 return __igc_open(netdev, false); 5678 } 5679 5680 /** 5681 * __igc_close - Disables a network interface 5682 * @netdev: network interface device structure 5683 * @suspending: boolean indicating the device is suspending 5684 * 5685 * Returns 0, this is not allowed to fail 5686 * 5687 * The close entry point is called when an interface is de-activated 5688 * by the OS. The hardware is still under the driver's control, but 5689 * needs to be disabled. A global MAC reset is issued to stop the 5690 * hardware, and all transmit and receive resources are freed. 
5691 */ 5692 static int __igc_close(struct net_device *netdev, bool suspending) 5693 { 5694 struct igc_adapter *adapter = netdev_priv(netdev); 5695 struct pci_dev *pdev = adapter->pdev; 5696 5697 WARN_ON(test_bit(__IGC_RESETTING, &adapter->state)); 5698 5699 if (!suspending) 5700 pm_runtime_get_sync(&pdev->dev); 5701 5702 igc_down(adapter); 5703 5704 igc_release_hw_control(adapter); 5705 5706 igc_free_irq(adapter); 5707 5708 igc_free_all_tx_resources(adapter); 5709 igc_free_all_rx_resources(adapter); 5710 5711 if (!suspending) 5712 pm_runtime_put_sync(&pdev->dev); 5713 5714 return 0; 5715 } 5716 5717 int igc_close(struct net_device *netdev) 5718 { 5719 if (netif_device_present(netdev) || netdev->dismantle) 5720 return __igc_close(netdev, false); 5721 return 0; 5722 } 5723 5724 /** 5725 * igc_ioctl - Access the hwtstamp interface 5726 * @netdev: network interface device structure 5727 * @ifr: interface request data 5728 * @cmd: ioctl command 5729 **/ 5730 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 5731 { 5732 switch (cmd) { 5733 case SIOCGHWTSTAMP: 5734 return igc_ptp_get_ts_config(netdev, ifr); 5735 case SIOCSHWTSTAMP: 5736 return igc_ptp_set_ts_config(netdev, ifr); 5737 default: 5738 return -EOPNOTSUPP; 5739 } 5740 } 5741 5742 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue, 5743 bool enable) 5744 { 5745 struct igc_ring *ring; 5746 int i; 5747 5748 if (queue < 0 || queue >= adapter->num_tx_queues) 5749 return -EINVAL; 5750 5751 ring = adapter->tx_ring[queue]; 5752 ring->launchtime_enable = enable; 5753 5754 if (adapter->base_time) 5755 return 0; 5756 5757 adapter->cycle_time = NSEC_PER_SEC; 5758 5759 for (i = 0; i < adapter->num_tx_queues; i++) { 5760 ring = adapter->tx_ring[i]; 5761 ring->start_time = 0; 5762 ring->end_time = NSEC_PER_SEC; 5763 } 5764 5765 return 0; 5766 } 5767 5768 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) 5769 { 5770 struct timespec64 b; 5771 5772 
b = ktime_to_timespec64(base_time); 5773 5774 return timespec64_compare(now, &b) > 0; 5775 } 5776 5777 static bool validate_schedule(struct igc_adapter *adapter, 5778 const struct tc_taprio_qopt_offload *qopt) 5779 { 5780 int queue_uses[IGC_MAX_TX_QUEUES] = { }; 5781 struct timespec64 now; 5782 size_t n; 5783 5784 if (qopt->cycle_time_extension) 5785 return false; 5786 5787 igc_ptp_read(adapter, &now); 5788 5789 /* If we program the controller's BASET registers with a time 5790 * in the future, it will hold all the packets until that 5791 * time, causing a lot of TX Hangs, so to avoid that, we 5792 * reject schedules that would start in the future. 5793 */ 5794 if (!is_base_time_past(qopt->base_time, &now)) 5795 return false; 5796 5797 for (n = 0; n < qopt->num_entries; n++) { 5798 const struct tc_taprio_sched_entry *e; 5799 int i; 5800 5801 e = &qopt->entries[n]; 5802 5803 /* i225 only supports "global" frame preemption 5804 * settings. 5805 */ 5806 if (e->command != TC_TAPRIO_CMD_SET_GATES) 5807 return false; 5808 5809 for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { 5810 if (e->gate_mask & BIT(i)) 5811 queue_uses[i]++; 5812 5813 if (queue_uses[i] > 1) 5814 return false; 5815 } 5816 } 5817 5818 return true; 5819 } 5820 5821 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, 5822 struct tc_etf_qopt_offload *qopt) 5823 { 5824 struct igc_hw *hw = &adapter->hw; 5825 int err; 5826 5827 if (hw->mac.type != igc_i225) 5828 return -EOPNOTSUPP; 5829 5830 err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable); 5831 if (err) 5832 return err; 5833 5834 return igc_tsn_offload_apply(adapter); 5835 } 5836 5837 static int igc_save_qbv_schedule(struct igc_adapter *adapter, 5838 struct tc_taprio_qopt_offload *qopt) 5839 { 5840 u32 start_time = 0, end_time = 0; 5841 size_t n; 5842 5843 if (!qopt->enable) { 5844 adapter->base_time = 0; 5845 return 0; 5846 } 5847 5848 if (adapter->base_time) 5849 return -EALREADY; 5850 5851 if (!validate_schedule(adapter, qopt)) 
5852 return -EINVAL; 5853 5854 adapter->cycle_time = qopt->cycle_time; 5855 adapter->base_time = qopt->base_time; 5856 5857 /* FIXME: be a little smarter about cases when the gate for a 5858 * queue stays open for more than one entry. 5859 */ 5860 for (n = 0; n < qopt->num_entries; n++) { 5861 struct tc_taprio_sched_entry *e = &qopt->entries[n]; 5862 int i; 5863 5864 end_time += e->interval; 5865 5866 for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { 5867 struct igc_ring *ring = adapter->tx_ring[i]; 5868 5869 if (!(e->gate_mask & BIT(i))) 5870 continue; 5871 5872 ring->start_time = start_time; 5873 ring->end_time = end_time; 5874 } 5875 5876 start_time += e->interval; 5877 } 5878 5879 return 0; 5880 } 5881 5882 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, 5883 struct tc_taprio_qopt_offload *qopt) 5884 { 5885 struct igc_hw *hw = &adapter->hw; 5886 int err; 5887 5888 if (hw->mac.type != igc_i225) 5889 return -EOPNOTSUPP; 5890 5891 err = igc_save_qbv_schedule(adapter, qopt); 5892 if (err) 5893 return err; 5894 5895 return igc_tsn_offload_apply(adapter); 5896 } 5897 5898 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, 5899 void *type_data) 5900 { 5901 struct igc_adapter *adapter = netdev_priv(dev); 5902 5903 switch (type) { 5904 case TC_SETUP_QDISC_TAPRIO: 5905 return igc_tsn_enable_qbv_scheduling(adapter, type_data); 5906 5907 case TC_SETUP_QDISC_ETF: 5908 return igc_tsn_enable_launchtime(adapter, type_data); 5909 5910 default: 5911 return -EOPNOTSUPP; 5912 } 5913 } 5914 5915 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf) 5916 { 5917 struct igc_adapter *adapter = netdev_priv(dev); 5918 5919 switch (bpf->command) { 5920 case XDP_SETUP_PROG: 5921 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack); 5922 case XDP_SETUP_XSK_POOL: 5923 return igc_xdp_setup_pool(adapter, bpf->xsk.pool, 5924 bpf->xsk.queue_id); 5925 default: 5926 return -EOPNOTSUPP; 5927 } 5928 } 5929 5930 static int igc_xdp_xmit(struct 
net_device *dev, int num_frames, 5931 struct xdp_frame **frames, u32 flags) 5932 { 5933 struct igc_adapter *adapter = netdev_priv(dev); 5934 int cpu = smp_processor_id(); 5935 struct netdev_queue *nq; 5936 struct igc_ring *ring; 5937 int i, drops; 5938 5939 if (unlikely(test_bit(__IGC_DOWN, &adapter->state))) 5940 return -ENETDOWN; 5941 5942 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) 5943 return -EINVAL; 5944 5945 ring = igc_xdp_get_tx_ring(adapter, cpu); 5946 nq = txring_txq(ring); 5947 5948 __netif_tx_lock(nq, cpu); 5949 5950 drops = 0; 5951 for (i = 0; i < num_frames; i++) { 5952 int err; 5953 struct xdp_frame *xdpf = frames[i]; 5954 5955 err = igc_xdp_init_tx_descriptor(ring, xdpf); 5956 if (err) { 5957 xdp_return_frame_rx_napi(xdpf); 5958 drops++; 5959 } 5960 } 5961 5962 if (flags & XDP_XMIT_FLUSH) 5963 igc_flush_tx_descriptors(ring); 5964 5965 __netif_tx_unlock(nq); 5966 5967 return num_frames - drops; 5968 } 5969 5970 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter, 5971 struct igc_q_vector *q_vector) 5972 { 5973 struct igc_hw *hw = &adapter->hw; 5974 u32 eics = 0; 5975 5976 eics |= q_vector->eims_value; 5977 wr32(IGC_EICS, eics); 5978 } 5979 5980 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 5981 { 5982 struct igc_adapter *adapter = netdev_priv(dev); 5983 struct igc_q_vector *q_vector; 5984 struct igc_ring *ring; 5985 5986 if (test_bit(__IGC_DOWN, &adapter->state)) 5987 return -ENETDOWN; 5988 5989 if (!igc_xdp_is_enabled(adapter)) 5990 return -ENXIO; 5991 5992 if (queue_id >= adapter->num_rx_queues) 5993 return -EINVAL; 5994 5995 ring = adapter->rx_ring[queue_id]; 5996 5997 if (!ring->xsk_pool) 5998 return -ENXIO; 5999 6000 q_vector = adapter->q_vector[queue_id]; 6001 if (!napi_if_scheduled_mark_missed(&q_vector->napi)) 6002 igc_trigger_rxtxq_interrupt(adapter, q_vector); 6003 6004 return 0; 6005 } 6006 6007 static const struct net_device_ops igc_netdev_ops = { 6008 .ndo_open = igc_open, 6009 .ndo_stop = igc_close, 
6010 .ndo_start_xmit = igc_xmit_frame, 6011 .ndo_set_rx_mode = igc_set_rx_mode, 6012 .ndo_set_mac_address = igc_set_mac, 6013 .ndo_change_mtu = igc_change_mtu, 6014 .ndo_get_stats64 = igc_get_stats64, 6015 .ndo_fix_features = igc_fix_features, 6016 .ndo_set_features = igc_set_features, 6017 .ndo_features_check = igc_features_check, 6018 .ndo_eth_ioctl = igc_ioctl, 6019 .ndo_setup_tc = igc_setup_tc, 6020 .ndo_bpf = igc_bpf, 6021 .ndo_xdp_xmit = igc_xdp_xmit, 6022 .ndo_xsk_wakeup = igc_xsk_wakeup, 6023 }; 6024 6025 /* PCIe configuration access */ 6026 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6027 { 6028 struct igc_adapter *adapter = hw->back; 6029 6030 pci_read_config_word(adapter->pdev, reg, value); 6031 } 6032 6033 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) 6034 { 6035 struct igc_adapter *adapter = hw->back; 6036 6037 pci_write_config_word(adapter->pdev, reg, *value); 6038 } 6039 6040 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6041 { 6042 struct igc_adapter *adapter = hw->back; 6043 6044 if (!pci_is_pcie(adapter->pdev)) 6045 return -IGC_ERR_CONFIG; 6046 6047 pcie_capability_read_word(adapter->pdev, reg, value); 6048 6049 return IGC_SUCCESS; 6050 } 6051 6052 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) 6053 { 6054 struct igc_adapter *adapter = hw->back; 6055 6056 if (!pci_is_pcie(adapter->pdev)) 6057 return -IGC_ERR_CONFIG; 6058 6059 pcie_capability_write_word(adapter->pdev, reg, *value); 6060 6061 return IGC_SUCCESS; 6062 } 6063 6064 u32 igc_rd32(struct igc_hw *hw, u32 reg) 6065 { 6066 struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); 6067 u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); 6068 u32 value = 0; 6069 6070 value = readl(&hw_addr[reg]); 6071 6072 /* reads should not return all F's */ 6073 if (!(~value) && (!reg || !(~readl(hw_addr)))) { 6074 struct net_device *netdev = igc->netdev; 6075 6076 hw->hw_addr = NULL; 6077 netif_device_detach(netdev); 6078 
netdev_err(netdev, "PCIe link lost, device now detached\n"); 6079 WARN(pci_device_is_present(igc->pdev), 6080 "igc: Failed to read reg 0x%x!\n", reg); 6081 } 6082 6083 return value; 6084 } 6085 6086 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) 6087 { 6088 struct igc_mac_info *mac = &adapter->hw.mac; 6089 6090 mac->autoneg = false; 6091 6092 /* Make sure dplx is at most 1 bit and lsb of speed is not set 6093 * for the switch() below to work 6094 */ 6095 if ((spd & 1) || (dplx & ~1)) 6096 goto err_inval; 6097 6098 switch (spd + dplx) { 6099 case SPEED_10 + DUPLEX_HALF: 6100 mac->forced_speed_duplex = ADVERTISE_10_HALF; 6101 break; 6102 case SPEED_10 + DUPLEX_FULL: 6103 mac->forced_speed_duplex = ADVERTISE_10_FULL; 6104 break; 6105 case SPEED_100 + DUPLEX_HALF: 6106 mac->forced_speed_duplex = ADVERTISE_100_HALF; 6107 break; 6108 case SPEED_100 + DUPLEX_FULL: 6109 mac->forced_speed_duplex = ADVERTISE_100_FULL; 6110 break; 6111 case SPEED_1000 + DUPLEX_FULL: 6112 mac->autoneg = true; 6113 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 6114 break; 6115 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 6116 goto err_inval; 6117 case SPEED_2500 + DUPLEX_FULL: 6118 mac->autoneg = true; 6119 adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; 6120 break; 6121 case SPEED_2500 + DUPLEX_HALF: /* not supported */ 6122 default: 6123 goto err_inval; 6124 } 6125 6126 /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ 6127 adapter->hw.phy.mdix = AUTO_ALL_MODES; 6128 6129 return 0; 6130 6131 err_inval: 6132 netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n"); 6133 return -EINVAL; 6134 } 6135 6136 /** 6137 * igc_probe - Device Initialization Routine 6138 * @pdev: PCI device information struct 6139 * @ent: entry in igc_pci_tbl 6140 * 6141 * Returns 0 on success, negative on failure 6142 * 6143 * igc_probe initializes an adapter identified by a pci_dev structure. 
6144 * The OS initialization, configuring the adapter private structure, 6145 * and a hardware reset occur. 6146 */ 6147 static int igc_probe(struct pci_dev *pdev, 6148 const struct pci_device_id *ent) 6149 { 6150 struct igc_adapter *adapter; 6151 struct net_device *netdev; 6152 struct igc_hw *hw; 6153 const struct igc_info *ei = igc_info_tbl[ent->driver_data]; 6154 int err, pci_using_dac; 6155 6156 err = pci_enable_device_mem(pdev); 6157 if (err) 6158 return err; 6159 6160 pci_using_dac = 0; 6161 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 6162 if (!err) { 6163 pci_using_dac = 1; 6164 } else { 6165 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 6166 if (err) { 6167 dev_err(&pdev->dev, 6168 "No usable DMA configuration, aborting\n"); 6169 goto err_dma; 6170 } 6171 } 6172 6173 err = pci_request_mem_regions(pdev, igc_driver_name); 6174 if (err) 6175 goto err_pci_reg; 6176 6177 pci_enable_pcie_error_reporting(pdev); 6178 6179 err = pci_enable_ptm(pdev, NULL); 6180 if (err < 0) 6181 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n"); 6182 6183 pci_set_master(pdev); 6184 6185 err = -ENOMEM; 6186 netdev = alloc_etherdev_mq(sizeof(struct igc_adapter), 6187 IGC_MAX_TX_QUEUES); 6188 6189 if (!netdev) 6190 goto err_alloc_etherdev; 6191 6192 SET_NETDEV_DEV(netdev, &pdev->dev); 6193 6194 pci_set_drvdata(pdev, netdev); 6195 adapter = netdev_priv(netdev); 6196 adapter->netdev = netdev; 6197 adapter->pdev = pdev; 6198 hw = &adapter->hw; 6199 hw->back = adapter; 6200 adapter->port_num = hw->bus.func; 6201 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 6202 6203 err = pci_save_state(pdev); 6204 if (err) 6205 goto err_ioremap; 6206 6207 err = -EIO; 6208 adapter->io_addr = ioremap(pci_resource_start(pdev, 0), 6209 pci_resource_len(pdev, 0)); 6210 if (!adapter->io_addr) 6211 goto err_ioremap; 6212 6213 /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */ 6214 hw->hw_addr = adapter->io_addr; 6215 
6216 netdev->netdev_ops = &igc_netdev_ops; 6217 igc_ethtool_set_ops(netdev); 6218 netdev->watchdog_timeo = 5 * HZ; 6219 6220 netdev->mem_start = pci_resource_start(pdev, 0); 6221 netdev->mem_end = pci_resource_end(pdev, 0); 6222 6223 /* PCI config space info */ 6224 hw->vendor_id = pdev->vendor; 6225 hw->device_id = pdev->device; 6226 hw->revision_id = pdev->revision; 6227 hw->subsystem_vendor_id = pdev->subsystem_vendor; 6228 hw->subsystem_device_id = pdev->subsystem_device; 6229 6230 /* Copy the default MAC and PHY function pointers */ 6231 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 6232 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 6233 6234 /* Initialize skew-specific constants */ 6235 err = ei->get_invariants(hw); 6236 if (err) 6237 goto err_sw_init; 6238 6239 /* Add supported features to the features list*/ 6240 netdev->features |= NETIF_F_SG; 6241 netdev->features |= NETIF_F_TSO; 6242 netdev->features |= NETIF_F_TSO6; 6243 netdev->features |= NETIF_F_TSO_ECN; 6244 netdev->features |= NETIF_F_RXCSUM; 6245 netdev->features |= NETIF_F_HW_CSUM; 6246 netdev->features |= NETIF_F_SCTP_CRC; 6247 netdev->features |= NETIF_F_HW_TC; 6248 6249 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \ 6250 NETIF_F_GSO_GRE_CSUM | \ 6251 NETIF_F_GSO_IPXIP4 | \ 6252 NETIF_F_GSO_IPXIP6 | \ 6253 NETIF_F_GSO_UDP_TUNNEL | \ 6254 NETIF_F_GSO_UDP_TUNNEL_CSUM) 6255 6256 netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES; 6257 netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES; 6258 6259 /* setup the private structure */ 6260 err = igc_sw_init(adapter); 6261 if (err) 6262 goto err_sw_init; 6263 6264 /* copy netdev features into list of user selectable features */ 6265 netdev->hw_features |= NETIF_F_NTUPLE; 6266 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; 6267 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; 6268 netdev->hw_features |= netdev->features; 6269 6270 if (pci_using_dac) 6271 netdev->features |= NETIF_F_HIGHDMA; 6272 6273 
netdev->vlan_features |= netdev->features; 6274 6275 /* MTU range: 68 - 9216 */ 6276 netdev->min_mtu = ETH_MIN_MTU; 6277 netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE; 6278 6279 /* before reading the NVM, reset the controller to put the device in a 6280 * known good starting state 6281 */ 6282 hw->mac.ops.reset_hw(hw); 6283 6284 if (igc_get_flash_presence_i225(hw)) { 6285 if (hw->nvm.ops.validate(hw) < 0) { 6286 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 6287 err = -EIO; 6288 goto err_eeprom; 6289 } 6290 } 6291 6292 if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) { 6293 /* copy the MAC address out of the NVM */ 6294 if (hw->mac.ops.read_mac_addr(hw)) 6295 dev_err(&pdev->dev, "NVM Read Error\n"); 6296 } 6297 6298 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); 6299 6300 if (!is_valid_ether_addr(netdev->dev_addr)) { 6301 dev_err(&pdev->dev, "Invalid MAC Address\n"); 6302 err = -EIO; 6303 goto err_eeprom; 6304 } 6305 6306 /* configure RXPBSIZE and TXPBSIZE */ 6307 wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT); 6308 wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); 6309 6310 timer_setup(&adapter->watchdog_timer, igc_watchdog, 0); 6311 timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0); 6312 6313 INIT_WORK(&adapter->reset_task, igc_reset_task); 6314 INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); 6315 6316 /* Initialize link properties that are user-changeable */ 6317 adapter->fc_autoneg = true; 6318 hw->mac.autoneg = true; 6319 hw->phy.autoneg_advertised = 0xaf; 6320 6321 hw->fc.requested_mode = igc_fc_default; 6322 hw->fc.current_mode = igc_fc_default; 6323 6324 /* By default, support wake on port A */ 6325 adapter->flags |= IGC_FLAG_WOL_SUPPORTED; 6326 6327 /* initialize the wol settings based on the eeprom settings */ 6328 if (adapter->flags & IGC_FLAG_WOL_SUPPORTED) 6329 adapter->wol |= IGC_WUFC_MAG; 6330 6331 device_set_wakeup_enable(&adapter->pdev->dev, 6332 adapter->flags & IGC_FLAG_WOL_SUPPORTED); 6333 6334 
igc_ptp_init(adapter); 6335 6336 /* reset the hardware with the new settings */ 6337 igc_reset(adapter); 6338 6339 /* let the f/w know that the h/w is now under the control of the 6340 * driver. 6341 */ 6342 igc_get_hw_control(adapter); 6343 6344 strncpy(netdev->name, "eth%d", IFNAMSIZ); 6345 err = register_netdev(netdev); 6346 if (err) 6347 goto err_register; 6348 6349 /* carrier off reporting is important to ethtool even BEFORE open */ 6350 netif_carrier_off(netdev); 6351 6352 /* Check if Media Autosense is enabled */ 6353 adapter->ei = *ei; 6354 6355 /* print pcie link status and MAC address */ 6356 pcie_print_link_status(pdev); 6357 netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr); 6358 6359 dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); 6360 /* Disable EEE for internal PHY devices */ 6361 hw->dev_spec._base.eee_enable = false; 6362 adapter->flags &= ~IGC_FLAG_EEE; 6363 igc_set_eee_i225(hw, false, false, false); 6364 6365 pm_runtime_put_noidle(&pdev->dev); 6366 6367 return 0; 6368 6369 err_register: 6370 igc_release_hw_control(adapter); 6371 err_eeprom: 6372 if (!igc_check_reset_block(hw)) 6373 igc_reset_phy(hw); 6374 err_sw_init: 6375 igc_clear_interrupt_scheme(adapter); 6376 iounmap(adapter->io_addr); 6377 err_ioremap: 6378 free_netdev(netdev); 6379 err_alloc_etherdev: 6380 pci_disable_pcie_error_reporting(pdev); 6381 pci_release_mem_regions(pdev); 6382 err_pci_reg: 6383 err_dma: 6384 pci_disable_device(pdev); 6385 return err; 6386 } 6387 6388 /** 6389 * igc_remove - Device Removal Routine 6390 * @pdev: PCI device information struct 6391 * 6392 * igc_remove is called by the PCI subsystem to alert the driver 6393 * that it should release a PCI device. This could be caused by a 6394 * Hot-Plug event, or because the driver is going to be removed from 6395 * memory. 
6396 */ 6397 static void igc_remove(struct pci_dev *pdev) 6398 { 6399 struct net_device *netdev = pci_get_drvdata(pdev); 6400 struct igc_adapter *adapter = netdev_priv(netdev); 6401 6402 pm_runtime_get_noresume(&pdev->dev); 6403 6404 igc_flush_nfc_rules(adapter); 6405 6406 igc_ptp_stop(adapter); 6407 6408 set_bit(__IGC_DOWN, &adapter->state); 6409 6410 del_timer_sync(&adapter->watchdog_timer); 6411 del_timer_sync(&adapter->phy_info_timer); 6412 6413 cancel_work_sync(&adapter->reset_task); 6414 cancel_work_sync(&adapter->watchdog_task); 6415 6416 /* Release control of h/w to f/w. If f/w is AMT enabled, this 6417 * would have already happened in close and is redundant. 6418 */ 6419 igc_release_hw_control(adapter); 6420 unregister_netdev(netdev); 6421 6422 igc_clear_interrupt_scheme(adapter); 6423 pci_iounmap(pdev, adapter->io_addr); 6424 pci_release_mem_regions(pdev); 6425 6426 free_netdev(netdev); 6427 6428 pci_disable_pcie_error_reporting(pdev); 6429 6430 pci_disable_device(pdev); 6431 } 6432 6433 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake, 6434 bool runtime) 6435 { 6436 struct net_device *netdev = pci_get_drvdata(pdev); 6437 struct igc_adapter *adapter = netdev_priv(netdev); 6438 u32 wufc = runtime ? 
IGC_WUFC_LNKC : adapter->wol; 6439 struct igc_hw *hw = &adapter->hw; 6440 u32 ctrl, rctl, status; 6441 bool wake; 6442 6443 rtnl_lock(); 6444 netif_device_detach(netdev); 6445 6446 if (netif_running(netdev)) 6447 __igc_close(netdev, true); 6448 6449 igc_ptp_suspend(adapter); 6450 6451 igc_clear_interrupt_scheme(adapter); 6452 rtnl_unlock(); 6453 6454 status = rd32(IGC_STATUS); 6455 if (status & IGC_STATUS_LU) 6456 wufc &= ~IGC_WUFC_LNKC; 6457 6458 if (wufc) { 6459 igc_setup_rctl(adapter); 6460 igc_set_rx_mode(netdev); 6461 6462 /* turn on all-multi mode if wake on multicast is enabled */ 6463 if (wufc & IGC_WUFC_MC) { 6464 rctl = rd32(IGC_RCTL); 6465 rctl |= IGC_RCTL_MPE; 6466 wr32(IGC_RCTL, rctl); 6467 } 6468 6469 ctrl = rd32(IGC_CTRL); 6470 ctrl |= IGC_CTRL_ADVD3WUC; 6471 wr32(IGC_CTRL, ctrl); 6472 6473 /* Allow time for pending master requests to run */ 6474 igc_disable_pcie_master(hw); 6475 6476 wr32(IGC_WUC, IGC_WUC_PME_EN); 6477 wr32(IGC_WUFC, wufc); 6478 } else { 6479 wr32(IGC_WUC, 0); 6480 wr32(IGC_WUFC, 0); 6481 } 6482 6483 wake = wufc || adapter->en_mng_pt; 6484 if (!wake) 6485 igc_power_down_phy_copper_base(&adapter->hw); 6486 else 6487 igc_power_up_link(adapter); 6488 6489 if (enable_wake) 6490 *enable_wake = wake; 6491 6492 /* Release control of h/w to f/w. If f/w is AMT enabled, this 6493 * would have already happened in close and is redundant. 6494 */ 6495 igc_release_hw_control(adapter); 6496 6497 pci_disable_device(pdev); 6498 6499 return 0; 6500 } 6501 6502 #ifdef CONFIG_PM 6503 static int __maybe_unused igc_runtime_suspend(struct device *dev) 6504 { 6505 return __igc_shutdown(to_pci_dev(dev), NULL, 1); 6506 } 6507 6508 static void igc_deliver_wake_packet(struct net_device *netdev) 6509 { 6510 struct igc_adapter *adapter = netdev_priv(netdev); 6511 struct igc_hw *hw = &adapter->hw; 6512 struct sk_buff *skb; 6513 u32 wupl; 6514 6515 wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK; 6516 6517 /* WUPM stores only the first 128 bytes of the wake packet. 
6518 * Read the packet only if we have the whole thing. 6519 */ 6520 if (wupl == 0 || wupl > IGC_WUPM_BYTES) 6521 return; 6522 6523 skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES); 6524 if (!skb) 6525 return; 6526 6527 skb_put(skb, wupl); 6528 6529 /* Ensure reads are 32-bit aligned */ 6530 wupl = roundup(wupl, 4); 6531 6532 memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl); 6533 6534 skb->protocol = eth_type_trans(skb, netdev); 6535 netif_rx(skb); 6536 } 6537 6538 static int __maybe_unused igc_resume(struct device *dev) 6539 { 6540 struct pci_dev *pdev = to_pci_dev(dev); 6541 struct net_device *netdev = pci_get_drvdata(pdev); 6542 struct igc_adapter *adapter = netdev_priv(netdev); 6543 struct igc_hw *hw = &adapter->hw; 6544 u32 err, val; 6545 6546 pci_set_power_state(pdev, PCI_D0); 6547 pci_restore_state(pdev); 6548 pci_save_state(pdev); 6549 6550 if (!pci_device_is_present(pdev)) 6551 return -ENODEV; 6552 err = pci_enable_device_mem(pdev); 6553 if (err) { 6554 netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 6555 return err; 6556 } 6557 pci_set_master(pdev); 6558 6559 pci_enable_wake(pdev, PCI_D3hot, 0); 6560 pci_enable_wake(pdev, PCI_D3cold, 0); 6561 6562 if (igc_init_interrupt_scheme(adapter, true)) { 6563 netdev_err(netdev, "Unable to allocate memory for queues\n"); 6564 return -ENOMEM; 6565 } 6566 6567 igc_reset(adapter); 6568 6569 /* let the f/w know that the h/w is now under the control of the 6570 * driver. 
6571 */ 6572 igc_get_hw_control(adapter); 6573 6574 val = rd32(IGC_WUS); 6575 if (val & WAKE_PKT_WUS) 6576 igc_deliver_wake_packet(netdev); 6577 6578 wr32(IGC_WUS, ~0); 6579 6580 rtnl_lock(); 6581 if (!err && netif_running(netdev)) 6582 err = __igc_open(netdev, true); 6583 6584 if (!err) 6585 netif_device_attach(netdev); 6586 rtnl_unlock(); 6587 6588 return err; 6589 } 6590 6591 static int __maybe_unused igc_runtime_resume(struct device *dev) 6592 { 6593 return igc_resume(dev); 6594 } 6595 6596 static int __maybe_unused igc_suspend(struct device *dev) 6597 { 6598 return __igc_shutdown(to_pci_dev(dev), NULL, 0); 6599 } 6600 6601 static int __maybe_unused igc_runtime_idle(struct device *dev) 6602 { 6603 struct net_device *netdev = dev_get_drvdata(dev); 6604 struct igc_adapter *adapter = netdev_priv(netdev); 6605 6606 if (!igc_has_link(adapter)) 6607 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 6608 6609 return -EBUSY; 6610 } 6611 #endif /* CONFIG_PM */ 6612 6613 static void igc_shutdown(struct pci_dev *pdev) 6614 { 6615 bool wake; 6616 6617 __igc_shutdown(pdev, &wake, 0); 6618 6619 if (system_state == SYSTEM_POWER_OFF) { 6620 pci_wake_from_d3(pdev, wake); 6621 pci_set_power_state(pdev, PCI_D3hot); 6622 } 6623 } 6624 6625 /** 6626 * igc_io_error_detected - called when PCI error is detected 6627 * @pdev: Pointer to PCI device 6628 * @state: The current PCI connection state 6629 * 6630 * This function is called after a PCI bus error affecting 6631 * this device has been detected. 6632 **/ 6633 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev, 6634 pci_channel_state_t state) 6635 { 6636 struct net_device *netdev = pci_get_drvdata(pdev); 6637 struct igc_adapter *adapter = netdev_priv(netdev); 6638 6639 netif_device_detach(netdev); 6640 6641 if (state == pci_channel_io_perm_failure) 6642 return PCI_ERS_RESULT_DISCONNECT; 6643 6644 if (netif_running(netdev)) 6645 igc_down(adapter); 6646 pci_disable_device(pdev); 6647 6648 /* Request a slot reset. 
*/ 6649 return PCI_ERS_RESULT_NEED_RESET; 6650 } 6651 6652 /** 6653 * igc_io_slot_reset - called after the PCI bus has been reset. 6654 * @pdev: Pointer to PCI device 6655 * 6656 * Restart the card from scratch, as if from a cold-boot. Implementation 6657 * resembles the first-half of the igc_resume routine. 6658 **/ 6659 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) 6660 { 6661 struct net_device *netdev = pci_get_drvdata(pdev); 6662 struct igc_adapter *adapter = netdev_priv(netdev); 6663 struct igc_hw *hw = &adapter->hw; 6664 pci_ers_result_t result; 6665 6666 if (pci_enable_device_mem(pdev)) { 6667 netdev_err(netdev, "Could not re-enable PCI device after reset\n"); 6668 result = PCI_ERS_RESULT_DISCONNECT; 6669 } else { 6670 pci_set_master(pdev); 6671 pci_restore_state(pdev); 6672 pci_save_state(pdev); 6673 6674 pci_enable_wake(pdev, PCI_D3hot, 0); 6675 pci_enable_wake(pdev, PCI_D3cold, 0); 6676 6677 /* In case of PCI error, adapter loses its HW address 6678 * so we should re-assign it here. 6679 */ 6680 hw->hw_addr = adapter->io_addr; 6681 6682 igc_reset(adapter); 6683 wr32(IGC_WUS, ~0); 6684 result = PCI_ERS_RESULT_RECOVERED; 6685 } 6686 6687 return result; 6688 } 6689 6690 /** 6691 * igc_io_resume - called when traffic can start to flow again. 6692 * @pdev: Pointer to PCI device 6693 * 6694 * This callback is called when the error recovery driver tells us that 6695 * its OK to resume normal operation. Implementation resembles the 6696 * second-half of the igc_resume routine. 
6697 */ 6698 static void igc_io_resume(struct pci_dev *pdev) 6699 { 6700 struct net_device *netdev = pci_get_drvdata(pdev); 6701 struct igc_adapter *adapter = netdev_priv(netdev); 6702 6703 rtnl_lock(); 6704 if (netif_running(netdev)) { 6705 if (igc_open(netdev)) { 6706 netdev_err(netdev, "igc_open failed after reset\n"); 6707 return; 6708 } 6709 } 6710 6711 netif_device_attach(netdev); 6712 6713 /* let the f/w know that the h/w is now under the control of the 6714 * driver. 6715 */ 6716 igc_get_hw_control(adapter); 6717 rtnl_unlock(); 6718 } 6719 6720 static const struct pci_error_handlers igc_err_handler = { 6721 .error_detected = igc_io_error_detected, 6722 .slot_reset = igc_io_slot_reset, 6723 .resume = igc_io_resume, 6724 }; 6725 6726 #ifdef CONFIG_PM 6727 static const struct dev_pm_ops igc_pm_ops = { 6728 SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume) 6729 SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume, 6730 igc_runtime_idle) 6731 }; 6732 #endif 6733 6734 static struct pci_driver igc_driver = { 6735 .name = igc_driver_name, 6736 .id_table = igc_pci_tbl, 6737 .probe = igc_probe, 6738 .remove = igc_remove, 6739 #ifdef CONFIG_PM 6740 .driver.pm = &igc_pm_ops, 6741 #endif 6742 .shutdown = igc_shutdown, 6743 .err_handler = &igc_err_handler, 6744 }; 6745 6746 /** 6747 * igc_reinit_queues - return error 6748 * @adapter: pointer to adapter structure 6749 */ 6750 int igc_reinit_queues(struct igc_adapter *adapter) 6751 { 6752 struct net_device *netdev = adapter->netdev; 6753 int err = 0; 6754 6755 if (netif_running(netdev)) 6756 igc_close(netdev); 6757 6758 igc_reset_interrupt_capability(adapter); 6759 6760 if (igc_init_interrupt_scheme(adapter, true)) { 6761 netdev_err(netdev, "Unable to allocate memory for queues\n"); 6762 return -ENOMEM; 6763 } 6764 6765 if (netif_running(netdev)) 6766 err = igc_open(netdev); 6767 6768 return err; 6769 } 6770 6771 /** 6772 * igc_get_hw_dev - return device 6773 * @hw: pointer to hardware structure 6774 * 6775 * used by 
hardware layer to print debugging information 6776 */ 6777 struct net_device *igc_get_hw_dev(struct igc_hw *hw) 6778 { 6779 struct igc_adapter *adapter = hw->back; 6780 6781 return adapter->netdev; 6782 } 6783 6784 static void igc_disable_rx_ring_hw(struct igc_ring *ring) 6785 { 6786 struct igc_hw *hw = &ring->q_vector->adapter->hw; 6787 u8 idx = ring->reg_idx; 6788 u32 rxdctl; 6789 6790 rxdctl = rd32(IGC_RXDCTL(idx)); 6791 rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE; 6792 rxdctl |= IGC_RXDCTL_SWFLUSH; 6793 wr32(IGC_RXDCTL(idx), rxdctl); 6794 } 6795 6796 void igc_disable_rx_ring(struct igc_ring *ring) 6797 { 6798 igc_disable_rx_ring_hw(ring); 6799 igc_clean_rx_ring(ring); 6800 } 6801 6802 void igc_enable_rx_ring(struct igc_ring *ring) 6803 { 6804 struct igc_adapter *adapter = ring->q_vector->adapter; 6805 6806 igc_configure_rx_ring(adapter, ring); 6807 6808 if (ring->xsk_pool) 6809 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring)); 6810 else 6811 igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); 6812 } 6813 6814 static void igc_disable_tx_ring_hw(struct igc_ring *ring) 6815 { 6816 struct igc_hw *hw = &ring->q_vector->adapter->hw; 6817 u8 idx = ring->reg_idx; 6818 u32 txdctl; 6819 6820 txdctl = rd32(IGC_TXDCTL(idx)); 6821 txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; 6822 txdctl |= IGC_TXDCTL_SWFLUSH; 6823 wr32(IGC_TXDCTL(idx), txdctl); 6824 } 6825 6826 void igc_disable_tx_ring(struct igc_ring *ring) 6827 { 6828 igc_disable_tx_ring_hw(ring); 6829 igc_clean_tx_ring(ring); 6830 } 6831 6832 void igc_enable_tx_ring(struct igc_ring *ring) 6833 { 6834 struct igc_adapter *adapter = ring->q_vector->adapter; 6835 6836 igc_configure_tx_ring(adapter, ring); 6837 } 6838 6839 /** 6840 * igc_init_module - Driver Registration Routine 6841 * 6842 * igc_init_module is the first routine called when the driver is 6843 * loaded. All it does is register with the PCI subsystem. 
6844 */ 6845 static int __init igc_init_module(void) 6846 { 6847 int ret; 6848 6849 pr_info("%s\n", igc_driver_string); 6850 pr_info("%s\n", igc_copyright); 6851 6852 ret = pci_register_driver(&igc_driver); 6853 return ret; 6854 } 6855 6856 module_init(igc_init_module); 6857 6858 /** 6859 * igc_exit_module - Driver Exit Cleanup Routine 6860 * 6861 * igc_exit_module is called just before the driver is removed 6862 * from memory. 6863 */ 6864 static void __exit igc_exit_module(void) 6865 { 6866 pci_unregister_driver(&igc_driver); 6867 } 6868 6869 module_exit(igc_exit_module); 6870 /* igc_main.c */ 6871