1 /******************************************************************************* 2 3 Intel(R) Gigabit Ethernet Linux driver 4 Copyright(c) 2007-2012 Intel Corporation. 5 6 This program is free software; you can redistribute it and/or modify it 7 under the terms and conditions of the GNU General Public License, 8 version 2, as published by the Free Software Foundation. 9 10 This program is distributed in the hope it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 more details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 18 19 The full GNU General Public License is included in this distribution in 20 the file called "COPYING". 21 22 Contact Information: 23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> 24 Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 25 26 *******************************************************************************/ 27 28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 29 30 #include <linux/module.h> 31 #include <linux/types.h> 32 #include <linux/init.h> 33 #include <linux/bitops.h> 34 #include <linux/vmalloc.h> 35 #include <linux/pagemap.h> 36 #include <linux/netdevice.h> 37 #include <linux/ipv6.h> 38 #include <linux/slab.h> 39 #include <net/checksum.h> 40 #include <net/ip6_checksum.h> 41 #include <linux/net_tstamp.h> 42 #include <linux/mii.h> 43 #include <linux/ethtool.h> 44 #include <linux/if.h> 45 #include <linux/if_vlan.h> 46 #include <linux/pci.h> 47 #include <linux/pci-aspm.h> 48 #include <linux/delay.h> 49 #include <linux/interrupt.h> 50 #include <linux/ip.h> 51 #include <linux/tcp.h> 52 #include <linux/sctp.h> 53 #include <linux/if_ether.h> 54 #include <linux/aer.h> 55 #include <linux/prefetch.h> 56 #include <linux/pm_runtime.h> 57 #ifdef CONFIG_IGB_DCA 58 #include <linux/dca.h> 59 #endif 60 #include "igb.h" 61 62 #define MAJ 3 63 #define MIN 2 64 #define BUILD 10 65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." 
\ 66 __stringify(BUILD) "-k" 67 char igb_driver_name[] = "igb"; 68 char igb_driver_version[] = DRV_VERSION; 69 static const char igb_driver_string[] = 70 "Intel(R) Gigabit Ethernet Network Driver"; 71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation."; 72 73 static const struct e1000_info *igb_info_tbl[] = { 74 [board_82575] = &e1000_82575_info, 75 }; 76 77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { 78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, 79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, 80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, 81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 }, 82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 }, 83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 }, 84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 }, 85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 }, 86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 }, 87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 }, 88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 }, 89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 }, 90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 }, 91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 }, 92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, 93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 }, 94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 }, 95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, 96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, 97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 }, 98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 }, 99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, 100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), 
board_82575 }, 101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, 102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, 103 /* required last entry */ 104 {0, } 105 }; 106 107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl); 108 109 void igb_reset(struct igb_adapter *); 110 static int igb_setup_all_tx_resources(struct igb_adapter *); 111 static int igb_setup_all_rx_resources(struct igb_adapter *); 112 static void igb_free_all_tx_resources(struct igb_adapter *); 113 static void igb_free_all_rx_resources(struct igb_adapter *); 114 static void igb_setup_mrqc(struct igb_adapter *); 115 static int igb_probe(struct pci_dev *, const struct pci_device_id *); 116 static void __devexit igb_remove(struct pci_dev *pdev); 117 static int igb_sw_init(struct igb_adapter *); 118 static int igb_open(struct net_device *); 119 static int igb_close(struct net_device *); 120 static void igb_configure_tx(struct igb_adapter *); 121 static void igb_configure_rx(struct igb_adapter *); 122 static void igb_clean_all_tx_rings(struct igb_adapter *); 123 static void igb_clean_all_rx_rings(struct igb_adapter *); 124 static void igb_clean_tx_ring(struct igb_ring *); 125 static void igb_clean_rx_ring(struct igb_ring *); 126 static void igb_set_rx_mode(struct net_device *); 127 static void igb_update_phy_info(unsigned long); 128 static void igb_watchdog(unsigned long); 129 static void igb_watchdog_task(struct work_struct *); 130 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); 131 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, 132 struct rtnl_link_stats64 *stats); 133 static int igb_change_mtu(struct net_device *, int); 134 static int igb_set_mac(struct net_device *, void *); 135 static void igb_set_uta(struct igb_adapter *adapter); 136 static irqreturn_t igb_intr(int irq, void *); 137 static irqreturn_t igb_intr_msi(int irq, void *); 138 static irqreturn_t igb_msix_other(int irq, void *); 139 static irqreturn_t 
igb_msix_ring(int irq, void *); 140 #ifdef CONFIG_IGB_DCA 141 static void igb_update_dca(struct igb_q_vector *); 142 static void igb_setup_dca(struct igb_adapter *); 143 #endif /* CONFIG_IGB_DCA */ 144 static int igb_poll(struct napi_struct *, int); 145 static bool igb_clean_tx_irq(struct igb_q_vector *); 146 static bool igb_clean_rx_irq(struct igb_q_vector *, int); 147 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); 148 static void igb_tx_timeout(struct net_device *); 149 static void igb_reset_task(struct work_struct *); 150 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); 151 static int igb_vlan_rx_add_vid(struct net_device *, u16); 152 static int igb_vlan_rx_kill_vid(struct net_device *, u16); 153 static void igb_restore_vlan(struct igb_adapter *); 154 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); 155 static void igb_ping_all_vfs(struct igb_adapter *); 156 static void igb_msg_task(struct igb_adapter *); 157 static void igb_vmm_control(struct igb_adapter *); 158 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); 159 static void igb_restore_vf_multicasts(struct igb_adapter *adapter); 160 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); 161 static int igb_ndo_set_vf_vlan(struct net_device *netdev, 162 int vf, u16 vlan, u8 qos); 163 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); 164 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, 165 struct ifla_vf_info *ivi); 166 static void igb_check_vf_rate_limit(struct igb_adapter *); 167 168 #ifdef CONFIG_PCI_IOV 169 static int igb_vf_configure(struct igb_adapter *adapter, int vf); 170 static int igb_find_enabled_vfs(struct igb_adapter *adapter); 171 static int igb_check_vf_assignment(struct igb_adapter *adapter); 172 #endif 173 174 #ifdef CONFIG_PM 175 #ifdef CONFIG_PM_SLEEP 176 static int igb_suspend(struct device *); 177 #endif 178 static int 
igb_resume(struct device *); 179 #ifdef CONFIG_PM_RUNTIME 180 static int igb_runtime_suspend(struct device *dev); 181 static int igb_runtime_resume(struct device *dev); 182 static int igb_runtime_idle(struct device *dev); 183 #endif 184 static const struct dev_pm_ops igb_pm_ops = { 185 SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) 186 SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, 187 igb_runtime_idle) 188 }; 189 #endif 190 static void igb_shutdown(struct pci_dev *); 191 #ifdef CONFIG_IGB_DCA 192 static int igb_notify_dca(struct notifier_block *, unsigned long, void *); 193 static struct notifier_block dca_notifier = { 194 .notifier_call = igb_notify_dca, 195 .next = NULL, 196 .priority = 0 197 }; 198 #endif 199 #ifdef CONFIG_NET_POLL_CONTROLLER 200 /* for netdump / net console */ 201 static void igb_netpoll(struct net_device *); 202 #endif 203 #ifdef CONFIG_PCI_IOV 204 static unsigned int max_vfs = 0; 205 module_param(max_vfs, uint, 0); 206 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " 207 "per physical function"); 208 #endif /* CONFIG_PCI_IOV */ 209 210 static pci_ers_result_t igb_io_error_detected(struct pci_dev *, 211 pci_channel_state_t); 212 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); 213 static void igb_io_resume(struct pci_dev *); 214 215 static struct pci_error_handlers igb_err_handler = { 216 .error_detected = igb_io_error_detected, 217 .slot_reset = igb_io_slot_reset, 218 .resume = igb_io_resume, 219 }; 220 221 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); 222 223 static struct pci_driver igb_driver = { 224 .name = igb_driver_name, 225 .id_table = igb_pci_tbl, 226 .probe = igb_probe, 227 .remove = __devexit_p(igb_remove), 228 #ifdef CONFIG_PM 229 .driver.pm = &igb_pm_ops, 230 #endif 231 .shutdown = igb_shutdown, 232 .err_handler = &igb_err_handler 233 }; 234 235 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>"); 236 MODULE_DESCRIPTION("Intel(R) 
Gigabit Ethernet Network Driver"); 237 MODULE_LICENSE("GPL"); 238 MODULE_VERSION(DRV_VERSION); 239 240 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) 241 static int debug = -1; 242 module_param(debug, int, 0); 243 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); 244 245 struct igb_reg_info { 246 u32 ofs; 247 char *name; 248 }; 249 250 static const struct igb_reg_info igb_reg_info_tbl[] = { 251 252 /* General Registers */ 253 {E1000_CTRL, "CTRL"}, 254 {E1000_STATUS, "STATUS"}, 255 {E1000_CTRL_EXT, "CTRL_EXT"}, 256 257 /* Interrupt Registers */ 258 {E1000_ICR, "ICR"}, 259 260 /* RX Registers */ 261 {E1000_RCTL, "RCTL"}, 262 {E1000_RDLEN(0), "RDLEN"}, 263 {E1000_RDH(0), "RDH"}, 264 {E1000_RDT(0), "RDT"}, 265 {E1000_RXDCTL(0), "RXDCTL"}, 266 {E1000_RDBAL(0), "RDBAL"}, 267 {E1000_RDBAH(0), "RDBAH"}, 268 269 /* TX Registers */ 270 {E1000_TCTL, "TCTL"}, 271 {E1000_TDBAL(0), "TDBAL"}, 272 {E1000_TDBAH(0), "TDBAH"}, 273 {E1000_TDLEN(0), "TDLEN"}, 274 {E1000_TDH(0), "TDH"}, 275 {E1000_TDT(0), "TDT"}, 276 {E1000_TXDCTL(0), "TXDCTL"}, 277 {E1000_TDFH, "TDFH"}, 278 {E1000_TDFT, "TDFT"}, 279 {E1000_TDFHS, "TDFHS"}, 280 {E1000_TDFPC, "TDFPC"}, 281 282 /* List Terminator */ 283 {} 284 }; 285 286 /* 287 * igb_regdump - register printout routine 288 */ 289 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) 290 { 291 int n = 0; 292 char rname[16]; 293 u32 regs[8]; 294 295 switch (reginfo->ofs) { 296 case E1000_RDLEN(0): 297 for (n = 0; n < 4; n++) 298 regs[n] = rd32(E1000_RDLEN(n)); 299 break; 300 case E1000_RDH(0): 301 for (n = 0; n < 4; n++) 302 regs[n] = rd32(E1000_RDH(n)); 303 break; 304 case E1000_RDT(0): 305 for (n = 0; n < 4; n++) 306 regs[n] = rd32(E1000_RDT(n)); 307 break; 308 case E1000_RXDCTL(0): 309 for (n = 0; n < 4; n++) 310 regs[n] = rd32(E1000_RXDCTL(n)); 311 break; 312 case E1000_RDBAL(0): 313 for (n = 0; n < 4; n++) 314 regs[n] = rd32(E1000_RDBAL(n)); 315 break; 316 case E1000_RDBAH(0): 317 for (n = 0; n 
< 4; n++) 318 regs[n] = rd32(E1000_RDBAH(n)); 319 break; 320 case E1000_TDBAL(0): 321 for (n = 0; n < 4; n++) 322 regs[n] = rd32(E1000_RDBAL(n)); 323 break; 324 case E1000_TDBAH(0): 325 for (n = 0; n < 4; n++) 326 regs[n] = rd32(E1000_TDBAH(n)); 327 break; 328 case E1000_TDLEN(0): 329 for (n = 0; n < 4; n++) 330 regs[n] = rd32(E1000_TDLEN(n)); 331 break; 332 case E1000_TDH(0): 333 for (n = 0; n < 4; n++) 334 regs[n] = rd32(E1000_TDH(n)); 335 break; 336 case E1000_TDT(0): 337 for (n = 0; n < 4; n++) 338 regs[n] = rd32(E1000_TDT(n)); 339 break; 340 case E1000_TXDCTL(0): 341 for (n = 0; n < 4; n++) 342 regs[n] = rd32(E1000_TXDCTL(n)); 343 break; 344 default: 345 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); 346 return; 347 } 348 349 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); 350 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], 351 regs[2], regs[3]); 352 } 353 354 /* 355 * igb_dump - Print registers, tx-rings and rx-rings 356 */ 357 static void igb_dump(struct igb_adapter *adapter) 358 { 359 struct net_device *netdev = adapter->netdev; 360 struct e1000_hw *hw = &adapter->hw; 361 struct igb_reg_info *reginfo; 362 struct igb_ring *tx_ring; 363 union e1000_adv_tx_desc *tx_desc; 364 struct my_u0 { u64 a; u64 b; } *u0; 365 struct igb_ring *rx_ring; 366 union e1000_adv_rx_desc *rx_desc; 367 u32 staterr; 368 u16 i, n; 369 370 if (!netif_msg_hw(adapter)) 371 return; 372 373 /* Print netdevice Info */ 374 if (netdev) { 375 dev_info(&adapter->pdev->dev, "Net device Info\n"); 376 pr_info("Device Name state trans_start " 377 "last_rx\n"); 378 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, 379 netdev->state, netdev->trans_start, netdev->last_rx); 380 } 381 382 /* Print Registers */ 383 dev_info(&adapter->pdev->dev, "Register Dump\n"); 384 pr_info(" Register Name Value\n"); 385 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl; 386 reginfo->name; reginfo++) { 387 igb_regdump(hw, reginfo); 388 } 389 390 /* Print TX Ring Summary */ 
391 if (!netdev || !netif_running(netdev)) 392 goto exit; 393 394 dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); 395 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); 396 for (n = 0; n < adapter->num_tx_queues; n++) { 397 struct igb_tx_buffer *buffer_info; 398 tx_ring = adapter->tx_ring[n]; 399 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; 400 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", 401 n, tx_ring->next_to_use, tx_ring->next_to_clean, 402 (u64)buffer_info->dma, 403 buffer_info->length, 404 buffer_info->next_to_watch, 405 (u64)buffer_info->time_stamp); 406 } 407 408 /* Print TX Rings */ 409 if (!netif_msg_tx_done(adapter)) 410 goto rx_ring_summary; 411 412 dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); 413 414 /* Transmit Descriptor Formats 415 * 416 * Advanced Transmit Descriptor 417 * +--------------------------------------------------------------+ 418 * 0 | Buffer Address [63:0] | 419 * +--------------------------------------------------------------+ 420 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN | 421 * +--------------------------------------------------------------+ 422 * 63 46 45 40 39 38 36 35 32 31 24 15 0 423 */ 424 425 for (n = 0; n < adapter->num_tx_queues; n++) { 426 tx_ring = adapter->tx_ring[n]; 427 pr_info("------------------------------------\n"); 428 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); 429 pr_info("------------------------------------\n"); 430 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " 431 "[bi->dma ] leng ntw timestamp " 432 "bi->skb\n"); 433 434 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { 435 const char *next_desc; 436 struct igb_tx_buffer *buffer_info; 437 tx_desc = IGB_TX_DESC(tx_ring, i); 438 buffer_info = &tx_ring->tx_buffer_info[i]; 439 u0 = (struct my_u0 *)tx_desc; 440 if (i == tx_ring->next_to_use && 441 i == tx_ring->next_to_clean) 442 next_desc = " NTC/U"; 443 else if (i == tx_ring->next_to_use) 444 next_desc = " NTU"; 
445 else if (i == tx_ring->next_to_clean) 446 next_desc = " NTC"; 447 else 448 next_desc = ""; 449 450 pr_info("T [0x%03X] %016llX %016llX %016llX" 451 " %04X %p %016llX %p%s\n", i, 452 le64_to_cpu(u0->a), 453 le64_to_cpu(u0->b), 454 (u64)buffer_info->dma, 455 buffer_info->length, 456 buffer_info->next_to_watch, 457 (u64)buffer_info->time_stamp, 458 buffer_info->skb, next_desc); 459 460 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) 461 print_hex_dump(KERN_INFO, "", 462 DUMP_PREFIX_ADDRESS, 463 16, 1, phys_to_virt(buffer_info->dma), 464 buffer_info->length, true); 465 } 466 } 467 468 /* Print RX Rings Summary */ 469 rx_ring_summary: 470 dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); 471 pr_info("Queue [NTU] [NTC]\n"); 472 for (n = 0; n < adapter->num_rx_queues; n++) { 473 rx_ring = adapter->rx_ring[n]; 474 pr_info(" %5d %5X %5X\n", 475 n, rx_ring->next_to_use, rx_ring->next_to_clean); 476 } 477 478 /* Print RX Rings */ 479 if (!netif_msg_rx_status(adapter)) 480 goto exit; 481 482 dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); 483 484 /* Advanced Receive Descriptor (Read) Format 485 * 63 1 0 486 * +-----------------------------------------------------+ 487 * 0 | Packet Buffer Address [63:1] |A0/NSE| 488 * +----------------------------------------------+------+ 489 * 8 | Header Buffer Address [63:1] | DD | 490 * +-----------------------------------------------------+ 491 * 492 * 493 * Advanced Receive Descriptor (Write-Back) Format 494 * 495 * 63 48 47 32 31 30 21 20 17 16 4 3 0 496 * +------------------------------------------------------+ 497 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | 498 * | Checksum Ident | | | | Type | Type | 499 * +------------------------------------------------------+ 500 * 8 | VLAN Tag | Length | Extended Error | Extended Status | 501 * +------------------------------------------------------+ 502 * 63 48 47 32 31 20 19 0 503 */ 504 505 for (n = 0; n < adapter->num_rx_queues; n++) { 506 rx_ring = adapter->rx_ring[n]; 
507 pr_info("------------------------------------\n"); 508 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); 509 pr_info("------------------------------------\n"); 510 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " 511 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); 512 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" 513 "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); 514 515 for (i = 0; i < rx_ring->count; i++) { 516 const char *next_desc; 517 struct igb_rx_buffer *buffer_info; 518 buffer_info = &rx_ring->rx_buffer_info[i]; 519 rx_desc = IGB_RX_DESC(rx_ring, i); 520 u0 = (struct my_u0 *)rx_desc; 521 staterr = le32_to_cpu(rx_desc->wb.upper.status_error); 522 523 if (i == rx_ring->next_to_use) 524 next_desc = " NTU"; 525 else if (i == rx_ring->next_to_clean) 526 next_desc = " NTC"; 527 else 528 next_desc = ""; 529 530 if (staterr & E1000_RXD_STAT_DD) { 531 /* Descriptor Done */ 532 pr_info("%s[0x%03X] %016llX %016llX -------" 533 "--------- %p%s\n", "RWB", i, 534 le64_to_cpu(u0->a), 535 le64_to_cpu(u0->b), 536 buffer_info->skb, next_desc); 537 } else { 538 pr_info("%s[0x%03X] %016llX %016llX %016llX" 539 " %p%s\n", "R ", i, 540 le64_to_cpu(u0->a), 541 le64_to_cpu(u0->b), 542 (u64)buffer_info->dma, 543 buffer_info->skb, next_desc); 544 545 if (netif_msg_pktdata(adapter)) { 546 print_hex_dump(KERN_INFO, "", 547 DUMP_PREFIX_ADDRESS, 548 16, 1, 549 phys_to_virt(buffer_info->dma), 550 IGB_RX_HDR_LEN, true); 551 print_hex_dump(KERN_INFO, "", 552 DUMP_PREFIX_ADDRESS, 553 16, 1, 554 phys_to_virt( 555 buffer_info->page_dma + 556 buffer_info->page_offset), 557 PAGE_SIZE/2, true); 558 } 559 } 560 } 561 } 562 563 exit: 564 return; 565 } 566 567 /** 568 * igb_get_hw_dev - return device 569 * used by hardware layer to print debugging information 570 **/ 571 struct net_device *igb_get_hw_dev(struct e1000_hw *hw) 572 { 573 struct igb_adapter *adapter = hw->back; 574 return adapter->netdev; 575 } 576 577 /** 578 * igb_init_module - Driver Registration 
Routine 579 * 580 * igb_init_module is the first routine called when the driver is 581 * loaded. All it does is register with the PCI subsystem. 582 **/ 583 static int __init igb_init_module(void) 584 { 585 int ret; 586 pr_info("%s - version %s\n", 587 igb_driver_string, igb_driver_version); 588 589 pr_info("%s\n", igb_copyright); 590 591 #ifdef CONFIG_IGB_DCA 592 dca_register_notify(&dca_notifier); 593 #endif 594 ret = pci_register_driver(&igb_driver); 595 return ret; 596 } 597 598 module_init(igb_init_module); 599 600 /** 601 * igb_exit_module - Driver Exit Cleanup Routine 602 * 603 * igb_exit_module is called just before the driver is removed 604 * from memory. 605 **/ 606 static void __exit igb_exit_module(void) 607 { 608 #ifdef CONFIG_IGB_DCA 609 dca_unregister_notify(&dca_notifier); 610 #endif 611 pci_unregister_driver(&igb_driver); 612 } 613 614 module_exit(igb_exit_module); 615 616 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) 617 /** 618 * igb_cache_ring_register - Descriptor ring to register mapping 619 * @adapter: board private structure to initialize 620 * 621 * Once we know the feature-set enabled for the device, we'll cache 622 * the register offset the descriptor ring is assigned to. 623 **/ 624 static void igb_cache_ring_register(struct igb_adapter *adapter) 625 { 626 int i = 0, j = 0; 627 u32 rbase_offset = adapter->vfs_allocated_count; 628 629 switch (adapter->hw.mac.type) { 630 case e1000_82576: 631 /* The queues are allocated for virtualization such that VF 0 632 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. 
633 * In order to avoid collision we start at the first free queue 634 * and continue consuming queues in the same sequence 635 */ 636 if (adapter->vfs_allocated_count) { 637 for (; i < adapter->rss_queues; i++) 638 adapter->rx_ring[i]->reg_idx = rbase_offset + 639 Q_IDX_82576(i); 640 } 641 case e1000_82575: 642 case e1000_82580: 643 case e1000_i350: 644 default: 645 for (; i < adapter->num_rx_queues; i++) 646 adapter->rx_ring[i]->reg_idx = rbase_offset + i; 647 for (; j < adapter->num_tx_queues; j++) 648 adapter->tx_ring[j]->reg_idx = rbase_offset + j; 649 break; 650 } 651 } 652 653 static void igb_free_queues(struct igb_adapter *adapter) 654 { 655 int i; 656 657 for (i = 0; i < adapter->num_tx_queues; i++) { 658 kfree(adapter->tx_ring[i]); 659 adapter->tx_ring[i] = NULL; 660 } 661 for (i = 0; i < adapter->num_rx_queues; i++) { 662 kfree(adapter->rx_ring[i]); 663 adapter->rx_ring[i] = NULL; 664 } 665 adapter->num_rx_queues = 0; 666 adapter->num_tx_queues = 0; 667 } 668 669 /** 670 * igb_alloc_queues - Allocate memory for all rings 671 * @adapter: board private structure to initialize 672 * 673 * We allocate one ring per queue at run-time since we don't know the 674 * number of queues at compile-time. 
675 **/ 676 static int igb_alloc_queues(struct igb_adapter *adapter) 677 { 678 struct igb_ring *ring; 679 int i; 680 int orig_node = adapter->node; 681 682 for (i = 0; i < adapter->num_tx_queues; i++) { 683 if (orig_node == -1) { 684 int cur_node = next_online_node(adapter->node); 685 if (cur_node == MAX_NUMNODES) 686 cur_node = first_online_node; 687 adapter->node = cur_node; 688 } 689 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, 690 adapter->node); 691 if (!ring) 692 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); 693 if (!ring) 694 goto err; 695 ring->count = adapter->tx_ring_count; 696 ring->queue_index = i; 697 ring->dev = &adapter->pdev->dev; 698 ring->netdev = adapter->netdev; 699 ring->numa_node = adapter->node; 700 /* For 82575, context index must be unique per ring. */ 701 if (adapter->hw.mac.type == e1000_82575) 702 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); 703 adapter->tx_ring[i] = ring; 704 } 705 /* Restore the adapter's original node */ 706 adapter->node = orig_node; 707 708 for (i = 0; i < adapter->num_rx_queues; i++) { 709 if (orig_node == -1) { 710 int cur_node = next_online_node(adapter->node); 711 if (cur_node == MAX_NUMNODES) 712 cur_node = first_online_node; 713 adapter->node = cur_node; 714 } 715 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, 716 adapter->node); 717 if (!ring) 718 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); 719 if (!ring) 720 goto err; 721 ring->count = adapter->rx_ring_count; 722 ring->queue_index = i; 723 ring->dev = &adapter->pdev->dev; 724 ring->netdev = adapter->netdev; 725 ring->numa_node = adapter->node; 726 /* set flag indicating ring supports SCTP checksum offload */ 727 if (adapter->hw.mac.type >= e1000_82576) 728 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); 729 730 /* On i350, loopback VLAN packets have the tag byte-swapped. 
*/ 731 if (adapter->hw.mac.type == e1000_i350) 732 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); 733 734 adapter->rx_ring[i] = ring; 735 } 736 /* Restore the adapter's original node */ 737 adapter->node = orig_node; 738 739 igb_cache_ring_register(adapter); 740 741 return 0; 742 743 err: 744 /* Restore the adapter's original node */ 745 adapter->node = orig_node; 746 igb_free_queues(adapter); 747 748 return -ENOMEM; 749 } 750 751 /** 752 * igb_write_ivar - configure ivar for given MSI-X vector 753 * @hw: pointer to the HW structure 754 * @msix_vector: vector number we are allocating to a given ring 755 * @index: row index of IVAR register to write within IVAR table 756 * @offset: column offset of in IVAR, should be multiple of 8 757 * 758 * This function is intended to handle the writing of the IVAR register 759 * for adapters 82576 and newer. The IVAR table consists of 2 columns, 760 * each containing an cause allocation for an Rx and Tx ring, and a 761 * variable number of rows depending on the number of queues supported. 
762 **/ 763 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, 764 int index, int offset) 765 { 766 u32 ivar = array_rd32(E1000_IVAR0, index); 767 768 /* clear any bits that are currently set */ 769 ivar &= ~((u32)0xFF << offset); 770 771 /* write vector and valid bit */ 772 ivar |= (msix_vector | E1000_IVAR_VALID) << offset; 773 774 array_wr32(E1000_IVAR0, index, ivar); 775 } 776 777 #define IGB_N0_QUEUE -1 778 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) 779 { 780 struct igb_adapter *adapter = q_vector->adapter; 781 struct e1000_hw *hw = &adapter->hw; 782 int rx_queue = IGB_N0_QUEUE; 783 int tx_queue = IGB_N0_QUEUE; 784 u32 msixbm = 0; 785 786 if (q_vector->rx.ring) 787 rx_queue = q_vector->rx.ring->reg_idx; 788 if (q_vector->tx.ring) 789 tx_queue = q_vector->tx.ring->reg_idx; 790 791 switch (hw->mac.type) { 792 case e1000_82575: 793 /* The 82575 assigns vectors using a bitmask, which matches the 794 bitmask for the EICR/EIMS/EIMC registers. To assign one 795 or more queues to a vector, we write the appropriate bits 796 into the MSIXBM register for that vector. */ 797 if (rx_queue > IGB_N0_QUEUE) 798 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; 799 if (tx_queue > IGB_N0_QUEUE) 800 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; 801 if (!adapter->msix_entries && msix_vector == 0) 802 msixbm |= E1000_EIMS_OTHER; 803 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); 804 q_vector->eims_value = msixbm; 805 break; 806 case e1000_82576: 807 /* 808 * 82576 uses a table that essentially consists of 2 columns 809 * with 8 rows. The ordering is column-major so we use the 810 * lower 3 bits as the row index, and the 4th bit as the 811 * column offset. 
812 */ 813 if (rx_queue > IGB_N0_QUEUE) 814 igb_write_ivar(hw, msix_vector, 815 rx_queue & 0x7, 816 (rx_queue & 0x8) << 1); 817 if (tx_queue > IGB_N0_QUEUE) 818 igb_write_ivar(hw, msix_vector, 819 tx_queue & 0x7, 820 ((tx_queue & 0x8) << 1) + 8); 821 q_vector->eims_value = 1 << msix_vector; 822 break; 823 case e1000_82580: 824 case e1000_i350: 825 /* 826 * On 82580 and newer adapters the scheme is similar to 82576 827 * however instead of ordering column-major we have things 828 * ordered row-major. So we traverse the table by using 829 * bit 0 as the column offset, and the remaining bits as the 830 * row index. 831 */ 832 if (rx_queue > IGB_N0_QUEUE) 833 igb_write_ivar(hw, msix_vector, 834 rx_queue >> 1, 835 (rx_queue & 0x1) << 4); 836 if (tx_queue > IGB_N0_QUEUE) 837 igb_write_ivar(hw, msix_vector, 838 tx_queue >> 1, 839 ((tx_queue & 0x1) << 4) + 8); 840 q_vector->eims_value = 1 << msix_vector; 841 break; 842 default: 843 BUG(); 844 break; 845 } 846 847 /* add q_vector eims value to global eims_enable_mask */ 848 adapter->eims_enable_mask |= q_vector->eims_value; 849 850 /* configure q_vector to set itr on first interrupt */ 851 q_vector->set_itr = 1; 852 } 853 854 /** 855 * igb_configure_msix - Configure MSI-X hardware 856 * 857 * igb_configure_msix sets up the hardware to properly 858 * generate MSI-X interrupts. 859 **/ 860 static void igb_configure_msix(struct igb_adapter *adapter) 861 { 862 u32 tmp; 863 int i, vector = 0; 864 struct e1000_hw *hw = &adapter->hw; 865 866 adapter->eims_enable_mask = 0; 867 868 /* set vector for other causes, i.e. link changes */ 869 switch (hw->mac.type) { 870 case e1000_82575: 871 tmp = rd32(E1000_CTRL_EXT); 872 /* enable MSI-X PBA support*/ 873 tmp |= E1000_CTRL_EXT_PBA_CLR; 874 875 /* Auto-Mask interrupts upon ICR read. 
*/ 876 tmp |= E1000_CTRL_EXT_EIAME; 877 tmp |= E1000_CTRL_EXT_IRCA; 878 879 wr32(E1000_CTRL_EXT, tmp); 880 881 /* enable msix_other interrupt */ 882 array_wr32(E1000_MSIXBM(0), vector++, 883 E1000_EIMS_OTHER); 884 adapter->eims_other = E1000_EIMS_OTHER; 885 886 break; 887 888 case e1000_82576: 889 case e1000_82580: 890 case e1000_i350: 891 /* Turn on MSI-X capability first, or our settings 892 * won't stick. And it will take days to debug. */ 893 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | 894 E1000_GPIE_PBA | E1000_GPIE_EIAME | 895 E1000_GPIE_NSICR); 896 897 /* enable msix_other interrupt */ 898 adapter->eims_other = 1 << vector; 899 tmp = (vector++ | E1000_IVAR_VALID) << 8; 900 901 wr32(E1000_IVAR_MISC, tmp); 902 break; 903 default: 904 /* do nothing, since nothing else supports MSI-X */ 905 break; 906 } /* switch (hw->mac.type) */ 907 908 adapter->eims_enable_mask |= adapter->eims_other; 909 910 for (i = 0; i < adapter->num_q_vectors; i++) 911 igb_assign_vector(adapter->q_vector[i], vector++); 912 913 wrfl(); 914 } 915 916 /** 917 * igb_request_msix - Initialize MSI-X interrupts 918 * 919 * igb_request_msix allocates MSI-X vectors and requests interrupts from the 920 * kernel. 
921 **/ 922 static int igb_request_msix(struct igb_adapter *adapter) 923 { 924 struct net_device *netdev = adapter->netdev; 925 struct e1000_hw *hw = &adapter->hw; 926 int i, err = 0, vector = 0; 927 928 err = request_irq(adapter->msix_entries[vector].vector, 929 igb_msix_other, 0, netdev->name, adapter); 930 if (err) 931 goto out; 932 vector++; 933 934 for (i = 0; i < adapter->num_q_vectors; i++) { 935 struct igb_q_vector *q_vector = adapter->q_vector[i]; 936 937 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); 938 939 if (q_vector->rx.ring && q_vector->tx.ring) 940 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, 941 q_vector->rx.ring->queue_index); 942 else if (q_vector->tx.ring) 943 sprintf(q_vector->name, "%s-tx-%u", netdev->name, 944 q_vector->tx.ring->queue_index); 945 else if (q_vector->rx.ring) 946 sprintf(q_vector->name, "%s-rx-%u", netdev->name, 947 q_vector->rx.ring->queue_index); 948 else 949 sprintf(q_vector->name, "%s-unused", netdev->name); 950 951 err = request_irq(adapter->msix_entries[vector].vector, 952 igb_msix_ring, 0, q_vector->name, 953 q_vector); 954 if (err) 955 goto out; 956 vector++; 957 } 958 959 igb_configure_msix(adapter); 960 return 0; 961 out: 962 return err; 963 } 964 965 static void igb_reset_interrupt_capability(struct igb_adapter *adapter) 966 { 967 if (adapter->msix_entries) { 968 pci_disable_msix(adapter->pdev); 969 kfree(adapter->msix_entries); 970 adapter->msix_entries = NULL; 971 } else if (adapter->flags & IGB_FLAG_HAS_MSI) { 972 pci_disable_msi(adapter->pdev); 973 } 974 } 975 976 /** 977 * igb_free_q_vectors - Free memory allocated for interrupt vectors 978 * @adapter: board private structure to initialize 979 * 980 * This function frees the memory allocated to the q_vectors. In addition if 981 * NAPI is enabled it will delete any references to the NAPI struct prior 982 * to freeing the q_vector. 
983 **/ 984 static void igb_free_q_vectors(struct igb_adapter *adapter) 985 { 986 int v_idx; 987 988 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { 989 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 990 adapter->q_vector[v_idx] = NULL; 991 if (!q_vector) 992 continue; 993 netif_napi_del(&q_vector->napi); 994 kfree(q_vector); 995 } 996 adapter->num_q_vectors = 0; 997 } 998 999 /** 1000 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts 1001 * 1002 * This function resets the device so that it has 0 rx queues, tx queues, and 1003 * MSI-X interrupts allocated. 1004 */ 1005 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) 1006 { 1007 igb_free_queues(adapter); 1008 igb_free_q_vectors(adapter); 1009 igb_reset_interrupt_capability(adapter); 1010 } 1011 1012 /** 1013 * igb_set_interrupt_capability - set MSI or MSI-X if supported 1014 * 1015 * Attempt to configure interrupts using the best available 1016 * capabilities of the hardware and kernel. 1017 **/ 1018 static int igb_set_interrupt_capability(struct igb_adapter *adapter) 1019 { 1020 int err; 1021 int numvecs, i; 1022 1023 /* Number of supported queues. 
*/ 1024 adapter->num_rx_queues = adapter->rss_queues; 1025 if (adapter->vfs_allocated_count) 1026 adapter->num_tx_queues = 1; 1027 else 1028 adapter->num_tx_queues = adapter->rss_queues; 1029 1030 /* start with one vector for every rx queue */ 1031 numvecs = adapter->num_rx_queues; 1032 1033 /* if tx handler is separate add 1 for every tx queue */ 1034 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) 1035 numvecs += adapter->num_tx_queues; 1036 1037 /* store the number of vectors reserved for queues */ 1038 adapter->num_q_vectors = numvecs; 1039 1040 /* add 1 vector for link status interrupts */ 1041 numvecs++; 1042 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), 1043 GFP_KERNEL); 1044 if (!adapter->msix_entries) 1045 goto msi_only; 1046 1047 for (i = 0; i < numvecs; i++) 1048 adapter->msix_entries[i].entry = i; 1049 1050 err = pci_enable_msix(adapter->pdev, 1051 adapter->msix_entries, 1052 numvecs); 1053 if (err == 0) 1054 goto out; 1055 1056 igb_reset_interrupt_capability(adapter); 1057 1058 /* If we can't do MSI-X, try MSI */ 1059 msi_only: 1060 #ifdef CONFIG_PCI_IOV 1061 /* disable SR-IOV for non MSI-X configurations */ 1062 if (adapter->vf_data) { 1063 struct e1000_hw *hw = &adapter->hw; 1064 /* disable iov and allow time for transactions to clear */ 1065 pci_disable_sriov(adapter->pdev); 1066 msleep(500); 1067 1068 kfree(adapter->vf_data); 1069 adapter->vf_data = NULL; 1070 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 1071 wrfl(); 1072 msleep(100); 1073 dev_info(&adapter->pdev->dev, "IOV Disabled\n"); 1074 } 1075 #endif 1076 adapter->vfs_allocated_count = 0; 1077 adapter->rss_queues = 1; 1078 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 1079 adapter->num_rx_queues = 1; 1080 adapter->num_tx_queues = 1; 1081 adapter->num_q_vectors = 1; 1082 if (!pci_enable_msi(adapter->pdev)) 1083 adapter->flags |= IGB_FLAG_HAS_MSI; 1084 out: 1085 /* Notify the stack of the (possibly) reduced queue counts. 
*/ 1086 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); 1087 return netif_set_real_num_rx_queues(adapter->netdev, 1088 adapter->num_rx_queues); 1089 } 1090 1091 /** 1092 * igb_alloc_q_vectors - Allocate memory for interrupt vectors 1093 * @adapter: board private structure to initialize 1094 * 1095 * We allocate one q_vector per queue interrupt. If allocation fails we 1096 * return -ENOMEM. 1097 **/ 1098 static int igb_alloc_q_vectors(struct igb_adapter *adapter) 1099 { 1100 struct igb_q_vector *q_vector; 1101 struct e1000_hw *hw = &adapter->hw; 1102 int v_idx; 1103 int orig_node = adapter->node; 1104 1105 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { 1106 if ((adapter->num_q_vectors == (adapter->num_rx_queues + 1107 adapter->num_tx_queues)) && 1108 (adapter->num_rx_queues == v_idx)) 1109 adapter->node = orig_node; 1110 if (orig_node == -1) { 1111 int cur_node = next_online_node(adapter->node); 1112 if (cur_node == MAX_NUMNODES) 1113 cur_node = first_online_node; 1114 adapter->node = cur_node; 1115 } 1116 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, 1117 adapter->node); 1118 if (!q_vector) 1119 q_vector = kzalloc(sizeof(struct igb_q_vector), 1120 GFP_KERNEL); 1121 if (!q_vector) 1122 goto err_out; 1123 q_vector->adapter = adapter; 1124 q_vector->itr_register = hw->hw_addr + E1000_EITR(0); 1125 q_vector->itr_val = IGB_START_ITR; 1126 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); 1127 adapter->q_vector[v_idx] = q_vector; 1128 } 1129 /* Restore the adapter's original node */ 1130 adapter->node = orig_node; 1131 1132 return 0; 1133 1134 err_out: 1135 /* Restore the adapter's original node */ 1136 adapter->node = orig_node; 1137 igb_free_q_vectors(adapter); 1138 return -ENOMEM; 1139 } 1140 1141 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter, 1142 int ring_idx, int v_idx) 1143 { 1144 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1145 1146 q_vector->rx.ring = 
adapter->rx_ring[ring_idx]; 1147 q_vector->rx.ring->q_vector = q_vector; 1148 q_vector->rx.count++; 1149 q_vector->itr_val = adapter->rx_itr_setting; 1150 if (q_vector->itr_val && q_vector->itr_val <= 3) 1151 q_vector->itr_val = IGB_START_ITR; 1152 } 1153 1154 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, 1155 int ring_idx, int v_idx) 1156 { 1157 struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; 1158 1159 q_vector->tx.ring = adapter->tx_ring[ring_idx]; 1160 q_vector->tx.ring->q_vector = q_vector; 1161 q_vector->tx.count++; 1162 q_vector->itr_val = adapter->tx_itr_setting; 1163 q_vector->tx.work_limit = adapter->tx_work_limit; 1164 if (q_vector->itr_val && q_vector->itr_val <= 3) 1165 q_vector->itr_val = IGB_START_ITR; 1166 } 1167 1168 /** 1169 * igb_map_ring_to_vector - maps allocated queues to vectors 1170 * 1171 * This function maps the recently allocated queues to vectors. 1172 **/ 1173 static int igb_map_ring_to_vector(struct igb_adapter *adapter) 1174 { 1175 int i; 1176 int v_idx = 0; 1177 1178 if ((adapter->num_q_vectors < adapter->num_rx_queues) || 1179 (adapter->num_q_vectors < adapter->num_tx_queues)) 1180 return -ENOMEM; 1181 1182 if (adapter->num_q_vectors >= 1183 (adapter->num_rx_queues + adapter->num_tx_queues)) { 1184 for (i = 0; i < adapter->num_rx_queues; i++) 1185 igb_map_rx_ring_to_vector(adapter, i, v_idx++); 1186 for (i = 0; i < adapter->num_tx_queues; i++) 1187 igb_map_tx_ring_to_vector(adapter, i, v_idx++); 1188 } else { 1189 for (i = 0; i < adapter->num_rx_queues; i++) { 1190 if (i < adapter->num_tx_queues) 1191 igb_map_tx_ring_to_vector(adapter, i, v_idx); 1192 igb_map_rx_ring_to_vector(adapter, i, v_idx++); 1193 } 1194 for (; i < adapter->num_tx_queues; i++) 1195 igb_map_tx_ring_to_vector(adapter, i, v_idx++); 1196 } 1197 return 0; 1198 } 1199 1200 /** 1201 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors 1202 * 1203 * This function initializes the interrupts and allocates all of the 
queues.
 *
 * Steps: pick the interrupt capability, allocate the q_vectors, allocate
 * the queues, then map rings to vectors.  A failing step unwinds the steps
 * completed before it and its error code is returned.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int ret;

	ret = igb_set_interrupt_capability(adapter);
	if (ret)
		return ret;

	ret = igb_alloc_q_vectors(adapter);
	if (ret) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto undo_capability;
	}

	ret = igb_alloc_queues(adapter);
	if (ret) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto undo_q_vectors;
	}

	ret = igb_map_ring_to_vector(adapter);
	if (ret) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto undo_queues;
	}

	return 0;

undo_queues:
	igb_free_queues(adapter);
undo_q_vectors:
	igb_free_q_vectors(adapter);
undo_capability:
	igb_reset_interrupt_capability(adapter);
	return ret;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
1248 **/ 1249 static int igb_request_irq(struct igb_adapter *adapter) 1250 { 1251 struct net_device *netdev = adapter->netdev; 1252 struct pci_dev *pdev = adapter->pdev; 1253 int err = 0; 1254 1255 if (adapter->msix_entries) { 1256 err = igb_request_msix(adapter); 1257 if (!err) 1258 goto request_done; 1259 /* fall back to MSI */ 1260 igb_clear_interrupt_scheme(adapter); 1261 if (!pci_enable_msi(pdev)) 1262 adapter->flags |= IGB_FLAG_HAS_MSI; 1263 igb_free_all_tx_resources(adapter); 1264 igb_free_all_rx_resources(adapter); 1265 adapter->num_tx_queues = 1; 1266 adapter->num_rx_queues = 1; 1267 adapter->num_q_vectors = 1; 1268 err = igb_alloc_q_vectors(adapter); 1269 if (err) { 1270 dev_err(&pdev->dev, 1271 "Unable to allocate memory for vectors\n"); 1272 goto request_done; 1273 } 1274 err = igb_alloc_queues(adapter); 1275 if (err) { 1276 dev_err(&pdev->dev, 1277 "Unable to allocate memory for queues\n"); 1278 igb_free_q_vectors(adapter); 1279 goto request_done; 1280 } 1281 igb_setup_all_tx_resources(adapter); 1282 igb_setup_all_rx_resources(adapter); 1283 } 1284 1285 igb_assign_vector(adapter->q_vector[0], 0); 1286 1287 if (adapter->flags & IGB_FLAG_HAS_MSI) { 1288 err = request_irq(pdev->irq, igb_intr_msi, 0, 1289 netdev->name, adapter); 1290 if (!err) 1291 goto request_done; 1292 1293 /* fall back to legacy interrupts */ 1294 igb_reset_interrupt_capability(adapter); 1295 adapter->flags &= ~IGB_FLAG_HAS_MSI; 1296 } 1297 1298 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED, 1299 netdev->name, adapter); 1300 1301 if (err) 1302 dev_err(&pdev->dev, "Error %d getting interrupt\n", 1303 err); 1304 1305 request_done: 1306 return err; 1307 } 1308 1309 static void igb_free_irq(struct igb_adapter *adapter) 1310 { 1311 if (adapter->msix_entries) { 1312 int vector = 0, i; 1313 1314 free_irq(adapter->msix_entries[vector++].vector, adapter); 1315 1316 for (i = 0; i < adapter->num_q_vectors; i++) 1317 free_irq(adapter->msix_entries[vector++].vector, 1318 adapter->q_vector[i]); 
1319 } else { 1320 free_irq(adapter->pdev->irq, adapter); 1321 } 1322 } 1323 1324 /** 1325 * igb_irq_disable - Mask off interrupt generation on the NIC 1326 * @adapter: board private structure 1327 **/ 1328 static void igb_irq_disable(struct igb_adapter *adapter) 1329 { 1330 struct e1000_hw *hw = &adapter->hw; 1331 1332 /* 1333 * we need to be careful when disabling interrupts. The VFs are also 1334 * mapped into these registers and so clearing the bits can cause 1335 * issues on the VF drivers so we only need to clear what we set 1336 */ 1337 if (adapter->msix_entries) { 1338 u32 regval = rd32(E1000_EIAM); 1339 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); 1340 wr32(E1000_EIMC, adapter->eims_enable_mask); 1341 regval = rd32(E1000_EIAC); 1342 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask); 1343 } 1344 1345 wr32(E1000_IAM, 0); 1346 wr32(E1000_IMC, ~0); 1347 wrfl(); 1348 if (adapter->msix_entries) { 1349 int i; 1350 for (i = 0; i < adapter->num_q_vectors; i++) 1351 synchronize_irq(adapter->msix_entries[i].vector); 1352 } else { 1353 synchronize_irq(adapter->pdev->irq); 1354 } 1355 } 1356 1357 /** 1358 * igb_irq_enable - Enable default interrupt generation settings 1359 * @adapter: board private structure 1360 **/ 1361 static void igb_irq_enable(struct igb_adapter *adapter) 1362 { 1363 struct e1000_hw *hw = &adapter->hw; 1364 1365 if (adapter->msix_entries) { 1366 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; 1367 u32 regval = rd32(E1000_EIAC); 1368 wr32(E1000_EIAC, regval | adapter->eims_enable_mask); 1369 regval = rd32(E1000_EIAM); 1370 wr32(E1000_EIAM, regval | adapter->eims_enable_mask); 1371 wr32(E1000_EIMS, adapter->eims_enable_mask); 1372 if (adapter->vfs_allocated_count) { 1373 wr32(E1000_MBVFIMR, 0xFF); 1374 ims |= E1000_IMS_VMMB; 1375 } 1376 wr32(E1000_IMS, ims); 1377 } else { 1378 wr32(E1000_IMS, IMS_ENABLE_MASK | 1379 E1000_IMS_DRSTA); 1380 wr32(E1000_IAM, IMS_ENABLE_MASK | 1381 E1000_IMS_DRSTA); 1382 } 1383 } 1384 1385 
static void igb_update_mng_vlan(struct igb_adapter *adapter) 1386 { 1387 struct e1000_hw *hw = &adapter->hw; 1388 u16 vid = adapter->hw.mng_cookie.vlan_id; 1389 u16 old_vid = adapter->mng_vlan_id; 1390 1391 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { 1392 /* add VID to filter table */ 1393 igb_vfta_set(hw, vid, true); 1394 adapter->mng_vlan_id = vid; 1395 } else { 1396 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; 1397 } 1398 1399 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && 1400 (vid != old_vid) && 1401 !test_bit(old_vid, adapter->active_vlans)) { 1402 /* remove VID from filter table */ 1403 igb_vfta_set(hw, old_vid, false); 1404 } 1405 } 1406 1407 /** 1408 * igb_release_hw_control - release control of the h/w to f/w 1409 * @adapter: address of board private structure 1410 * 1411 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 1412 * For ASF and Pass Through versions of f/w this means that the 1413 * driver is no longer loaded. 1414 * 1415 **/ 1416 static void igb_release_hw_control(struct igb_adapter *adapter) 1417 { 1418 struct e1000_hw *hw = &adapter->hw; 1419 u32 ctrl_ext; 1420 1421 /* Let firmware take over control of h/w */ 1422 ctrl_ext = rd32(E1000_CTRL_EXT); 1423 wr32(E1000_CTRL_EXT, 1424 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 1425 } 1426 1427 /** 1428 * igb_get_hw_control - get control of the h/w from f/w 1429 * @adapter: address of board private structure 1430 * 1431 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 1432 * For ASF and Pass Through versions of f/w this means that 1433 * the driver is loaded. 
1434 * 1435 **/ 1436 static void igb_get_hw_control(struct igb_adapter *adapter) 1437 { 1438 struct e1000_hw *hw = &adapter->hw; 1439 u32 ctrl_ext; 1440 1441 /* Let firmware know the driver has taken over */ 1442 ctrl_ext = rd32(E1000_CTRL_EXT); 1443 wr32(E1000_CTRL_EXT, 1444 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 1445 } 1446 1447 /** 1448 * igb_configure - configure the hardware for RX and TX 1449 * @adapter: private board structure 1450 **/ 1451 static void igb_configure(struct igb_adapter *adapter) 1452 { 1453 struct net_device *netdev = adapter->netdev; 1454 int i; 1455 1456 igb_get_hw_control(adapter); 1457 igb_set_rx_mode(netdev); 1458 1459 igb_restore_vlan(adapter); 1460 1461 igb_setup_tctl(adapter); 1462 igb_setup_mrqc(adapter); 1463 igb_setup_rctl(adapter); 1464 1465 igb_configure_tx(adapter); 1466 igb_configure_rx(adapter); 1467 1468 igb_rx_fifo_flush_82575(&adapter->hw); 1469 1470 /* call igb_desc_unused which always leaves 1471 * at least 1 descriptor unused to make sure 1472 * next_to_use != next_to_clean */ 1473 for (i = 0; i < adapter->num_rx_queues; i++) { 1474 struct igb_ring *ring = adapter->rx_ring[i]; 1475 igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); 1476 } 1477 } 1478 1479 /** 1480 * igb_power_up_link - Power up the phy/serdes link 1481 * @adapter: address of board private structure 1482 **/ 1483 void igb_power_up_link(struct igb_adapter *adapter) 1484 { 1485 if (adapter->hw.phy.media_type == e1000_media_type_copper) 1486 igb_power_up_phy_copper(&adapter->hw); 1487 else 1488 igb_power_up_serdes_link_82575(&adapter->hw); 1489 igb_reset_phy(&adapter->hw); 1490 } 1491 1492 /** 1493 * igb_power_down_link - Power down the phy/serdes link 1494 * @adapter: address of board private structure 1495 */ 1496 static void igb_power_down_link(struct igb_adapter *adapter) 1497 { 1498 if (adapter->hw.phy.media_type == e1000_media_type_copper) 1499 igb_power_down_phy_copper_82575(&adapter->hw); 1500 else 1501 igb_shutdown_serdes_link_82575(&adapter->hw); 
1502 } 1503 1504 /** 1505 * igb_up - Open the interface and prepare it to handle traffic 1506 * @adapter: board private structure 1507 **/ 1508 int igb_up(struct igb_adapter *adapter) 1509 { 1510 struct e1000_hw *hw = &adapter->hw; 1511 int i; 1512 1513 /* hardware has been reset, we need to reload some things */ 1514 igb_configure(adapter); 1515 1516 clear_bit(__IGB_DOWN, &adapter->state); 1517 1518 for (i = 0; i < adapter->num_q_vectors; i++) 1519 napi_enable(&(adapter->q_vector[i]->napi)); 1520 1521 if (adapter->msix_entries) 1522 igb_configure_msix(adapter); 1523 else 1524 igb_assign_vector(adapter->q_vector[0], 0); 1525 1526 /* Clear any pending interrupts. */ 1527 rd32(E1000_ICR); 1528 igb_irq_enable(adapter); 1529 1530 /* notify VFs that reset has been completed */ 1531 if (adapter->vfs_allocated_count) { 1532 u32 reg_data = rd32(E1000_CTRL_EXT); 1533 reg_data |= E1000_CTRL_EXT_PFRSTD; 1534 wr32(E1000_CTRL_EXT, reg_data); 1535 } 1536 1537 netif_tx_start_all_queues(adapter->netdev); 1538 1539 /* start the watchdog. 
*/ 1540 hw->mac.get_link_status = 1; 1541 schedule_work(&adapter->watchdog_task); 1542 1543 return 0; 1544 } 1545 1546 void igb_down(struct igb_adapter *adapter) 1547 { 1548 struct net_device *netdev = adapter->netdev; 1549 struct e1000_hw *hw = &adapter->hw; 1550 u32 tctl, rctl; 1551 int i; 1552 1553 /* signal that we're down so the interrupt handler does not 1554 * reschedule our watchdog timer */ 1555 set_bit(__IGB_DOWN, &adapter->state); 1556 1557 /* disable receives in the hardware */ 1558 rctl = rd32(E1000_RCTL); 1559 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN); 1560 /* flush and sleep below */ 1561 1562 netif_tx_stop_all_queues(netdev); 1563 1564 /* disable transmits in the hardware */ 1565 tctl = rd32(E1000_TCTL); 1566 tctl &= ~E1000_TCTL_EN; 1567 wr32(E1000_TCTL, tctl); 1568 /* flush both disables and wait for them to finish */ 1569 wrfl(); 1570 msleep(10); 1571 1572 for (i = 0; i < adapter->num_q_vectors; i++) 1573 napi_disable(&(adapter->q_vector[i]->napi)); 1574 1575 igb_irq_disable(adapter); 1576 1577 del_timer_sync(&adapter->watchdog_timer); 1578 del_timer_sync(&adapter->phy_info_timer); 1579 1580 netif_carrier_off(netdev); 1581 1582 /* record the stats before reset*/ 1583 spin_lock(&adapter->stats64_lock); 1584 igb_update_stats(adapter, &adapter->stats64); 1585 spin_unlock(&adapter->stats64_lock); 1586 1587 adapter->link_speed = 0; 1588 adapter->link_duplex = 0; 1589 1590 if (!pci_channel_offline(adapter->pdev)) 1591 igb_reset(adapter); 1592 igb_clean_all_tx_rings(adapter); 1593 igb_clean_all_rx_rings(adapter); 1594 #ifdef CONFIG_IGB_DCA 1595 1596 /* since we reset the hardware DCA settings were cleared */ 1597 igb_setup_dca(adapter); 1598 #endif 1599 } 1600 1601 void igb_reinit_locked(struct igb_adapter *adapter) 1602 { 1603 WARN_ON(in_interrupt()); 1604 while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) 1605 msleep(1); 1606 igb_down(adapter); 1607 igb_up(adapter); 1608 clear_bit(__IGB_RESETTING, &adapter->state); 1609 } 1610 1611 void 
igb_reset(struct igb_adapter *adapter) 1612 { 1613 struct pci_dev *pdev = adapter->pdev; 1614 struct e1000_hw *hw = &adapter->hw; 1615 struct e1000_mac_info *mac = &hw->mac; 1616 struct e1000_fc_info *fc = &hw->fc; 1617 u32 pba = 0, tx_space, min_tx_space, min_rx_space; 1618 u16 hwm; 1619 1620 /* Repartition Pba for greater than 9k mtu 1621 * To take effect CTRL.RST is required. 1622 */ 1623 switch (mac->type) { 1624 case e1000_i350: 1625 case e1000_82580: 1626 pba = rd32(E1000_RXPBS); 1627 pba = igb_rxpbs_adjust_82580(pba); 1628 break; 1629 case e1000_82576: 1630 pba = rd32(E1000_RXPBS); 1631 pba &= E1000_RXPBS_SIZE_MASK_82576; 1632 break; 1633 case e1000_82575: 1634 default: 1635 pba = E1000_PBA_34K; 1636 break; 1637 } 1638 1639 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && 1640 (mac->type < e1000_82576)) { 1641 /* adjust PBA for jumbo frames */ 1642 wr32(E1000_PBA, pba); 1643 1644 /* To maintain wire speed transmits, the Tx FIFO should be 1645 * large enough to accommodate two full transmit packets, 1646 * rounded up to the next 1KB and expressed in KB. Likewise, 1647 * the Rx FIFO should be large enough to accommodate at least 1648 * one full receive packet and is similarly rounded up and 1649 * expressed in KB. 
*/ 1650 pba = rd32(E1000_PBA); 1651 /* upper 16 bits has Tx packet buffer allocation size in KB */ 1652 tx_space = pba >> 16; 1653 /* lower 16 bits has Rx packet buffer allocation size in KB */ 1654 pba &= 0xffff; 1655 /* the tx fifo also stores 16 bytes of information about the tx 1656 * but don't include ethernet FCS because hardware appends it */ 1657 min_tx_space = (adapter->max_frame_size + 1658 sizeof(union e1000_adv_tx_desc) - 1659 ETH_FCS_LEN) * 2; 1660 min_tx_space = ALIGN(min_tx_space, 1024); 1661 min_tx_space >>= 10; 1662 /* software strips receive CRC, so leave room for it */ 1663 min_rx_space = adapter->max_frame_size; 1664 min_rx_space = ALIGN(min_rx_space, 1024); 1665 min_rx_space >>= 10; 1666 1667 /* If current Tx allocation is less than the min Tx FIFO size, 1668 * and the min Tx FIFO size is less than the current Rx FIFO 1669 * allocation, take space away from current Rx allocation */ 1670 if (tx_space < min_tx_space && 1671 ((min_tx_space - tx_space) < pba)) { 1672 pba = pba - (min_tx_space - tx_space); 1673 1674 /* if short on rx space, rx wins and must trump tx 1675 * adjustment */ 1676 if (pba < min_rx_space) 1677 pba = min_rx_space; 1678 } 1679 wr32(E1000_PBA, pba); 1680 } 1681 1682 /* flow control settings */ 1683 /* The high water mark must be low enough to fit one full frame 1684 * (or the size used for early receive) above it in the Rx FIFO. 
1685 * Set it to the lower of: 1686 * - 90% of the Rx FIFO size, or 1687 * - the full Rx FIFO size minus one full frame */ 1688 hwm = min(((pba << 10) * 9 / 10), 1689 ((pba << 10) - 2 * adapter->max_frame_size)); 1690 1691 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ 1692 fc->low_water = fc->high_water - 16; 1693 fc->pause_time = 0xFFFF; 1694 fc->send_xon = 1; 1695 fc->current_mode = fc->requested_mode; 1696 1697 /* disable receive for all VFs and wait one second */ 1698 if (adapter->vfs_allocated_count) { 1699 int i; 1700 for (i = 0 ; i < adapter->vfs_allocated_count; i++) 1701 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; 1702 1703 /* ping all the active vfs to let them know we are going down */ 1704 igb_ping_all_vfs(adapter); 1705 1706 /* disable transmits and receives */ 1707 wr32(E1000_VFRE, 0); 1708 wr32(E1000_VFTE, 0); 1709 } 1710 1711 /* Allow time for pending master requests to run */ 1712 hw->mac.ops.reset_hw(hw); 1713 wr32(E1000_WUC, 0); 1714 1715 if (hw->mac.ops.init_hw(hw)) 1716 dev_err(&pdev->dev, "Hardware Error\n"); 1717 1718 igb_init_dmac(adapter, pba); 1719 if (!netif_running(adapter->netdev)) 1720 igb_power_down_link(adapter); 1721 1722 igb_update_mng_vlan(adapter); 1723 1724 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ 1725 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); 1726 1727 igb_get_phy_info(hw); 1728 } 1729 1730 static netdev_features_t igb_fix_features(struct net_device *netdev, 1731 netdev_features_t features) 1732 { 1733 /* 1734 * Since there is no support for separate rx/tx vlan accel 1735 * enable/disable make sure tx flag is always in same state as rx. 
1736 */ 1737 if (features & NETIF_F_HW_VLAN_RX) 1738 features |= NETIF_F_HW_VLAN_TX; 1739 else 1740 features &= ~NETIF_F_HW_VLAN_TX; 1741 1742 return features; 1743 } 1744 1745 static int igb_set_features(struct net_device *netdev, 1746 netdev_features_t features) 1747 { 1748 netdev_features_t changed = netdev->features ^ features; 1749 struct igb_adapter *adapter = netdev_priv(netdev); 1750 1751 if (changed & NETIF_F_HW_VLAN_RX) 1752 igb_vlan_mode(netdev, features); 1753 1754 if (!(changed & NETIF_F_RXALL)) 1755 return 0; 1756 1757 netdev->features = features; 1758 1759 if (netif_running(netdev)) 1760 igb_reinit_locked(adapter); 1761 else 1762 igb_reset(adapter); 1763 1764 return 0; 1765 } 1766 1767 static const struct net_device_ops igb_netdev_ops = { 1768 .ndo_open = igb_open, 1769 .ndo_stop = igb_close, 1770 .ndo_start_xmit = igb_xmit_frame, 1771 .ndo_get_stats64 = igb_get_stats64, 1772 .ndo_set_rx_mode = igb_set_rx_mode, 1773 .ndo_set_mac_address = igb_set_mac, 1774 .ndo_change_mtu = igb_change_mtu, 1775 .ndo_do_ioctl = igb_ioctl, 1776 .ndo_tx_timeout = igb_tx_timeout, 1777 .ndo_validate_addr = eth_validate_addr, 1778 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, 1779 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, 1780 .ndo_set_vf_mac = igb_ndo_set_vf_mac, 1781 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, 1782 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, 1783 .ndo_get_vf_config = igb_ndo_get_vf_config, 1784 #ifdef CONFIG_NET_POLL_CONTROLLER 1785 .ndo_poll_controller = igb_netpoll, 1786 #endif 1787 .ndo_fix_features = igb_fix_features, 1788 .ndo_set_features = igb_set_features, 1789 }; 1790 1791 /** 1792 * igb_probe - Device Initialization Routine 1793 * @pdev: PCI device information struct 1794 * @ent: entry in igb_pci_tbl 1795 * 1796 * Returns 0 on success, negative on failure 1797 * 1798 * igb_probe initializes an adapter identified by a pci_dev structure. 1799 * The OS initialization, configuring of the adapter private structure, 1800 * and a hardware reset occur. 
1801 **/ 1802 static int __devinit igb_probe(struct pci_dev *pdev, 1803 const struct pci_device_id *ent) 1804 { 1805 struct net_device *netdev; 1806 struct igb_adapter *adapter; 1807 struct e1000_hw *hw; 1808 u16 eeprom_data = 0; 1809 s32 ret_val; 1810 static int global_quad_port_a; /* global quad port a indication */ 1811 const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; 1812 unsigned long mmio_start, mmio_len; 1813 int err, pci_using_dac; 1814 u16 eeprom_apme_mask = IGB_EEPROM_APME; 1815 u8 part_str[E1000_PBANUM_LENGTH]; 1816 1817 /* Catch broken hardware that put the wrong VF device ID in 1818 * the PCIe SR-IOV capability. 1819 */ 1820 if (pdev->is_virtfn) { 1821 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", 1822 pci_name(pdev), pdev->vendor, pdev->device); 1823 return -EINVAL; 1824 } 1825 1826 err = pci_enable_device_mem(pdev); 1827 if (err) 1828 return err; 1829 1830 pci_using_dac = 0; 1831 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); 1832 if (!err) { 1833 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); 1834 if (!err) 1835 pci_using_dac = 1; 1836 } else { 1837 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); 1838 if (err) { 1839 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); 1840 if (err) { 1841 dev_err(&pdev->dev, "No usable DMA " 1842 "configuration, aborting\n"); 1843 goto err_dma; 1844 } 1845 } 1846 } 1847 1848 err = pci_request_selected_regions(pdev, pci_select_bars(pdev, 1849 IORESOURCE_MEM), 1850 igb_driver_name); 1851 if (err) 1852 goto err_pci_reg; 1853 1854 pci_enable_pcie_error_reporting(pdev); 1855 1856 pci_set_master(pdev); 1857 pci_save_state(pdev); 1858 1859 err = -ENOMEM; 1860 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), 1861 IGB_MAX_TX_QUEUES); 1862 if (!netdev) 1863 goto err_alloc_etherdev; 1864 1865 SET_NETDEV_DEV(netdev, &pdev->dev); 1866 1867 pci_set_drvdata(pdev, netdev); 1868 adapter = netdev_priv(netdev); 1869 adapter->netdev = netdev; 1870 adapter->pdev = pdev; 1871 hw = 
&adapter->hw; 1872 hw->back = adapter; 1873 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); 1874 1875 mmio_start = pci_resource_start(pdev, 0); 1876 mmio_len = pci_resource_len(pdev, 0); 1877 1878 err = -EIO; 1879 hw->hw_addr = ioremap(mmio_start, mmio_len); 1880 if (!hw->hw_addr) 1881 goto err_ioremap; 1882 1883 netdev->netdev_ops = &igb_netdev_ops; 1884 igb_set_ethtool_ops(netdev); 1885 netdev->watchdog_timeo = 5 * HZ; 1886 1887 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); 1888 1889 netdev->mem_start = mmio_start; 1890 netdev->mem_end = mmio_start + mmio_len; 1891 1892 /* PCI config space info */ 1893 hw->vendor_id = pdev->vendor; 1894 hw->device_id = pdev->device; 1895 hw->revision_id = pdev->revision; 1896 hw->subsystem_vendor_id = pdev->subsystem_vendor; 1897 hw->subsystem_device_id = pdev->subsystem_device; 1898 1899 /* Copy the default MAC, PHY and NVM function pointers */ 1900 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops)); 1901 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops)); 1902 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops)); 1903 /* Initialize skew-specific constants */ 1904 err = ei->get_invariants(hw); 1905 if (err) 1906 goto err_sw_init; 1907 1908 /* setup the private structure */ 1909 err = igb_sw_init(adapter); 1910 if (err) 1911 goto err_sw_init; 1912 1913 igb_get_bus_info_pcie(hw); 1914 1915 hw->phy.autoneg_wait_to_complete = false; 1916 1917 /* Copper options */ 1918 if (hw->phy.media_type == e1000_media_type_copper) { 1919 hw->phy.mdix = AUTO_ALL_MODES; 1920 hw->phy.disable_polarity_correction = false; 1921 hw->phy.ms_type = e1000_ms_hw_default; 1922 } 1923 1924 if (igb_check_reset_block(hw)) 1925 dev_info(&pdev->dev, 1926 "PHY reset is blocked due to SOL/IDER session.\n"); 1927 1928 /* 1929 * features is initialized to 0 in allocation, it might have bits 1930 * set by igb_sw_init so we should use an or instead of an 1931 * assignment. 
1932 */ 1933 netdev->features |= NETIF_F_SG | 1934 NETIF_F_IP_CSUM | 1935 NETIF_F_IPV6_CSUM | 1936 NETIF_F_TSO | 1937 NETIF_F_TSO6 | 1938 NETIF_F_RXHASH | 1939 NETIF_F_RXCSUM | 1940 NETIF_F_HW_VLAN_RX | 1941 NETIF_F_HW_VLAN_TX; 1942 1943 /* copy netdev features into list of user selectable features */ 1944 netdev->hw_features |= netdev->features; 1945 netdev->hw_features |= NETIF_F_RXALL; 1946 1947 /* set this bit last since it cannot be part of hw_features */ 1948 netdev->features |= NETIF_F_HW_VLAN_FILTER; 1949 1950 netdev->vlan_features |= NETIF_F_TSO | 1951 NETIF_F_TSO6 | 1952 NETIF_F_IP_CSUM | 1953 NETIF_F_IPV6_CSUM | 1954 NETIF_F_SG; 1955 1956 netdev->priv_flags |= IFF_SUPP_NOFCS; 1957 1958 if (pci_using_dac) { 1959 netdev->features |= NETIF_F_HIGHDMA; 1960 netdev->vlan_features |= NETIF_F_HIGHDMA; 1961 } 1962 1963 if (hw->mac.type >= e1000_82576) { 1964 netdev->hw_features |= NETIF_F_SCTP_CSUM; 1965 netdev->features |= NETIF_F_SCTP_CSUM; 1966 } 1967 1968 netdev->priv_flags |= IFF_UNICAST_FLT; 1969 1970 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); 1971 1972 /* before reading the NVM, reset the controller to put the device in a 1973 * known good starting state */ 1974 hw->mac.ops.reset_hw(hw); 1975 1976 /* make sure the NVM is good */ 1977 if (hw->nvm.ops.validate(hw) < 0) { 1978 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); 1979 err = -EIO; 1980 goto err_eeprom; 1981 } 1982 1983 /* copy the MAC address out of the NVM */ 1984 if (hw->mac.ops.read_mac_addr(hw)) 1985 dev_err(&pdev->dev, "NVM Read Error\n"); 1986 1987 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); 1988 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); 1989 1990 if (!is_valid_ether_addr(netdev->perm_addr)) { 1991 dev_err(&pdev->dev, "Invalid MAC Address\n"); 1992 err = -EIO; 1993 goto err_eeprom; 1994 } 1995 1996 setup_timer(&adapter->watchdog_timer, igb_watchdog, 1997 (unsigned long) adapter); 1998 setup_timer(&adapter->phy_info_timer, igb_update_phy_info, 1999 
(unsigned long) adapter); 2000 2001 INIT_WORK(&adapter->reset_task, igb_reset_task); 2002 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); 2003 2004 /* Initialize link properties that are user-changeable */ 2005 adapter->fc_autoneg = true; 2006 hw->mac.autoneg = true; 2007 hw->phy.autoneg_advertised = 0x2f; 2008 2009 hw->fc.requested_mode = e1000_fc_default; 2010 hw->fc.current_mode = e1000_fc_default; 2011 2012 igb_validate_mdi_setting(hw); 2013 2014 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM, 2015 * enable the ACPI Magic Packet filter 2016 */ 2017 2018 if (hw->bus.func == 0) 2019 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); 2020 else if (hw->mac.type >= e1000_82580) 2021 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + 2022 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, 2023 &eeprom_data); 2024 else if (hw->bus.func == 1) 2025 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); 2026 2027 if (eeprom_data & eeprom_apme_mask) 2028 adapter->eeprom_wol |= E1000_WUFC_MAG; 2029 2030 /* now that we have the eeprom settings, apply the special cases where 2031 * the eeprom may be wrong or the board simply won't support wake on 2032 * lan on a particular port */ 2033 switch (pdev->device) { 2034 case E1000_DEV_ID_82575GB_QUAD_COPPER: 2035 adapter->eeprom_wol = 0; 2036 break; 2037 case E1000_DEV_ID_82575EB_FIBER_SERDES: 2038 case E1000_DEV_ID_82576_FIBER: 2039 case E1000_DEV_ID_82576_SERDES: 2040 /* Wake events only supported on port A for dual fiber 2041 * regardless of eeprom setting */ 2042 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) 2043 adapter->eeprom_wol = 0; 2044 break; 2045 case E1000_DEV_ID_82576_QUAD_COPPER: 2046 case E1000_DEV_ID_82576_QUAD_COPPER_ET2: 2047 /* if quad port adapter, disable WoL on all but port A */ 2048 if (global_quad_port_a != 0) 2049 adapter->eeprom_wol = 0; 2050 else 2051 adapter->flags |= IGB_FLAG_QUAD_PORT_A; 2052 /* Reset for multiple quad port adapters */ 2053 if 
(++global_quad_port_a == 4) 2054 global_quad_port_a = 0; 2055 break; 2056 } 2057 2058 /* initialize the wol settings based on the eeprom settings */ 2059 adapter->wol = adapter->eeprom_wol; 2060 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); 2061 2062 /* reset the hardware with the new settings */ 2063 igb_reset(adapter); 2064 2065 /* let the f/w know that the h/w is now under the control of the 2066 * driver. */ 2067 igb_get_hw_control(adapter); 2068 2069 strcpy(netdev->name, "eth%d"); 2070 err = register_netdev(netdev); 2071 if (err) 2072 goto err_register; 2073 2074 /* carrier off reporting is important to ethtool even BEFORE open */ 2075 netif_carrier_off(netdev); 2076 2077 #ifdef CONFIG_IGB_DCA 2078 if (dca_add_requester(&pdev->dev) == 0) { 2079 adapter->flags |= IGB_FLAG_DCA_ENABLED; 2080 dev_info(&pdev->dev, "DCA enabled\n"); 2081 igb_setup_dca(adapter); 2082 } 2083 2084 #endif 2085 #ifdef CONFIG_IGB_PTP 2086 /* do hw tstamp init after resetting */ 2087 igb_ptp_init(adapter); 2088 2089 #endif 2090 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); 2091 /* print bus type/speed/width info */ 2092 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", 2093 netdev->name, 2094 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : 2095 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : 2096 "unknown"), 2097 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : 2098 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : 2099 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : 2100 "unknown"), 2101 netdev->dev_addr); 2102 2103 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); 2104 if (ret_val) 2105 strcpy(part_str, "Unknown"); 2106 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); 2107 dev_info(&pdev->dev, 2108 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", 2109 adapter->msix_entries ? "MSI-X" : 2110 (adapter->flags & IGB_FLAG_HAS_MSI) ? 
"MSI" : "legacy", 2111 adapter->num_rx_queues, adapter->num_tx_queues); 2112 switch (hw->mac.type) { 2113 case e1000_i350: 2114 igb_set_eee_i350(hw); 2115 break; 2116 default: 2117 break; 2118 } 2119 2120 pm_runtime_put_noidle(&pdev->dev); 2121 return 0; 2122 2123 err_register: 2124 igb_release_hw_control(adapter); 2125 err_eeprom: 2126 if (!igb_check_reset_block(hw)) 2127 igb_reset_phy(hw); 2128 2129 if (hw->flash_address) 2130 iounmap(hw->flash_address); 2131 err_sw_init: 2132 igb_clear_interrupt_scheme(adapter); 2133 iounmap(hw->hw_addr); 2134 err_ioremap: 2135 free_netdev(netdev); 2136 err_alloc_etherdev: 2137 pci_release_selected_regions(pdev, 2138 pci_select_bars(pdev, IORESOURCE_MEM)); 2139 err_pci_reg: 2140 err_dma: 2141 pci_disable_device(pdev); 2142 return err; 2143 } 2144 2145 /** 2146 * igb_remove - Device Removal Routine 2147 * @pdev: PCI device information struct 2148 * 2149 * igb_remove is called by the PCI subsystem to alert the driver 2150 * that it should release a PCI device. The could be caused by a 2151 * Hot-Plug event, or because the driver is going to be removed from 2152 * memory. 2153 **/ 2154 static void __devexit igb_remove(struct pci_dev *pdev) 2155 { 2156 struct net_device *netdev = pci_get_drvdata(pdev); 2157 struct igb_adapter *adapter = netdev_priv(netdev); 2158 struct e1000_hw *hw = &adapter->hw; 2159 2160 pm_runtime_get_noresume(&pdev->dev); 2161 #ifdef CONFIG_IGB_PTP 2162 igb_ptp_remove(adapter); 2163 2164 #endif 2165 /* 2166 * The watchdog timer may be rescheduled, so explicitly 2167 * disable watchdog from being rescheduled. 
2168 */ 2169 set_bit(__IGB_DOWN, &adapter->state); 2170 del_timer_sync(&adapter->watchdog_timer); 2171 del_timer_sync(&adapter->phy_info_timer); 2172 2173 cancel_work_sync(&adapter->reset_task); 2174 cancel_work_sync(&adapter->watchdog_task); 2175 2176 #ifdef CONFIG_IGB_DCA 2177 if (adapter->flags & IGB_FLAG_DCA_ENABLED) { 2178 dev_info(&pdev->dev, "DCA disabled\n"); 2179 dca_remove_requester(&pdev->dev); 2180 adapter->flags &= ~IGB_FLAG_DCA_ENABLED; 2181 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE); 2182 } 2183 #endif 2184 2185 /* Release control of h/w to f/w. If f/w is AMT enabled, this 2186 * would have already happened in close and is redundant. */ 2187 igb_release_hw_control(adapter); 2188 2189 unregister_netdev(netdev); 2190 2191 igb_clear_interrupt_scheme(adapter); 2192 2193 #ifdef CONFIG_PCI_IOV 2194 /* reclaim resources allocated to VFs */ 2195 if (adapter->vf_data) { 2196 /* disable iov and allow time for transactions to clear */ 2197 if (!igb_check_vf_assignment(adapter)) { 2198 pci_disable_sriov(pdev); 2199 msleep(500); 2200 } else { 2201 dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); 2202 } 2203 2204 kfree(adapter->vf_data); 2205 adapter->vf_data = NULL; 2206 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); 2207 wrfl(); 2208 msleep(100); 2209 dev_info(&pdev->dev, "IOV Disabled\n"); 2210 } 2211 #endif 2212 2213 iounmap(hw->hw_addr); 2214 if (hw->flash_address) 2215 iounmap(hw->flash_address); 2216 pci_release_selected_regions(pdev, 2217 pci_select_bars(pdev, IORESOURCE_MEM)); 2218 2219 kfree(adapter->shadow_vfta); 2220 free_netdev(netdev); 2221 2222 pci_disable_pcie_error_reporting(pdev); 2223 2224 pci_disable_device(pdev); 2225 } 2226 2227 /** 2228 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space 2229 * @adapter: board private structure to initialize 2230 * 2231 * This function initializes the vf specific data storage and then attempts to 2232 * allocate the VFs. 
The reason for ordering it this way is because it is much 2233 * mor expensive time wise to disable SR-IOV than it is to allocate and free 2234 * the memory for the VFs. 2235 **/ 2236 static void __devinit igb_probe_vfs(struct igb_adapter * adapter) 2237 { 2238 #ifdef CONFIG_PCI_IOV 2239 struct pci_dev *pdev = adapter->pdev; 2240 int old_vfs = igb_find_enabled_vfs(adapter); 2241 int i; 2242 2243 if (old_vfs) { 2244 dev_info(&pdev->dev, "%d pre-allocated VFs found - override " 2245 "max_vfs setting of %d\n", old_vfs, max_vfs); 2246 adapter->vfs_allocated_count = old_vfs; 2247 } 2248 2249 if (!adapter->vfs_allocated_count) 2250 return; 2251 2252 adapter->vf_data = kcalloc(adapter->vfs_allocated_count, 2253 sizeof(struct vf_data_storage), GFP_KERNEL); 2254 /* if allocation failed then we do not support SR-IOV */ 2255 if (!adapter->vf_data) { 2256 adapter->vfs_allocated_count = 0; 2257 dev_err(&pdev->dev, "Unable to allocate memory for VF " 2258 "Data Storage\n"); 2259 goto out; 2260 } 2261 2262 if (!old_vfs) { 2263 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) 2264 goto err_out; 2265 } 2266 dev_info(&pdev->dev, "%d VFs allocated\n", 2267 adapter->vfs_allocated_count); 2268 for (i = 0; i < adapter->vfs_allocated_count; i++) 2269 igb_vf_configure(adapter, i); 2270 2271 /* DMA Coalescing is not supported in IOV mode. */ 2272 adapter->flags &= ~IGB_FLAG_DMAC; 2273 goto out; 2274 err_out: 2275 kfree(adapter->vf_data); 2276 adapter->vf_data = NULL; 2277 adapter->vfs_allocated_count = 0; 2278 out: 2279 return; 2280 #endif /* CONFIG_PCI_IOV */ 2281 } 2282 2283 /** 2284 * igb_sw_init - Initialize general software structures (struct igb_adapter) 2285 * @adapter: board private structure to initialize 2286 * 2287 * igb_sw_init initializes the Adapter private data structure. 2288 * Fields are initialized based on PCI device information and 2289 * OS network device settings (MTU size). 
2290 **/ 2291 static int __devinit igb_sw_init(struct igb_adapter *adapter) 2292 { 2293 struct e1000_hw *hw = &adapter->hw; 2294 struct net_device *netdev = adapter->netdev; 2295 struct pci_dev *pdev = adapter->pdev; 2296 2297 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); 2298 2299 /* set default ring sizes */ 2300 adapter->tx_ring_count = IGB_DEFAULT_TXD; 2301 adapter->rx_ring_count = IGB_DEFAULT_RXD; 2302 2303 /* set default ITR values */ 2304 adapter->rx_itr_setting = IGB_DEFAULT_ITR; 2305 adapter->tx_itr_setting = IGB_DEFAULT_ITR; 2306 2307 /* set default work limits */ 2308 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; 2309 2310 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + 2311 VLAN_HLEN; 2312 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; 2313 2314 adapter->node = -1; 2315 2316 spin_lock_init(&adapter->stats64_lock); 2317 #ifdef CONFIG_PCI_IOV 2318 switch (hw->mac.type) { 2319 case e1000_82576: 2320 case e1000_i350: 2321 if (max_vfs > 7) { 2322 dev_warn(&pdev->dev, 2323 "Maximum of 7 VFs per PF, using max\n"); 2324 adapter->vfs_allocated_count = 7; 2325 } else 2326 adapter->vfs_allocated_count = max_vfs; 2327 break; 2328 default: 2329 break; 2330 } 2331 #endif /* CONFIG_PCI_IOV */ 2332 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); 2333 /* i350 cannot do RSS and SR-IOV at the same time */ 2334 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count) 2335 adapter->rss_queues = 1; 2336 2337 /* 2338 * if rss_queues > 4 or vfs are going to be allocated with rss_queues 2339 * then we should combine the queues into a queue pair in order to 2340 * conserve interrupts due to limited supply 2341 */ 2342 if ((adapter->rss_queues > 4) || 2343 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6))) 2344 adapter->flags |= IGB_FLAG_QUEUE_PAIRS; 2345 2346 /* Setup and initialize a copy of the hw vlan table array */ 2347 adapter->shadow_vfta = kzalloc(sizeof(u32) * 2348 
E1000_VLAN_FILTER_TBL_SIZE, 2349 GFP_ATOMIC); 2350 2351 /* This call may decrease the number of queues */ 2352 if (igb_init_interrupt_scheme(adapter)) { 2353 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 2354 return -ENOMEM; 2355 } 2356 2357 igb_probe_vfs(adapter); 2358 2359 /* Explicitly disable IRQ since the NIC can be in any state. */ 2360 igb_irq_disable(adapter); 2361 2362 if (hw->mac.type == e1000_i350) 2363 adapter->flags &= ~IGB_FLAG_DMAC; 2364 2365 set_bit(__IGB_DOWN, &adapter->state); 2366 return 0; 2367 } 2368 2369 /** 2370 * igb_open - Called when a network interface is made active 2371 * @netdev: network interface device structure 2372 * 2373 * Returns 0 on success, negative value on failure 2374 * 2375 * The open entry point is called when a network interface is made 2376 * active by the system (IFF_UP). At this point all resources needed 2377 * for transmit and receive operations are allocated, the interrupt 2378 * handler is registered with the OS, the watchdog timer is started, 2379 * and the stack is notified that the interface is ready. 2380 **/ 2381 static int __igb_open(struct net_device *netdev, bool resuming) 2382 { 2383 struct igb_adapter *adapter = netdev_priv(netdev); 2384 struct e1000_hw *hw = &adapter->hw; 2385 struct pci_dev *pdev = adapter->pdev; 2386 int err; 2387 int i; 2388 2389 /* disallow open during test */ 2390 if (test_bit(__IGB_TESTING, &adapter->state)) { 2391 WARN_ON(resuming); 2392 return -EBUSY; 2393 } 2394 2395 if (!resuming) 2396 pm_runtime_get_sync(&pdev->dev); 2397 2398 netif_carrier_off(netdev); 2399 2400 /* allocate transmit descriptors */ 2401 err = igb_setup_all_tx_resources(adapter); 2402 if (err) 2403 goto err_setup_tx; 2404 2405 /* allocate receive descriptors */ 2406 err = igb_setup_all_rx_resources(adapter); 2407 if (err) 2408 goto err_setup_rx; 2409 2410 igb_power_up_link(adapter); 2411 2412 /* before we allocate an interrupt, we must be ready to handle it. 
2413 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt 2414 * as soon as we call pci_request_irq, so we have to setup our 2415 * clean_rx handler before we do so. */ 2416 igb_configure(adapter); 2417 2418 err = igb_request_irq(adapter); 2419 if (err) 2420 goto err_req_irq; 2421 2422 /* From here on the code is the same as igb_up() */ 2423 clear_bit(__IGB_DOWN, &adapter->state); 2424 2425 for (i = 0; i < adapter->num_q_vectors; i++) 2426 napi_enable(&(adapter->q_vector[i]->napi)); 2427 2428 /* Clear any pending interrupts. */ 2429 rd32(E1000_ICR); 2430 2431 igb_irq_enable(adapter); 2432 2433 /* notify VFs that reset has been completed */ 2434 if (adapter->vfs_allocated_count) { 2435 u32 reg_data = rd32(E1000_CTRL_EXT); 2436 reg_data |= E1000_CTRL_EXT_PFRSTD; 2437 wr32(E1000_CTRL_EXT, reg_data); 2438 } 2439 2440 netif_tx_start_all_queues(netdev); 2441 2442 if (!resuming) 2443 pm_runtime_put(&pdev->dev); 2444 2445 /* start the watchdog. */ 2446 hw->mac.get_link_status = 1; 2447 schedule_work(&adapter->watchdog_task); 2448 2449 return 0; 2450 2451 err_req_irq: 2452 igb_release_hw_control(adapter); 2453 igb_power_down_link(adapter); 2454 igb_free_all_rx_resources(adapter); 2455 err_setup_rx: 2456 igb_free_all_tx_resources(adapter); 2457 err_setup_tx: 2458 igb_reset(adapter); 2459 if (!resuming) 2460 pm_runtime_put(&pdev->dev); 2461 2462 return err; 2463 } 2464 2465 static int igb_open(struct net_device *netdev) 2466 { 2467 return __igb_open(netdev, false); 2468 } 2469 2470 /** 2471 * igb_close - Disables a network interface 2472 * @netdev: network interface device structure 2473 * 2474 * Returns 0, this is not allowed to fail 2475 * 2476 * The close entry point is called when an interface is de-activated 2477 * by the OS. The hardware is still under the driver's control, but 2478 * needs to be disabled. A global MAC reset is issued to stop the 2479 * hardware, and all transmit and receive resources are freed. 
2480 **/ 2481 static int __igb_close(struct net_device *netdev, bool suspending) 2482 { 2483 struct igb_adapter *adapter = netdev_priv(netdev); 2484 struct pci_dev *pdev = adapter->pdev; 2485 2486 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); 2487 2488 if (!suspending) 2489 pm_runtime_get_sync(&pdev->dev); 2490 2491 igb_down(adapter); 2492 igb_free_irq(adapter); 2493 2494 igb_free_all_tx_resources(adapter); 2495 igb_free_all_rx_resources(adapter); 2496 2497 if (!suspending) 2498 pm_runtime_put_sync(&pdev->dev); 2499 return 0; 2500 } 2501 2502 static int igb_close(struct net_device *netdev) 2503 { 2504 return __igb_close(netdev, false); 2505 } 2506 2507 /** 2508 * igb_setup_tx_resources - allocate Tx resources (Descriptors) 2509 * @tx_ring: tx descriptor ring (for a specific queue) to setup 2510 * 2511 * Return 0 on success, negative on failure 2512 **/ 2513 int igb_setup_tx_resources(struct igb_ring *tx_ring) 2514 { 2515 struct device *dev = tx_ring->dev; 2516 int orig_node = dev_to_node(dev); 2517 int size; 2518 2519 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 2520 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); 2521 if (!tx_ring->tx_buffer_info) 2522 tx_ring->tx_buffer_info = vzalloc(size); 2523 if (!tx_ring->tx_buffer_info) 2524 goto err; 2525 2526 /* round up to nearest 4K */ 2527 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); 2528 tx_ring->size = ALIGN(tx_ring->size, 4096); 2529 2530 set_dev_node(dev, tx_ring->numa_node); 2531 tx_ring->desc = dma_alloc_coherent(dev, 2532 tx_ring->size, 2533 &tx_ring->dma, 2534 GFP_KERNEL); 2535 set_dev_node(dev, orig_node); 2536 if (!tx_ring->desc) 2537 tx_ring->desc = dma_alloc_coherent(dev, 2538 tx_ring->size, 2539 &tx_ring->dma, 2540 GFP_KERNEL); 2541 2542 if (!tx_ring->desc) 2543 goto err; 2544 2545 tx_ring->next_to_use = 0; 2546 tx_ring->next_to_clean = 0; 2547 2548 return 0; 2549 2550 err: 2551 vfree(tx_ring->tx_buffer_info); 2552 dev_err(dev, 2553 "Unable to allocate 
memory for the transmit descriptor ring\n"); 2554 return -ENOMEM; 2555 } 2556 2557 /** 2558 * igb_setup_all_tx_resources - wrapper to allocate Tx resources 2559 * (Descriptors) for all queues 2560 * @adapter: board private structure 2561 * 2562 * Return 0 on success, negative on failure 2563 **/ 2564 static int igb_setup_all_tx_resources(struct igb_adapter *adapter) 2565 { 2566 struct pci_dev *pdev = adapter->pdev; 2567 int i, err = 0; 2568 2569 for (i = 0; i < adapter->num_tx_queues; i++) { 2570 err = igb_setup_tx_resources(adapter->tx_ring[i]); 2571 if (err) { 2572 dev_err(&pdev->dev, 2573 "Allocation for Tx Queue %u failed\n", i); 2574 for (i--; i >= 0; i--) 2575 igb_free_tx_resources(adapter->tx_ring[i]); 2576 break; 2577 } 2578 } 2579 2580 return err; 2581 } 2582 2583 /** 2584 * igb_setup_tctl - configure the transmit control registers 2585 * @adapter: Board private structure 2586 **/ 2587 void igb_setup_tctl(struct igb_adapter *adapter) 2588 { 2589 struct e1000_hw *hw = &adapter->hw; 2590 u32 tctl; 2591 2592 /* disable queue 0 which is enabled by default on 82575 and 82576 */ 2593 wr32(E1000_TXDCTL(0), 0); 2594 2595 /* Program the Transmit Control Register */ 2596 tctl = rd32(E1000_TCTL); 2597 tctl &= ~E1000_TCTL_CT; 2598 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | 2599 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); 2600 2601 igb_config_collision_dist(hw); 2602 2603 /* Enable transmits */ 2604 tctl |= E1000_TCTL_EN; 2605 2606 wr32(E1000_TCTL, tctl); 2607 } 2608 2609 /** 2610 * igb_configure_tx_ring - Configure transmit ring after Reset 2611 * @adapter: board private structure 2612 * @ring: tx ring to configure 2613 * 2614 * Configure a transmit ring after a reset. 
2615 **/ 2616 void igb_configure_tx_ring(struct igb_adapter *adapter, 2617 struct igb_ring *ring) 2618 { 2619 struct e1000_hw *hw = &adapter->hw; 2620 u32 txdctl = 0; 2621 u64 tdba = ring->dma; 2622 int reg_idx = ring->reg_idx; 2623 2624 /* disable the queue */ 2625 wr32(E1000_TXDCTL(reg_idx), 0); 2626 wrfl(); 2627 mdelay(10); 2628 2629 wr32(E1000_TDLEN(reg_idx), 2630 ring->count * sizeof(union e1000_adv_tx_desc)); 2631 wr32(E1000_TDBAL(reg_idx), 2632 tdba & 0x00000000ffffffffULL); 2633 wr32(E1000_TDBAH(reg_idx), tdba >> 32); 2634 2635 ring->tail = hw->hw_addr + E1000_TDT(reg_idx); 2636 wr32(E1000_TDH(reg_idx), 0); 2637 writel(0, ring->tail); 2638 2639 txdctl |= IGB_TX_PTHRESH; 2640 txdctl |= IGB_TX_HTHRESH << 8; 2641 txdctl |= IGB_TX_WTHRESH << 16; 2642 2643 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 2644 wr32(E1000_TXDCTL(reg_idx), txdctl); 2645 2646 netdev_tx_reset_queue(txring_txq(ring)); 2647 } 2648 2649 /** 2650 * igb_configure_tx - Configure transmit Unit after Reset 2651 * @adapter: board private structure 2652 * 2653 * Configure the Tx unit of the MAC after a reset. 
2654 **/ 2655 static void igb_configure_tx(struct igb_adapter *adapter) 2656 { 2657 int i; 2658 2659 for (i = 0; i < adapter->num_tx_queues; i++) 2660 igb_configure_tx_ring(adapter, adapter->tx_ring[i]); 2661 } 2662 2663 /** 2664 * igb_setup_rx_resources - allocate Rx resources (Descriptors) 2665 * @rx_ring: rx descriptor ring (for a specific queue) to setup 2666 * 2667 * Returns 0 on success, negative on failure 2668 **/ 2669 int igb_setup_rx_resources(struct igb_ring *rx_ring) 2670 { 2671 struct device *dev = rx_ring->dev; 2672 int orig_node = dev_to_node(dev); 2673 int size, desc_len; 2674 2675 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 2676 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); 2677 if (!rx_ring->rx_buffer_info) 2678 rx_ring->rx_buffer_info = vzalloc(size); 2679 if (!rx_ring->rx_buffer_info) 2680 goto err; 2681 2682 desc_len = sizeof(union e1000_adv_rx_desc); 2683 2684 /* Round up to nearest 4K */ 2685 rx_ring->size = rx_ring->count * desc_len; 2686 rx_ring->size = ALIGN(rx_ring->size, 4096); 2687 2688 set_dev_node(dev, rx_ring->numa_node); 2689 rx_ring->desc = dma_alloc_coherent(dev, 2690 rx_ring->size, 2691 &rx_ring->dma, 2692 GFP_KERNEL); 2693 set_dev_node(dev, orig_node); 2694 if (!rx_ring->desc) 2695 rx_ring->desc = dma_alloc_coherent(dev, 2696 rx_ring->size, 2697 &rx_ring->dma, 2698 GFP_KERNEL); 2699 2700 if (!rx_ring->desc) 2701 goto err; 2702 2703 rx_ring->next_to_clean = 0; 2704 rx_ring->next_to_use = 0; 2705 2706 return 0; 2707 2708 err: 2709 vfree(rx_ring->rx_buffer_info); 2710 rx_ring->rx_buffer_info = NULL; 2711 dev_err(dev, "Unable to allocate memory for the receive descriptor" 2712 " ring\n"); 2713 return -ENOMEM; 2714 } 2715 2716 /** 2717 * igb_setup_all_rx_resources - wrapper to allocate Rx resources 2718 * (Descriptors) for all queues 2719 * @adapter: board private structure 2720 * 2721 * Return 0 on success, negative on failure 2722 **/ 2723 static int igb_setup_all_rx_resources(struct igb_adapter 
*adapter) 2724 { 2725 struct pci_dev *pdev = adapter->pdev; 2726 int i, err = 0; 2727 2728 for (i = 0; i < adapter->num_rx_queues; i++) { 2729 err = igb_setup_rx_resources(adapter->rx_ring[i]); 2730 if (err) { 2731 dev_err(&pdev->dev, 2732 "Allocation for Rx Queue %u failed\n", i); 2733 for (i--; i >= 0; i--) 2734 igb_free_rx_resources(adapter->rx_ring[i]); 2735 break; 2736 } 2737 } 2738 2739 return err; 2740 } 2741 2742 /** 2743 * igb_setup_mrqc - configure the multiple receive queue control registers 2744 * @adapter: Board private structure 2745 **/ 2746 static void igb_setup_mrqc(struct igb_adapter *adapter) 2747 { 2748 struct e1000_hw *hw = &adapter->hw; 2749 u32 mrqc, rxcsum; 2750 u32 j, num_rx_queues, shift = 0, shift2 = 0; 2751 union e1000_reta { 2752 u32 dword; 2753 u8 bytes[4]; 2754 } reta; 2755 static const u8 rsshash[40] = { 2756 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 2757 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 2758 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 2759 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; 2760 2761 /* Fill out hash function seeds */ 2762 for (j = 0; j < 10; j++) { 2763 u32 rsskey = rsshash[(j * 4)]; 2764 rsskey |= rsshash[(j * 4) + 1] << 8; 2765 rsskey |= rsshash[(j * 4) + 2] << 16; 2766 rsskey |= rsshash[(j * 4) + 3] << 24; 2767 array_wr32(E1000_RSSRK(0), j, rsskey); 2768 } 2769 2770 num_rx_queues = adapter->rss_queues; 2771 2772 if (adapter->vfs_allocated_count) { 2773 /* 82575 and 82576 supports 2 RSS queues for VMDq */ 2774 switch (hw->mac.type) { 2775 case e1000_i350: 2776 case e1000_82580: 2777 num_rx_queues = 1; 2778 shift = 0; 2779 break; 2780 case e1000_82576: 2781 shift = 3; 2782 num_rx_queues = 2; 2783 break; 2784 case e1000_82575: 2785 shift = 2; 2786 shift2 = 6; 2787 default: 2788 break; 2789 } 2790 } else { 2791 if (hw->mac.type == e1000_82575) 2792 shift = 6; 2793 } 2794 2795 for (j = 0; j < (32 * 4); j++) { 2796 reta.bytes[j & 3] = (j % 
num_rx_queues) << shift; 2797 if (shift2) 2798 reta.bytes[j & 3] |= num_rx_queues << shift2; 2799 if ((j & 3) == 3) 2800 wr32(E1000_RETA(j >> 2), reta.dword); 2801 } 2802 2803 /* 2804 * Disable raw packet checksumming so that RSS hash is placed in 2805 * descriptor on writeback. No need to enable TCP/UDP/IP checksum 2806 * offloads as they are enabled by default 2807 */ 2808 rxcsum = rd32(E1000_RXCSUM); 2809 rxcsum |= E1000_RXCSUM_PCSD; 2810 2811 if (adapter->hw.mac.type >= e1000_82576) 2812 /* Enable Receive Checksum Offload for SCTP */ 2813 rxcsum |= E1000_RXCSUM_CRCOFL; 2814 2815 /* Don't need to set TUOFL or IPOFL, they default to 1 */ 2816 wr32(E1000_RXCSUM, rxcsum); 2817 2818 /* If VMDq is enabled then we set the appropriate mode for that, else 2819 * we default to RSS so that an RSS hash is calculated per packet even 2820 * if we are only using one queue */ 2821 if (adapter->vfs_allocated_count) { 2822 if (hw->mac.type > e1000_82575) { 2823 /* Set the default pool for the PF's first queue */ 2824 u32 vtctl = rd32(E1000_VT_CTL); 2825 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | 2826 E1000_VT_CTL_DISABLE_DEF_POOL); 2827 vtctl |= adapter->vfs_allocated_count << 2828 E1000_VT_CTL_DEFAULT_POOL_SHIFT; 2829 wr32(E1000_VT_CTL, vtctl); 2830 } 2831 if (adapter->rss_queues > 1) 2832 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; 2833 else 2834 mrqc = E1000_MRQC_ENABLE_VMDQ; 2835 } else { 2836 mrqc = E1000_MRQC_ENABLE_RSS_4Q; 2837 } 2838 igb_vmm_control(adapter); 2839 2840 /* 2841 * Generate RSS hash based on TCP port numbers and/or 2842 * IPv4/v6 src and dst addresses since UDP cannot be 2843 * hashed reliably due to IP fragmentation 2844 */ 2845 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | 2846 E1000_MRQC_RSS_FIELD_IPV4_TCP | 2847 E1000_MRQC_RSS_FIELD_IPV6 | 2848 E1000_MRQC_RSS_FIELD_IPV6_TCP | 2849 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; 2850 2851 wr32(E1000_MRQC, mrqc); 2852 } 2853 2854 /** 2855 * igb_setup_rctl - configure the receive control registers 2856 * @adapter: Board private 
structure 2857 **/ 2858 void igb_setup_rctl(struct igb_adapter *adapter) 2859 { 2860 struct e1000_hw *hw = &adapter->hw; 2861 u32 rctl; 2862 2863 rctl = rd32(E1000_RCTL); 2864 2865 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 2866 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); 2867 2868 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | 2869 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 2870 2871 /* 2872 * enable stripping of CRC. It's unlikely this will break BMC 2873 * redirection as it did with e1000. Newer features require 2874 * that the HW strips the CRC. 2875 */ 2876 rctl |= E1000_RCTL_SECRC; 2877 2878 /* disable store bad packets and clear size bits. */ 2879 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); 2880 2881 /* enable LPE to prevent packets larger than max_frame_size */ 2882 rctl |= E1000_RCTL_LPE; 2883 2884 /* disable queue 0 to prevent tail write w/o re-config */ 2885 wr32(E1000_RXDCTL(0), 0); 2886 2887 /* Attention!!! For SR-IOV PF driver operations you must enable 2888 * queue drop for all VF and PF queues to prevent head of line blocking 2889 * if an un-trusted VF does not provide descriptors to hardware. 2890 */ 2891 if (adapter->vfs_allocated_count) { 2892 /* set all queue drop enable bits */ 2893 wr32(E1000_QDE, ALL_QUEUES); 2894 } 2895 2896 /* This is useful for sniffing bad packets. */ 2897 if (adapter->netdev->features & NETIF_F_RXALL) { 2898 /* UPE and MPE will be handled by normal PROMISC logic 2899 * in e1000e_set_rx_mode */ 2900 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ 2901 E1000_RCTL_BAM | /* RX All Bcast Pkts */ 2902 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ 2903 2904 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ 2905 E1000_RCTL_DPF | /* Allow filtered pause */ 2906 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ 2907 /* Do not mess with E1000_CTRL_VME, it affects transmit as well, 2908 * and that breaks VLANs. 
2909 */ 2910 } 2911 2912 wr32(E1000_RCTL, rctl); 2913 } 2914 2915 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, 2916 int vfn) 2917 { 2918 struct e1000_hw *hw = &adapter->hw; 2919 u32 vmolr; 2920 2921 /* if it isn't the PF check to see if VFs are enabled and 2922 * increase the size to support vlan tags */ 2923 if (vfn < adapter->vfs_allocated_count && 2924 adapter->vf_data[vfn].vlans_enabled) 2925 size += VLAN_TAG_SIZE; 2926 2927 vmolr = rd32(E1000_VMOLR(vfn)); 2928 vmolr &= ~E1000_VMOLR_RLPML_MASK; 2929 vmolr |= size | E1000_VMOLR_LPE; 2930 wr32(E1000_VMOLR(vfn), vmolr); 2931 2932 return 0; 2933 } 2934 2935 /** 2936 * igb_rlpml_set - set maximum receive packet size 2937 * @adapter: board private structure 2938 * 2939 * Configure maximum receivable packet size. 2940 **/ 2941 static void igb_rlpml_set(struct igb_adapter *adapter) 2942 { 2943 u32 max_frame_size = adapter->max_frame_size; 2944 struct e1000_hw *hw = &adapter->hw; 2945 u16 pf_id = adapter->vfs_allocated_count; 2946 2947 if (pf_id) { 2948 igb_set_vf_rlpml(adapter, max_frame_size, pf_id); 2949 /* 2950 * If we're in VMDQ or SR-IOV mode, then set global RLPML 2951 * to our max jumbo frame size, in case we need to enable 2952 * jumbo frames on one of the rings later. 2953 * This will not pass over-length frames into the default 2954 * queue because it's gated by the VMOLR.RLPML. 
2955 */ 2956 max_frame_size = MAX_JUMBO_FRAME_SIZE; 2957 } 2958 2959 wr32(E1000_RLPML, max_frame_size); 2960 } 2961 2962 static inline void igb_set_vmolr(struct igb_adapter *adapter, 2963 int vfn, bool aupe) 2964 { 2965 struct e1000_hw *hw = &adapter->hw; 2966 u32 vmolr; 2967 2968 /* 2969 * This register exists only on 82576 and newer so if we are older then 2970 * we should exit and do nothing 2971 */ 2972 if (hw->mac.type < e1000_82576) 2973 return; 2974 2975 vmolr = rd32(E1000_VMOLR(vfn)); 2976 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ 2977 if (aupe) 2978 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ 2979 else 2980 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ 2981 2982 /* clear all bits that might not be set */ 2983 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE); 2984 2985 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) 2986 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ 2987 /* 2988 * for VMDq only allow the VFs and pool 0 to accept broadcast and 2989 * multicast packets 2990 */ 2991 if (vfn <= adapter->vfs_allocated_count) 2992 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ 2993 2994 wr32(E1000_VMOLR(vfn), vmolr); 2995 } 2996 2997 /** 2998 * igb_configure_rx_ring - Configure a receive ring after Reset 2999 * @adapter: board private structure 3000 * @ring: receive ring to be configured 3001 * 3002 * Configure the Rx unit of the MAC after a reset. 
3003 **/ 3004 void igb_configure_rx_ring(struct igb_adapter *adapter, 3005 struct igb_ring *ring) 3006 { 3007 struct e1000_hw *hw = &adapter->hw; 3008 u64 rdba = ring->dma; 3009 int reg_idx = ring->reg_idx; 3010 u32 srrctl = 0, rxdctl = 0; 3011 3012 /* disable the queue */ 3013 wr32(E1000_RXDCTL(reg_idx), 0); 3014 3015 /* Set DMA base address registers */ 3016 wr32(E1000_RDBAL(reg_idx), 3017 rdba & 0x00000000ffffffffULL); 3018 wr32(E1000_RDBAH(reg_idx), rdba >> 32); 3019 wr32(E1000_RDLEN(reg_idx), 3020 ring->count * sizeof(union e1000_adv_rx_desc)); 3021 3022 /* initialize head and tail */ 3023 ring->tail = hw->hw_addr + E1000_RDT(reg_idx); 3024 wr32(E1000_RDH(reg_idx), 0); 3025 writel(0, ring->tail); 3026 3027 /* set descriptor configuration */ 3028 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 3029 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384 3030 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3031 #else 3032 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; 3033 #endif 3034 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 3035 if (hw->mac.type >= e1000_82580) 3036 srrctl |= E1000_SRRCTL_TIMESTAMP; 3037 /* Only set Drop Enable if we are supporting multiple queues */ 3038 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) 3039 srrctl |= E1000_SRRCTL_DROP_EN; 3040 3041 wr32(E1000_SRRCTL(reg_idx), srrctl); 3042 3043 /* set filtering for VMDQ pools */ 3044 igb_set_vmolr(adapter, reg_idx & 0x7, true); 3045 3046 rxdctl |= IGB_RX_PTHRESH; 3047 rxdctl |= IGB_RX_HTHRESH << 8; 3048 rxdctl |= IGB_RX_WTHRESH << 16; 3049 3050 /* enable receive descriptor fetching */ 3051 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 3052 wr32(E1000_RXDCTL(reg_idx), rxdctl); 3053 } 3054 3055 /** 3056 * igb_configure_rx - Configure receive Unit after Reset 3057 * @adapter: board private structure 3058 * 3059 * Configure the Rx unit of the MAC after a reset. 
3060 **/ 3061 static void igb_configure_rx(struct igb_adapter *adapter) 3062 { 3063 int i; 3064 3065 /* set UTA to appropriate mode */ 3066 igb_set_uta(adapter); 3067 3068 /* set the correct pool for the PF default MAC address in entry 0 */ 3069 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, 3070 adapter->vfs_allocated_count); 3071 3072 /* Setup the HW Rx Head and Tail Descriptor Pointers and 3073 * the Base and Length of the Rx Descriptor Ring */ 3074 for (i = 0; i < adapter->num_rx_queues; i++) 3075 igb_configure_rx_ring(adapter, adapter->rx_ring[i]); 3076 } 3077 3078 /** 3079 * igb_free_tx_resources - Free Tx Resources per Queue 3080 * @tx_ring: Tx descriptor ring for a specific queue 3081 * 3082 * Free all transmit software resources 3083 **/ 3084 void igb_free_tx_resources(struct igb_ring *tx_ring) 3085 { 3086 igb_clean_tx_ring(tx_ring); 3087 3088 vfree(tx_ring->tx_buffer_info); 3089 tx_ring->tx_buffer_info = NULL; 3090 3091 /* if not set, then don't free */ 3092 if (!tx_ring->desc) 3093 return; 3094 3095 dma_free_coherent(tx_ring->dev, tx_ring->size, 3096 tx_ring->desc, tx_ring->dma); 3097 3098 tx_ring->desc = NULL; 3099 } 3100 3101 /** 3102 * igb_free_all_tx_resources - Free Tx Resources for All Queues 3103 * @adapter: board private structure 3104 * 3105 * Free all transmit software resources 3106 **/ 3107 static void igb_free_all_tx_resources(struct igb_adapter *adapter) 3108 { 3109 int i; 3110 3111 for (i = 0; i < adapter->num_tx_queues; i++) 3112 igb_free_tx_resources(adapter->tx_ring[i]); 3113 } 3114 3115 void igb_unmap_and_free_tx_resource(struct igb_ring *ring, 3116 struct igb_tx_buffer *tx_buffer) 3117 { 3118 if (tx_buffer->skb) { 3119 dev_kfree_skb_any(tx_buffer->skb); 3120 if (tx_buffer->dma) 3121 dma_unmap_single(ring->dev, 3122 tx_buffer->dma, 3123 tx_buffer->length, 3124 DMA_TO_DEVICE); 3125 } else if (tx_buffer->dma) { 3126 dma_unmap_page(ring->dev, 3127 tx_buffer->dma, 3128 tx_buffer->length, 3129 DMA_TO_DEVICE); 3130 } 3131 
tx_buffer->next_to_watch = NULL; 3132 tx_buffer->skb = NULL; 3133 tx_buffer->dma = 0; 3134 /* buffer_info must be completely set up in the transmit path */ 3135 } 3136 3137 /** 3138 * igb_clean_tx_ring - Free Tx Buffers 3139 * @tx_ring: ring to be cleaned 3140 **/ 3141 static void igb_clean_tx_ring(struct igb_ring *tx_ring) 3142 { 3143 struct igb_tx_buffer *buffer_info; 3144 unsigned long size; 3145 u16 i; 3146 3147 if (!tx_ring->tx_buffer_info) 3148 return; 3149 /* Free all the Tx ring sk_buffs */ 3150 3151 for (i = 0; i < tx_ring->count; i++) { 3152 buffer_info = &tx_ring->tx_buffer_info[i]; 3153 igb_unmap_and_free_tx_resource(tx_ring, buffer_info); 3154 } 3155 3156 size = sizeof(struct igb_tx_buffer) * tx_ring->count; 3157 memset(tx_ring->tx_buffer_info, 0, size); 3158 3159 /* Zero out the descriptor ring */ 3160 memset(tx_ring->desc, 0, tx_ring->size); 3161 3162 tx_ring->next_to_use = 0; 3163 tx_ring->next_to_clean = 0; 3164 } 3165 3166 /** 3167 * igb_clean_all_tx_rings - Free Tx Buffers for all queues 3168 * @adapter: board private structure 3169 **/ 3170 static void igb_clean_all_tx_rings(struct igb_adapter *adapter) 3171 { 3172 int i; 3173 3174 for (i = 0; i < adapter->num_tx_queues; i++) 3175 igb_clean_tx_ring(adapter->tx_ring[i]); 3176 } 3177 3178 /** 3179 * igb_free_rx_resources - Free Rx Resources 3180 * @rx_ring: ring to clean the resources from 3181 * 3182 * Free all receive software resources 3183 **/ 3184 void igb_free_rx_resources(struct igb_ring *rx_ring) 3185 { 3186 igb_clean_rx_ring(rx_ring); 3187 3188 vfree(rx_ring->rx_buffer_info); 3189 rx_ring->rx_buffer_info = NULL; 3190 3191 /* if not set, then don't free */ 3192 if (!rx_ring->desc) 3193 return; 3194 3195 dma_free_coherent(rx_ring->dev, rx_ring->size, 3196 rx_ring->desc, rx_ring->dma); 3197 3198 rx_ring->desc = NULL; 3199 } 3200 3201 /** 3202 * igb_free_all_rx_resources - Free Rx Resources for All Queues 3203 * @adapter: board private structure 3204 * 3205 * Free all receive software 
resources 3206 **/ 3207 static void igb_free_all_rx_resources(struct igb_adapter *adapter) 3208 { 3209 int i; 3210 3211 for (i = 0; i < adapter->num_rx_queues; i++) 3212 igb_free_rx_resources(adapter->rx_ring[i]); 3213 } 3214 3215 /** 3216 * igb_clean_rx_ring - Free Rx Buffers per Queue 3217 * @rx_ring: ring to free buffers from 3218 **/ 3219 static void igb_clean_rx_ring(struct igb_ring *rx_ring) 3220 { 3221 unsigned long size; 3222 u16 i; 3223 3224 if (!rx_ring->rx_buffer_info) 3225 return; 3226 3227 /* Free all the Rx ring sk_buffs */ 3228 for (i = 0; i < rx_ring->count; i++) { 3229 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 3230 if (buffer_info->dma) { 3231 dma_unmap_single(rx_ring->dev, 3232 buffer_info->dma, 3233 IGB_RX_HDR_LEN, 3234 DMA_FROM_DEVICE); 3235 buffer_info->dma = 0; 3236 } 3237 3238 if (buffer_info->skb) { 3239 dev_kfree_skb(buffer_info->skb); 3240 buffer_info->skb = NULL; 3241 } 3242 if (buffer_info->page_dma) { 3243 dma_unmap_page(rx_ring->dev, 3244 buffer_info->page_dma, 3245 PAGE_SIZE / 2, 3246 DMA_FROM_DEVICE); 3247 buffer_info->page_dma = 0; 3248 } 3249 if (buffer_info->page) { 3250 put_page(buffer_info->page); 3251 buffer_info->page = NULL; 3252 buffer_info->page_offset = 0; 3253 } 3254 } 3255 3256 size = sizeof(struct igb_rx_buffer) * rx_ring->count; 3257 memset(rx_ring->rx_buffer_info, 0, size); 3258 3259 /* Zero out the descriptor ring */ 3260 memset(rx_ring->desc, 0, rx_ring->size); 3261 3262 rx_ring->next_to_clean = 0; 3263 rx_ring->next_to_use = 0; 3264 } 3265 3266 /** 3267 * igb_clean_all_rx_rings - Free Rx Buffers for all queues 3268 * @adapter: board private structure 3269 **/ 3270 static void igb_clean_all_rx_rings(struct igb_adapter *adapter) 3271 { 3272 int i; 3273 3274 for (i = 0; i < adapter->num_rx_queues; i++) 3275 igb_clean_rx_ring(adapter->rx_ring[i]); 3276 } 3277 3278 /** 3279 * igb_set_mac - Change the Ethernet Address of the NIC 3280 * @netdev: network interface device structure 3281 * @p: pointer 
to an address structure 3282 * 3283 * Returns 0 on success, negative on failure 3284 **/ 3285 static int igb_set_mac(struct net_device *netdev, void *p) 3286 { 3287 struct igb_adapter *adapter = netdev_priv(netdev); 3288 struct e1000_hw *hw = &adapter->hw; 3289 struct sockaddr *addr = p; 3290 3291 if (!is_valid_ether_addr(addr->sa_data)) 3292 return -EADDRNOTAVAIL; 3293 3294 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 3295 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); 3296 3297 /* set the correct pool for the new PF MAC address in entry 0 */ 3298 igb_rar_set_qsel(adapter, hw->mac.addr, 0, 3299 adapter->vfs_allocated_count); 3300 3301 return 0; 3302 } 3303 3304 /** 3305 * igb_write_mc_addr_list - write multicast addresses to MTA 3306 * @netdev: network interface device structure 3307 * 3308 * Writes multicast address list to the MTA hash table. 3309 * Returns: -ENOMEM on failure 3310 * 0 on no addresses written 3311 * X on writing X addresses to MTA 3312 **/ 3313 static int igb_write_mc_addr_list(struct net_device *netdev) 3314 { 3315 struct igb_adapter *adapter = netdev_priv(netdev); 3316 struct e1000_hw *hw = &adapter->hw; 3317 struct netdev_hw_addr *ha; 3318 u8 *mta_list; 3319 int i; 3320 3321 if (netdev_mc_empty(netdev)) { 3322 /* nothing to program, so clear mc list */ 3323 igb_update_mc_addr_list(hw, NULL, 0); 3324 igb_restore_vf_multicasts(adapter); 3325 return 0; 3326 } 3327 3328 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC); 3329 if (!mta_list) 3330 return -ENOMEM; 3331 3332 /* The shared function expects a packed array of only addresses. 
*/ 3333 i = 0; 3334 netdev_for_each_mc_addr(ha, netdev) 3335 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); 3336 3337 igb_update_mc_addr_list(hw, mta_list, i); 3338 kfree(mta_list); 3339 3340 return netdev_mc_count(netdev); 3341 } 3342 3343 /** 3344 * igb_write_uc_addr_list - write unicast addresses to RAR table 3345 * @netdev: network interface device structure 3346 * 3347 * Writes unicast address list to the RAR table. 3348 * Returns: -ENOMEM on failure/insufficient address space 3349 * 0 on no addresses written 3350 * X on writing X addresses to the RAR table 3351 **/ 3352 static int igb_write_uc_addr_list(struct net_device *netdev) 3353 { 3354 struct igb_adapter *adapter = netdev_priv(netdev); 3355 struct e1000_hw *hw = &adapter->hw; 3356 unsigned int vfn = adapter->vfs_allocated_count; 3357 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1); 3358 int count = 0; 3359 3360 /* return ENOMEM indicating insufficient memory for addresses */ 3361 if (netdev_uc_count(netdev) > rar_entries) 3362 return -ENOMEM; 3363 3364 if (!netdev_uc_empty(netdev) && rar_entries) { 3365 struct netdev_hw_addr *ha; 3366 3367 netdev_for_each_uc_addr(ha, netdev) { 3368 if (!rar_entries) 3369 break; 3370 igb_rar_set_qsel(adapter, ha->addr, 3371 rar_entries--, 3372 vfn); 3373 count++; 3374 } 3375 } 3376 /* write the addresses in reverse order to avoid write combining */ 3377 for (; rar_entries > 0 ; rar_entries--) { 3378 wr32(E1000_RAH(rar_entries), 0); 3379 wr32(E1000_RAL(rar_entries), 0); 3380 } 3381 wrfl(); 3382 3383 return count; 3384 } 3385 3386 /** 3387 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set 3388 * @netdev: network interface device structure 3389 * 3390 * The set_rx_mode entry point is called whenever the unicast or multicast 3391 * address lists or the network interface flags are updated. 
This routine is 3392 * responsible for configuring the hardware for proper unicast, multicast, 3393 * promiscuous mode, and all-multi behavior. 3394 **/ 3395 static void igb_set_rx_mode(struct net_device *netdev) 3396 { 3397 struct igb_adapter *adapter = netdev_priv(netdev); 3398 struct e1000_hw *hw = &adapter->hw; 3399 unsigned int vfn = adapter->vfs_allocated_count; 3400 u32 rctl, vmolr = 0; 3401 int count; 3402 3403 /* Check for Promiscuous and All Multicast modes */ 3404 rctl = rd32(E1000_RCTL); 3405 3406 /* clear the effected bits */ 3407 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); 3408 3409 if (netdev->flags & IFF_PROMISC) { 3410 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 3411 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); 3412 } else { 3413 if (netdev->flags & IFF_ALLMULTI) { 3414 rctl |= E1000_RCTL_MPE; 3415 vmolr |= E1000_VMOLR_MPME; 3416 } else { 3417 /* 3418 * Write addresses to the MTA, if the attempt fails 3419 * then we should just turn on promiscuous mode so 3420 * that we can at least receive multicast traffic 3421 */ 3422 count = igb_write_mc_addr_list(netdev); 3423 if (count < 0) { 3424 rctl |= E1000_RCTL_MPE; 3425 vmolr |= E1000_VMOLR_MPME; 3426 } else if (count) { 3427 vmolr |= E1000_VMOLR_ROMPE; 3428 } 3429 } 3430 /* 3431 * Write addresses to available RAR registers, if there is not 3432 * sufficient space to store all the addresses then enable 3433 * unicast promiscuous mode 3434 */ 3435 count = igb_write_uc_addr_list(netdev); 3436 if (count < 0) { 3437 rctl |= E1000_RCTL_UPE; 3438 vmolr |= E1000_VMOLR_ROPE; 3439 } 3440 rctl |= E1000_RCTL_VFE; 3441 } 3442 wr32(E1000_RCTL, rctl); 3443 3444 /* 3445 * In order to support SR-IOV and eventually VMDq it is necessary to set 3446 * the VMOLR to enable the appropriate modes. 
Without this workaround 3447 * we will have issues with VLAN tag stripping not being done for frames 3448 * that are only arriving because we are the default pool 3449 */ 3450 if (hw->mac.type < e1000_82576) 3451 return; 3452 3453 vmolr |= rd32(E1000_VMOLR(vfn)) & 3454 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); 3455 wr32(E1000_VMOLR(vfn), vmolr); 3456 igb_restore_vf_multicasts(adapter); 3457 } 3458 3459 static void igb_check_wvbr(struct igb_adapter *adapter) 3460 { 3461 struct e1000_hw *hw = &adapter->hw; 3462 u32 wvbr = 0; 3463 3464 switch (hw->mac.type) { 3465 case e1000_82576: 3466 case e1000_i350: 3467 if (!(wvbr = rd32(E1000_WVBR))) 3468 return; 3469 break; 3470 default: 3471 break; 3472 } 3473 3474 adapter->wvbr |= wvbr; 3475 } 3476 3477 #define IGB_STAGGERED_QUEUE_OFFSET 8 3478 3479 static void igb_spoof_check(struct igb_adapter *adapter) 3480 { 3481 int j; 3482 3483 if (!adapter->wvbr) 3484 return; 3485 3486 for(j = 0; j < adapter->vfs_allocated_count; j++) { 3487 if (adapter->wvbr & (1 << j) || 3488 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { 3489 dev_warn(&adapter->pdev->dev, 3490 "Spoof event(s) detected on VF %d\n", j); 3491 adapter->wvbr &= 3492 ~((1 << j) | 3493 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); 3494 } 3495 } 3496 } 3497 3498 /* Need to wait a few seconds after link up to get diagnostic information from 3499 * the phy */ 3500 static void igb_update_phy_info(unsigned long data) 3501 { 3502 struct igb_adapter *adapter = (struct igb_adapter *) data; 3503 igb_get_phy_info(&adapter->hw); 3504 } 3505 3506 /** 3507 * igb_has_link - check shared code for link and determine up/down 3508 * @adapter: pointer to driver private info 3509 **/ 3510 bool igb_has_link(struct igb_adapter *adapter) 3511 { 3512 struct e1000_hw *hw = &adapter->hw; 3513 bool link_active = false; 3514 s32 ret_val = 0; 3515 3516 /* get_link_status is set on LSC (link status) interrupt or 3517 * rx sequence error interrupt. 
get_link_status will stay 3518 * false until the e1000_check_for_link establishes link 3519 * for copper adapters ONLY 3520 */ 3521 switch (hw->phy.media_type) { 3522 case e1000_media_type_copper: 3523 if (hw->mac.get_link_status) { 3524 ret_val = hw->mac.ops.check_for_link(hw); 3525 link_active = !hw->mac.get_link_status; 3526 } else { 3527 link_active = true; 3528 } 3529 break; 3530 case e1000_media_type_internal_serdes: 3531 ret_val = hw->mac.ops.check_for_link(hw); 3532 link_active = hw->mac.serdes_has_link; 3533 break; 3534 default: 3535 case e1000_media_type_unknown: 3536 break; 3537 } 3538 3539 return link_active; 3540 } 3541 3542 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) 3543 { 3544 bool ret = false; 3545 u32 ctrl_ext, thstat; 3546 3547 /* check for thermal sensor event on i350, copper only */ 3548 if (hw->mac.type == e1000_i350) { 3549 thstat = rd32(E1000_THSTAT); 3550 ctrl_ext = rd32(E1000_CTRL_EXT); 3551 3552 if ((hw->phy.media_type == e1000_media_type_copper) && 3553 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) { 3554 ret = !!(thstat & event); 3555 } 3556 } 3557 3558 return ret; 3559 } 3560 3561 /** 3562 * igb_watchdog - Timer Call-back 3563 * @data: pointer to adapter cast into an unsigned long 3564 **/ 3565 static void igb_watchdog(unsigned long data) 3566 { 3567 struct igb_adapter *adapter = (struct igb_adapter *)data; 3568 /* Do the rest outside of interrupt context */ 3569 schedule_work(&adapter->watchdog_task); 3570 } 3571 3572 static void igb_watchdog_task(struct work_struct *work) 3573 { 3574 struct igb_adapter *adapter = container_of(work, 3575 struct igb_adapter, 3576 watchdog_task); 3577 struct e1000_hw *hw = &adapter->hw; 3578 struct net_device *netdev = adapter->netdev; 3579 u32 link; 3580 int i; 3581 3582 link = igb_has_link(adapter); 3583 if (link) { 3584 /* Cancel scheduled suspend requests. 
*/ 3585 pm_runtime_resume(netdev->dev.parent); 3586 3587 if (!netif_carrier_ok(netdev)) { 3588 u32 ctrl; 3589 hw->mac.ops.get_speed_and_duplex(hw, 3590 &adapter->link_speed, 3591 &adapter->link_duplex); 3592 3593 ctrl = rd32(E1000_CTRL); 3594 /* Links status message must follow this format */ 3595 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s " 3596 "Duplex, Flow Control: %s\n", 3597 netdev->name, 3598 adapter->link_speed, 3599 adapter->link_duplex == FULL_DUPLEX ? 3600 "Full" : "Half", 3601 (ctrl & E1000_CTRL_TFCE) && 3602 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" : 3603 (ctrl & E1000_CTRL_RFCE) ? "RX" : 3604 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); 3605 3606 /* check for thermal sensor event */ 3607 if (igb_thermal_sensor_event(hw, 3608 E1000_THSTAT_LINK_THROTTLE)) { 3609 netdev_info(netdev, "The network adapter link " 3610 "speed was downshifted because it " 3611 "overheated\n"); 3612 } 3613 3614 /* adjust timeout factor according to speed/duplex */ 3615 adapter->tx_timeout_factor = 1; 3616 switch (adapter->link_speed) { 3617 case SPEED_10: 3618 adapter->tx_timeout_factor = 14; 3619 break; 3620 case SPEED_100: 3621 /* maybe add some timeout factor ? 
*/ 3622 break; 3623 } 3624 3625 netif_carrier_on(netdev); 3626 3627 igb_ping_all_vfs(adapter); 3628 igb_check_vf_rate_limit(adapter); 3629 3630 /* link state has changed, schedule phy info update */ 3631 if (!test_bit(__IGB_DOWN, &adapter->state)) 3632 mod_timer(&adapter->phy_info_timer, 3633 round_jiffies(jiffies + 2 * HZ)); 3634 } 3635 } else { 3636 if (netif_carrier_ok(netdev)) { 3637 adapter->link_speed = 0; 3638 adapter->link_duplex = 0; 3639 3640 /* check for thermal sensor event */ 3641 if (igb_thermal_sensor_event(hw, 3642 E1000_THSTAT_PWR_DOWN)) { 3643 netdev_err(netdev, "The network adapter was " 3644 "stopped because it overheated\n"); 3645 } 3646 3647 /* Links status message must follow this format */ 3648 printk(KERN_INFO "igb: %s NIC Link is Down\n", 3649 netdev->name); 3650 netif_carrier_off(netdev); 3651 3652 igb_ping_all_vfs(adapter); 3653 3654 /* link state has changed, schedule phy info update */ 3655 if (!test_bit(__IGB_DOWN, &adapter->state)) 3656 mod_timer(&adapter->phy_info_timer, 3657 round_jiffies(jiffies + 2 * HZ)); 3658 3659 pm_schedule_suspend(netdev->dev.parent, 3660 MSEC_PER_SEC * 5); 3661 } 3662 } 3663 3664 spin_lock(&adapter->stats64_lock); 3665 igb_update_stats(adapter, &adapter->stats64); 3666 spin_unlock(&adapter->stats64_lock); 3667 3668 for (i = 0; i < adapter->num_tx_queues; i++) { 3669 struct igb_ring *tx_ring = adapter->tx_ring[i]; 3670 if (!netif_carrier_ok(netdev)) { 3671 /* We've lost link, so the controller stops DMA, 3672 * but we've got queued Tx work that's never going 3673 * to get done, so reset controller to flush Tx. 3674 * (Do the reset outside of interrupt context). 
*/ 3675 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { 3676 adapter->tx_timeout_count++; 3677 schedule_work(&adapter->reset_task); 3678 /* return immediately since reset is imminent */ 3679 return; 3680 } 3681 } 3682 3683 /* Force detection of hung controller every watchdog period */ 3684 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 3685 } 3686 3687 /* Cause software interrupt to ensure rx ring is cleaned */ 3688 if (adapter->msix_entries) { 3689 u32 eics = 0; 3690 for (i = 0; i < adapter->num_q_vectors; i++) 3691 eics |= adapter->q_vector[i]->eims_value; 3692 wr32(E1000_EICS, eics); 3693 } else { 3694 wr32(E1000_ICS, E1000_ICS_RXDMT0); 3695 } 3696 3697 igb_spoof_check(adapter); 3698 3699 /* Reset the timer */ 3700 if (!test_bit(__IGB_DOWN, &adapter->state)) 3701 mod_timer(&adapter->watchdog_timer, 3702 round_jiffies(jiffies + 2 * HZ)); 3703 } 3704 3705 enum latency_range { 3706 lowest_latency = 0, 3707 low_latency = 1, 3708 bulk_latency = 2, 3709 latency_invalid = 255 3710 }; 3711 3712 /** 3713 * igb_update_ring_itr - update the dynamic ITR value based on packet size 3714 * 3715 * Stores a new ITR value based on strictly on packet size. This 3716 * algorithm is less sophisticated than that used in igb_update_itr, 3717 * due to the difficulty of synchronizing statistics across multiple 3718 * receive rings. The divisors and thresholds used by this function 3719 * were determined based on theoretical maximum wire speed and testing 3720 * data, in order to minimize response time while increasing bulk 3721 * throughput. 3722 * This functionality is controlled by the InterruptThrottleRate module 3723 * parameter (see igb_param.c) 3724 * NOTE: This function is called only when operating in a multiqueue 3725 * receive environment. 
3726 * @q_vector: pointer to q_vector 3727 **/ 3728 static void igb_update_ring_itr(struct igb_q_vector *q_vector) 3729 { 3730 int new_val = q_vector->itr_val; 3731 int avg_wire_size = 0; 3732 struct igb_adapter *adapter = q_vector->adapter; 3733 unsigned int packets; 3734 3735 /* For non-gigabit speeds, just fix the interrupt rate at 4000 3736 * ints/sec - ITR timer value of 120 ticks. 3737 */ 3738 if (adapter->link_speed != SPEED_1000) { 3739 new_val = IGB_4K_ITR; 3740 goto set_itr_val; 3741 } 3742 3743 packets = q_vector->rx.total_packets; 3744 if (packets) 3745 avg_wire_size = q_vector->rx.total_bytes / packets; 3746 3747 packets = q_vector->tx.total_packets; 3748 if (packets) 3749 avg_wire_size = max_t(u32, avg_wire_size, 3750 q_vector->tx.total_bytes / packets); 3751 3752 /* if avg_wire_size isn't set no work was done */ 3753 if (!avg_wire_size) 3754 goto clear_counts; 3755 3756 /* Add 24 bytes to size to account for CRC, preamble, and gap */ 3757 avg_wire_size += 24; 3758 3759 /* Don't starve jumbo frames */ 3760 avg_wire_size = min(avg_wire_size, 3000); 3761 3762 /* Give a little boost to mid-size frames */ 3763 if ((avg_wire_size > 300) && (avg_wire_size < 1200)) 3764 new_val = avg_wire_size / 3; 3765 else 3766 new_val = avg_wire_size / 2; 3767 3768 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3769 if (new_val < IGB_20K_ITR && 3770 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3771 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3772 new_val = IGB_20K_ITR; 3773 3774 set_itr_val: 3775 if (new_val != q_vector->itr_val) { 3776 q_vector->itr_val = new_val; 3777 q_vector->set_itr = 1; 3778 } 3779 clear_counts: 3780 q_vector->rx.total_bytes = 0; 3781 q_vector->rx.total_packets = 0; 3782 q_vector->tx.total_bytes = 0; 3783 q_vector->tx.total_packets = 0; 3784 } 3785 3786 /** 3787 * igb_update_itr - update the dynamic ITR value based on statistics 3788 * Stores a new ITR value based on packets and byte 3789 * counts during 
the last interrupt. The advantage of per interrupt 3790 * computation is faster updates and more accurate ITR for the current 3791 * traffic pattern. Constants in this function were computed 3792 * based on theoretical maximum wire speed and thresholds were set based 3793 * on testing data as well as attempting to minimize response time 3794 * while increasing bulk throughput. 3795 * this functionality is controlled by the InterruptThrottleRate module 3796 * parameter (see igb_param.c) 3797 * NOTE: These calculations are only valid when operating in a single- 3798 * queue environment. 3799 * @q_vector: pointer to q_vector 3800 * @ring_container: ring info to update the itr for 3801 **/ 3802 static void igb_update_itr(struct igb_q_vector *q_vector, 3803 struct igb_ring_container *ring_container) 3804 { 3805 unsigned int packets = ring_container->total_packets; 3806 unsigned int bytes = ring_container->total_bytes; 3807 u8 itrval = ring_container->itr; 3808 3809 /* no packets, exit with status unchanged */ 3810 if (packets == 0) 3811 return; 3812 3813 switch (itrval) { 3814 case lowest_latency: 3815 /* handle TSO and jumbo frames */ 3816 if (bytes/packets > 8000) 3817 itrval = bulk_latency; 3818 else if ((packets < 5) && (bytes > 512)) 3819 itrval = low_latency; 3820 break; 3821 case low_latency: /* 50 usec aka 20000 ints/s */ 3822 if (bytes > 10000) { 3823 /* this if handles the TSO accounting */ 3824 if (bytes/packets > 8000) { 3825 itrval = bulk_latency; 3826 } else if ((packets < 10) || ((bytes/packets) > 1200)) { 3827 itrval = bulk_latency; 3828 } else if ((packets > 35)) { 3829 itrval = lowest_latency; 3830 } 3831 } else if (bytes/packets > 2000) { 3832 itrval = bulk_latency; 3833 } else if (packets <= 2 && bytes < 512) { 3834 itrval = lowest_latency; 3835 } 3836 break; 3837 case bulk_latency: /* 250 usec aka 4000 ints/s */ 3838 if (bytes > 25000) { 3839 if (packets > 35) 3840 itrval = low_latency; 3841 } else if (bytes < 1500) { 3842 itrval = low_latency; 3843 
} 3844 break; 3845 } 3846 3847 /* clear work counters since we have the values we need */ 3848 ring_container->total_bytes = 0; 3849 ring_container->total_packets = 0; 3850 3851 /* write updated itr to ring container */ 3852 ring_container->itr = itrval; 3853 } 3854 3855 static void igb_set_itr(struct igb_q_vector *q_vector) 3856 { 3857 struct igb_adapter *adapter = q_vector->adapter; 3858 u32 new_itr = q_vector->itr_val; 3859 u8 current_itr = 0; 3860 3861 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ 3862 if (adapter->link_speed != SPEED_1000) { 3863 current_itr = 0; 3864 new_itr = IGB_4K_ITR; 3865 goto set_itr_now; 3866 } 3867 3868 igb_update_itr(q_vector, &q_vector->tx); 3869 igb_update_itr(q_vector, &q_vector->rx); 3870 3871 current_itr = max(q_vector->rx.itr, q_vector->tx.itr); 3872 3873 /* conservative mode (itr 3) eliminates the lowest_latency setting */ 3874 if (current_itr == lowest_latency && 3875 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || 3876 (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) 3877 current_itr = low_latency; 3878 3879 switch (current_itr) { 3880 /* counts and packets in update_itr are dependent on these numbers */ 3881 case lowest_latency: 3882 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ 3883 break; 3884 case low_latency: 3885 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ 3886 break; 3887 case bulk_latency: 3888 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ 3889 break; 3890 default: 3891 break; 3892 } 3893 3894 set_itr_now: 3895 if (new_itr != q_vector->itr_val) { 3896 /* this attempts to bias the interrupt rate towards Bulk 3897 * by adding intermediate steps when interrupt rate is 3898 * increasing */ 3899 new_itr = new_itr > q_vector->itr_val ? 
3900 max((new_itr * q_vector->itr_val) / 3901 (new_itr + (q_vector->itr_val >> 2)), 3902 new_itr) : 3903 new_itr; 3904 /* Don't write the value here; it resets the adapter's 3905 * internal timer, and causes us to delay far longer than 3906 * we should between interrupts. Instead, we write the ITR 3907 * value at the beginning of the next interrupt so the timing 3908 * ends up being correct. 3909 */ 3910 q_vector->itr_val = new_itr; 3911 q_vector->set_itr = 1; 3912 } 3913 } 3914 3915 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, 3916 u32 type_tucmd, u32 mss_l4len_idx) 3917 { 3918 struct e1000_adv_tx_context_desc *context_desc; 3919 u16 i = tx_ring->next_to_use; 3920 3921 context_desc = IGB_TX_CTXTDESC(tx_ring, i); 3922 3923 i++; 3924 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; 3925 3926 /* set bits to identify this as an advanced context descriptor */ 3927 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 3928 3929 /* For 82575, context index must be unique per ring. 
*/ 3930 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 3931 mss_l4len_idx |= tx_ring->reg_idx << 4; 3932 3933 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); 3934 context_desc->seqnum_seed = 0; 3935 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); 3936 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); 3937 } 3938 3939 static int igb_tso(struct igb_ring *tx_ring, 3940 struct igb_tx_buffer *first, 3941 u8 *hdr_len) 3942 { 3943 struct sk_buff *skb = first->skb; 3944 u32 vlan_macip_lens, type_tucmd; 3945 u32 mss_l4len_idx, l4len; 3946 3947 if (!skb_is_gso(skb)) 3948 return 0; 3949 3950 if (skb_header_cloned(skb)) { 3951 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); 3952 if (err) 3953 return err; 3954 } 3955 3956 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ 3957 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; 3958 3959 if (first->protocol == __constant_htons(ETH_P_IP)) { 3960 struct iphdr *iph = ip_hdr(skb); 3961 iph->tot_len = 0; 3962 iph->check = 0; 3963 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, 3964 iph->daddr, 0, 3965 IPPROTO_TCP, 3966 0); 3967 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 3968 first->tx_flags |= IGB_TX_FLAGS_TSO | 3969 IGB_TX_FLAGS_CSUM | 3970 IGB_TX_FLAGS_IPV4; 3971 } else if (skb_is_gso_v6(skb)) { 3972 ipv6_hdr(skb)->payload_len = 0; 3973 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, 3974 &ipv6_hdr(skb)->daddr, 3975 0, IPPROTO_TCP, 0); 3976 first->tx_flags |= IGB_TX_FLAGS_TSO | 3977 IGB_TX_FLAGS_CSUM; 3978 } 3979 3980 /* compute header lengths */ 3981 l4len = tcp_hdrlen(skb); 3982 *hdr_len = skb_transport_offset(skb) + l4len; 3983 3984 /* update gso size and bytecount with header size */ 3985 first->gso_segs = skb_shinfo(skb)->gso_segs; 3986 first->bytecount += (first->gso_segs - 1) * *hdr_len; 3987 3988 /* MSS L4LEN IDX */ 3989 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; 3990 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; 3991 3992 /* VLAN MACLEN IPLEN */ 3993 
vlan_macip_lens = skb_network_header_len(skb); 3994 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 3995 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 3996 3997 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 3998 3999 return 1; 4000 } 4001 4002 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) 4003 { 4004 struct sk_buff *skb = first->skb; 4005 u32 vlan_macip_lens = 0; 4006 u32 mss_l4len_idx = 0; 4007 u32 type_tucmd = 0; 4008 4009 if (skb->ip_summed != CHECKSUM_PARTIAL) { 4010 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) 4011 return; 4012 } else { 4013 u8 l4_hdr = 0; 4014 switch (first->protocol) { 4015 case __constant_htons(ETH_P_IP): 4016 vlan_macip_lens |= skb_network_header_len(skb); 4017 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; 4018 l4_hdr = ip_hdr(skb)->protocol; 4019 break; 4020 case __constant_htons(ETH_P_IPV6): 4021 vlan_macip_lens |= skb_network_header_len(skb); 4022 l4_hdr = ipv6_hdr(skb)->nexthdr; 4023 break; 4024 default: 4025 if (unlikely(net_ratelimit())) { 4026 dev_warn(tx_ring->dev, 4027 "partial checksum but proto=%x!\n", 4028 first->protocol); 4029 } 4030 break; 4031 } 4032 4033 switch (l4_hdr) { 4034 case IPPROTO_TCP: 4035 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; 4036 mss_l4len_idx = tcp_hdrlen(skb) << 4037 E1000_ADVTXD_L4LEN_SHIFT; 4038 break; 4039 case IPPROTO_SCTP: 4040 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; 4041 mss_l4len_idx = sizeof(struct sctphdr) << 4042 E1000_ADVTXD_L4LEN_SHIFT; 4043 break; 4044 case IPPROTO_UDP: 4045 mss_l4len_idx = sizeof(struct udphdr) << 4046 E1000_ADVTXD_L4LEN_SHIFT; 4047 break; 4048 default: 4049 if (unlikely(net_ratelimit())) { 4050 dev_warn(tx_ring->dev, 4051 "partial checksum but l4 proto=%x!\n", 4052 l4_hdr); 4053 } 4054 break; 4055 } 4056 4057 /* update TX checksum flag */ 4058 first->tx_flags |= IGB_TX_FLAGS_CSUM; 4059 } 4060 4061 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; 4062 
vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; 4063 4064 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); 4065 } 4066 4067 static __le32 igb_tx_cmd_type(u32 tx_flags) 4068 { 4069 /* set type for advanced descriptor with frame checksum insertion */ 4070 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA | 4071 E1000_ADVTXD_DCMD_IFCS | 4072 E1000_ADVTXD_DCMD_DEXT); 4073 4074 /* set HW vlan bit if vlan is present */ 4075 if (tx_flags & IGB_TX_FLAGS_VLAN) 4076 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); 4077 4078 /* set timestamp bit if present */ 4079 if (tx_flags & IGB_TX_FLAGS_TSTAMP) 4080 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); 4081 4082 /* set segmentation bits for TSO */ 4083 if (tx_flags & IGB_TX_FLAGS_TSO) 4084 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE); 4085 4086 return cmd_type; 4087 } 4088 4089 static void igb_tx_olinfo_status(struct igb_ring *tx_ring, 4090 union e1000_adv_tx_desc *tx_desc, 4091 u32 tx_flags, unsigned int paylen) 4092 { 4093 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; 4094 4095 /* 82575 requires a unique index per ring if any offload is enabled */ 4096 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) && 4097 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) 4098 olinfo_status |= tx_ring->reg_idx << 4; 4099 4100 /* insert L4 checksum */ 4101 if (tx_flags & IGB_TX_FLAGS_CSUM) { 4102 olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 4103 4104 /* insert IPv4 checksum */ 4105 if (tx_flags & IGB_TX_FLAGS_IPV4) 4106 olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 4107 } 4108 4109 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); 4110 } 4111 4112 /* 4113 * The largest size we can write to the descriptor is 65535. In order to 4114 * maintain a power of two alignment we have to limit ourselves to 32K. 
*/
#define IGB_MAX_TXD_PWR	15
#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)

/*
 * igb_tx_map - DMA-map an skb and write its Tx data descriptors
 * @tx_ring: ring the frame is queued on
 * @first: tx_buffer_info entry for the frame; supplies the skb and tx_flags
 * @hdr_len: header length, subtracted from skb->len to get the payload length
 *
 * Maps the linear part of the skb and then each page fragment.  Every
 * mapping gets a descriptor, and a mapping larger than
 * IGB_MAX_DATA_PER_TXD is spread over several descriptors.  On success
 * next_to_use is advanced and the new index is written to tx_ring->tail.
 * If a mapping fails, the buffers walked so far are handed to
 * igb_unmap_and_free_tx_resource() and next_to_use is set back to the
 * index of @first.
 */
static void igb_tx_map(struct igb_ring *tx_ring,
		       struct igb_tx_buffer *first,
		       const u8 hdr_len)
{
	struct sk_buff *skb = first->skb;
	struct igb_tx_buffer *tx_buffer_info;
	union e1000_adv_tx_desc *tx_desc;
	dma_addr_t dma;
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned int data_len = skb->data_len;
	unsigned int size = skb_headlen(skb);
	unsigned int paylen = skb->len - hdr_len;
	__le32 cmd_type;
	u32 tx_flags = first->tx_flags;
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
	cmd_type = igb_tx_cmd_type(tx_flags);

	/* map the linear (header) part of the skb */
	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
	if (dma_mapping_error(tx_ring->dev, dma))
		goto dma_error;

	/* record length, and DMA address */
	first->length = size;
	first->dma = dma;
	tx_desc->read.buffer_addr = cpu_to_le64(dma);

	for (;;) {
		/* split a buffer that does not fit in a single descriptor */
		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
				cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);

			/* advance to the next descriptor, wrapping at count */
			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.olinfo_status = 0;
			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		/* no paged data left: the current descriptor is the last */
		if (likely(!data_len))
			break;

		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);

		i++;
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
			i = 0;
		}

		/* map the next page fragment */
		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
				       size, DMA_TO_DEVICE);
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		tx_buffer_info->length = size;
		tx_buffer_info->dma = dma;

		tx_desc->read.olinfo_status = 0;
		tx_desc->read.buffer_addr = cpu_to_le64(dma);

		frag++;
	}

	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* write last descriptor with RS and EOP bits */
	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
	/* the skb asked for no frame checksum: drop the IFCS bit */
	if (unlikely(skb->no_fcs))
		cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
	tx_desc->read.cmd_type_len = cmd_type;

	/* set the timestamp */
	first->time_stamp = jiffies;

	/*
	 * Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	writel(i, tx_ring->tail);

	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();

	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	for (;;) {
		tx_buffer_info = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
		if (tx_buffer_info == first)
			break;
		/* walk backwards through the ring, wrapping at index 0 */
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	/* i is now the index of @first, so the slot is reused next time */
	tx_ring->next_to_use = i;
}

/*
 * __igb_maybe_stop_tx - slow path of igb_maybe_stop_tx()
 * @tx_ring: ring to check
 * @size: number of free descriptors required
 *
 * Stops the subqueue, then re-checks the free descriptor count.  Returns
 * -EBUSY if fewer than @size descriptors are unused; otherwise wakes the
 * subqueue again, counts the restart in restart_queue2 and returns 0.
 */
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	struct net_device *netdev = tx_ring->netdev;

netif_stop_subqueue(netdev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it. */
	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available. */
	if (igb_desc_unused(tx_ring) < size)
		return -EBUSY;

	/* A reprieve! */
	netif_wake_subqueue(netdev, tx_ring->queue_index);

	/* count the restart under the tx_syncp2 stats sequence */
	u64_stats_update_begin(&tx_ring->tx_syncp2);
	tx_ring->tx_stats.restart_queue2++;
	u64_stats_update_end(&tx_ring->tx_syncp2);

	return 0;
}

/*
 * igb_maybe_stop_tx - make sure @size descriptors are free on @tx_ring
 *
 * Returns 0 straight away when enough descriptors are unused; otherwise
 * returns whatever __igb_maybe_stop_tx() decides.
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	if (igb_desc_unused(tx_ring) >= size)
		return 0;
	return __igb_maybe_stop_tx(tx_ring, size);
}

/*
 * igb_xmit_frame_ring - queue one skb on a specific Tx ring
 * @skb: frame to send
 * @tx_ring: ring to place it on
 *
 * Returns NETDEV_TX_BUSY when the ring does not have nr_frags + 4 free
 * descriptors, NETDEV_TX_OK otherwise.
 */
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
				struct igb_ring *tx_ring)
{
	struct igb_tx_buffer *first;
	int tso;
	u32 tx_flags = 0;
	__be16 protocol = vlan_get_protocol(skb);
	u8 hdr_len = 0;

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = skb->len;
	first->gso_segs = 1;

	/* hardware timestamp requested: mark the skb as in progress */
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;
	}

	/* carry the VLAN tag in the upper bits of tx_flags */
	if (vlan_tx_tag_present(skb)) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
	}

	/* record initial flags and
protocol */
	first->tx_flags = tx_flags;
	first->protocol = protocol;

	/* negative: drop the frame; zero: not a TSO frame, try checksum offload */
	tso = igb_tso(tx_ring, first, &hdr_len);
	if (tso < 0)
		goto out_drop;
	else if (!tso)
		igb_tx_csum(tx_ring, first);

	igb_tx_map(tx_ring, first, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;

out_drop:
	/* the frame is released here, so the stack is still told TX_OK */
	igb_unmap_and_free_tx_resource(tx_ring, first);

	return NETDEV_TX_OK;
}

/*
 * igb_tx_queue_mapping - pick the Tx ring for an skb
 *
 * Uses skb->queue_mapping as the ring index, folded into range with a
 * modulo when it is not below adapter->num_tx_queues.
 */
static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
						    struct sk_buff *skb)
{
	unsigned int r_idx = skb->queue_mapping;

	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];
}

/*
 * igb_xmit_frame - transmit entry point taking an skb and its net_device
 * @skb: frame to send
 * @netdev: network interface device structure
 *
 * Frees the skb and reports NETDEV_TX_OK when the adapter is down or the
 * skb is empty; otherwise pads short frames and hands the skb to
 * igb_xmit_frame_ring() on the ring chosen by igb_tx_queue_mapping().
 */
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/*
	 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
	 * in order to meet this minimum size requirement.
4373 */ 4374 if (skb->len < 17) { 4375 if (skb_padto(skb, 17)) 4376 return NETDEV_TX_OK; 4377 skb->len = 17; 4378 } 4379 4380 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); 4381 } 4382 4383 /** 4384 * igb_tx_timeout - Respond to a Tx Hang 4385 * @netdev: network interface device structure 4386 **/ 4387 static void igb_tx_timeout(struct net_device *netdev) 4388 { 4389 struct igb_adapter *adapter = netdev_priv(netdev); 4390 struct e1000_hw *hw = &adapter->hw; 4391 4392 /* Do the reset outside of interrupt context */ 4393 adapter->tx_timeout_count++; 4394 4395 if (hw->mac.type >= e1000_82580) 4396 hw->dev_spec._82575.global_device_reset = true; 4397 4398 schedule_work(&adapter->reset_task); 4399 wr32(E1000_EICS, 4400 (adapter->eims_enable_mask & ~adapter->eims_other)); 4401 } 4402 4403 static void igb_reset_task(struct work_struct *work) 4404 { 4405 struct igb_adapter *adapter; 4406 adapter = container_of(work, struct igb_adapter, reset_task); 4407 4408 igb_dump(adapter); 4409 netdev_err(adapter->netdev, "Reset adapter\n"); 4410 igb_reinit_locked(adapter); 4411 } 4412 4413 /** 4414 * igb_get_stats64 - Get System Network Statistics 4415 * @netdev: network interface device structure 4416 * @stats: rtnl_link_stats64 pointer 4417 * 4418 **/ 4419 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, 4420 struct rtnl_link_stats64 *stats) 4421 { 4422 struct igb_adapter *adapter = netdev_priv(netdev); 4423 4424 spin_lock(&adapter->stats64_lock); 4425 igb_update_stats(adapter, &adapter->stats64); 4426 memcpy(stats, &adapter->stats64, sizeof(*stats)); 4427 spin_unlock(&adapter->stats64_lock); 4428 4429 return stats; 4430 } 4431 4432 /** 4433 * igb_change_mtu - Change the Maximum Transfer Unit 4434 * @netdev: network interface device structure 4435 * @new_mtu: new value for maximum frame size 4436 * 4437 * Returns 0 on success, negative on failure 4438 **/ 4439 static int igb_change_mtu(struct net_device *netdev, int new_mtu) 4440 { 
struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	/* frame size on the wire: MTU plus Ethernet header, FCS and VLAN tag */
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;

	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
		dev_err(&pdev->dev, "Invalid MTU setting\n");
		return -EINVAL;
	}

	/*
	 * NOTE(review): 9238 presumably is the 9216 MTU named in the message
	 * below plus the header/FCS/VLAN overhead added above - confirm.
	 */
#define MAX_STD_JUMBO_FRAME_SIZE 9238
	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
		return -EINVAL;
	}

	/* wait until no other reset is in flight, then claim the flag */
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
		msleep(1);

	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;

	if (netif_running(netdev))
		igb_down(adapter);

	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	/* bring a running interface back up, otherwise just reset */
	if (netif_running(netdev))
		igb_up(adapter);
	else
		igb_reset(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);

	return 0;
}

/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 * @net_stats: rtnl_link_stats64 structure filled from the ring counters
 *             and the accumulated hardware statistics
 **/

void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 reg, mpc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;
	unsigned int start;
	u64 _bytes, _packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
*/
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	/* sum per-ring Rx counters and fold per-queue drops into the totals */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];

		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;

		/* retry until bytes/packets are read as a consistent pair */
		do {
			start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	/* same for the Tx rings */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		do {
			start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
		} while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
		bytes += _bytes;
		packets += _packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	/* MPC is read once because it feeds two counters */
	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	adapter->stats.rnbc += rd32(E1000_RNBC);
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		/* only at 1000 Mb/s, and only if the PHY read succeeds */
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);

	/* OS2BMC
Stats */
	/* these counters are only read when MANC has EN_BMC2OS set */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
}

/*
 * igb_msix_other - MSI-X handler for the causes reported through ICR
 * @irq: interrupt number (unused)
 * @data: the struct igb_adapter registered with the interrupt
 *
 * Handles device-reset, DMA-out-of-sync, VF mailbox and link-status-change
 * causes, then writes eims_other to EIMS.  Always returns IRQ_HANDLED.
 */
static irqreturn_t igb_msix_other(int irq, void *data)
{
	struct igb_adapter *adapter = data;
	struct e1000_hw *hw = &adapter->hw;
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */

	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

	if (icr & E1000_ICR_DOUTSYNC) {
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
		/* The DMA Out of Sync is also indication of a spoof event
		 * in IOV mode. Check the Wrong VM Behavior register to
		 * see if it is really a spoof event. */
		igb_check_wvbr(adapter);
	}

	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

	wr32(E1000_EIMS, adapter->eims_other);

	return IRQ_HANDLED;
}

/*
 * igb_write_itr - write a pending interrupt throttle value for a vector
 * @q_vector: vector whose itr_val is written when set_itr is non-zero
 *
 * The value is masked with 0x7FFC and a result of zero is replaced by 0x4.
 * For e1000_82575 the value is also copied into the upper 16 bits; for any
 * other MAC type E1000_EITR_CNT_IGNR is OR-ed in.  set_itr is cleared after
 * the write.
 */
static void igb_write_itr(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	u32 itr_val = q_vector->itr_val & 0x7FFC;

	if (!q_vector->set_itr)
		return;

	if (!itr_val)
		itr_val = 0x4;

	if (adapter->hw.mac.type == e1000_82575)
		itr_val |= itr_val << 16;
	else
		itr_val |= E1000_EITR_CNT_IGNR;

	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;
}

/*
 * igb_msix_ring - MSI-X handler for a queue vector
 * @irq: interrupt number (unused)
 * @data: the struct igb_q_vector registered with the interrupt
 */
static irqreturn_t igb_msix_ring(int irq, void *data)
{
	struct igb_q_vector *q_vector = data;

	/* Write the ITR value
calculated from the previous interrupt. */
	igb_write_itr(q_vector);

	/* the ring itself is serviced from NAPI context */
	napi_schedule(&q_vector->napi);

	return IRQ_HANDLED;
}

#ifdef CONFIG_IGB_DCA
/*
 * igb_update_dca - point a vector's DCA tags at the current CPU
 * @q_vector: vector whose Tx and/or Rx ring control registers are updated
 *
 * Does nothing when q_vector->cpu already equals the current CPU.
 * Otherwise rewrites the CPU id field of DCA_TXCTRL/DCA_RXCTRL for the
 * rings attached to the vector and records the CPU in q_vector->cpu.
 */
static void igb_update_dca(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)
		goto out_no_update;

	if (q_vector->tx.ring) {
		int q = q_vector->tx.ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		/* 82575 uses a different CPU id mask and no shift */
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_TXCTRL_CPUID_SHIFT;
		}
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
	}
	if (q_vector->rx.ring) {
		int q = q_vector->rx.ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_RXCTRL_CPUID_SHIFT;
		}
		/* Rx enables DCA for descriptors, headers and data */
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	}
	q_vector->cpu = cpu;
out_no_update:
	/* pairs with get_cpu() above */
	put_cpu();
}

/*
 * igb_setup_dca - program DCA for every queue vector
 * @adapter: board private structure
 *
 * No-op unless IGB_FLAG_DCA_ENABLED is set in adapter->flags.
 */
static void igb_setup_dca(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
		return;

	/* Always use CB2 mode, difference is masked in the CB driver.
*/
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		/* -1 never matches a CPU number, so the update always runs */
		adapter->q_vector[i]->cpu = -1;
		igb_update_dca(adapter->q_vector[i]);
	}
}

/*
 * __igb_notify_dca - apply a DCA provider event to one device
 * @dev: device bound to this driver
 * @data: pointer to the unsigned long event code
 *
 * Called for each device by igb_notify_dca().  Always returns 0.
 */
static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
			break;
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
		}
		break;
	}

	return 0;
}

/*
 * igb_notify_dca - DCA notifier callback
 *
 * Runs __igb_notify_dca() on every device of igb_driver and maps a
 * non-zero result to NOTIFY_BAD, zero to NOTIFY_DONE.
 */
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p)
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
					 __igb_notify_dca);

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
}
#endif /* CONFIG_IGB_DCA */

#ifdef CONFIG_PCI_IOV
/*
 * igb_vf_configure - give a VF a random MAC and find its pci_dev
 * @adapter: board private structure
 * @vf: VF index
 *
 * The expected devfn of the VF is derived from the PF devfn, an offset of
 * 0x80 and the per-MAC VF stride.  Returns non-zero when a matching device
 * was found (and stored in vf_data[vf].vfdev), zero otherwise.
 */
static int igb_vf_configure(struct igb_adapter *adapter, int vf)
{
	unsigned char mac_addr[ETH_ALEN];
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pvfdev;
	unsigned int device_id;
	u16 thisvf_devfn;

	random_ether_addr(mac_addr);
	igb_set_vf_mac(adapter, vf, mac_addr);

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
			(pdev->devfn & 1);
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
				(pdev->devfn & 3);
		break;
	default:
		device_id = 0;
		thisvf_devfn = 0;
		break;
	}

	/* walk all devices with the VF device id until the devfn matches */
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == thisvf_devfn)
			break;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	if (pvfdev)
		adapter->vf_data[vf].vfdev = pvfdev;
	else
		dev_err(&pdev->dev,
			"Couldn't find pci dev ptr for VF %4.4x\n",
			thisvf_devfn);
	return pvfdev != NULL;
}

/*
 * igb_find_enabled_vfs - count VF pci devices found for this PF
 * @adapter: board private structure
 *
 * Returns the number of devices with the VF device id whose devfn matches
 * the expected value at that point of the walk.
 */
static int igb_find_enabled_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	struct pci_dev *pvfdev;
	u16 vf_devfn = 0;
	u16 vf_stride;
	unsigned int device_id;
	int vfs_found = 0;

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		device_id = IGB_82576_VF_DEV_ID;
		/* VF Stride for 82576 is 2 */
		vf_stride = 2;
		break;
	case e1000_i350:
		device_id = IGB_I350_VF_DEV_ID;
		/* VF Stride for I350 is 4 */
		vf_stride = 4;
		break;
	default:
		device_id = 0;
vf_stride = 0;
		break;
	}

	vf_devfn = pdev->devfn + 0x80;
	pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
	while (pvfdev) {
		if (pvfdev->devfn == vf_devfn &&
		    (pvfdev->bus->number >= pdev->bus->number))
			vfs_found++;
		/* the expected devfn advances for every device visited */
		vf_devfn += vf_stride;
		pvfdev = pci_get_device(hw->vendor_id,
					device_id, pvfdev);
	}

	return vfs_found;
}

/*
 * igb_check_vf_assignment - report whether any VF device is assigned
 *
 * Returns true if a known VF pci_dev has PCI_DEV_FLAGS_ASSIGNED set in
 * dev_flags, false otherwise.
 */
static int igb_check_vf_assignment(struct igb_adapter *adapter)
{
	int i;
	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		if (adapter->vf_data[i].vfdev) {
			if (adapter->vf_data[i].vfdev->dev_flags &
			    PCI_DEV_FLAGS_ASSIGNED)
				return true;
		}
	}
	return false;
}

#endif
/*
 * igb_ping_all_vfs - send a PF control message to every allocated VF
 *
 * The message carries E1000_VT_MSGTYPE_CTS for VFs flagged
 * IGB_VF_FLAG_CTS.
 */
static void igb_ping_all_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ping;
	int i;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);
	}
}

/*
 * igb_set_vf_promisc - handle a VF request to change promiscuous modes
 * @adapter: board private structure
 * @msgbuf: mailbox message; handled request bits are cleared in *msgbuf
 * @vf: VF index
 *
 * Returns 0 on success, or -EINVAL when request bits within
 * E1000_VT_MSGINFO_MASK remain set after processing.
 */
static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	/* start from a clean state: no promiscuous flags or VMOLR bits */
	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
			    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
	} else {
		/*
		 * if we have hashes and we are clearing a multicast promisc
		 * flag we need to write the hashes to the MTA as this step
		 * was previously skipped
		 */
		if (vf_data->num_vf_mc_hashes > 30) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			int j;
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
	}

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	if (*msgbuf & E1000_VT_MSGINFO_MASK)
		return -EINVAL;

	return 0;

}

/*
 * igb_set_vf_multicasts - store the multicast hash list sent by a VF
 * @adapter: board private structure
 * @msgbuf: mailbox message; the count is in the MSGINFO field of word 0
 *          and the 16-bit hashes start at word 1
 * @vf: VF index
 *
 * Always returns 0.
 */
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				  u32 *msgbuf, u32 vf)
{
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multi cast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */
	if (n > 30)
		n = 30;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}

/*
 * igb_restore_vf_multicasts - reprogram multicast filtering for all VFs
 *
 * A VF with more than 30 recorded hashes, or with
 * IGB_VF_FLAG_MULTI_PROMISC set, gets E1000_VMOLR_MPME; a VF with some
 * hashes gets E1000_VMOLR_ROMPE and its hashes written to the MTA.
 */
static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;
	int i, j;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
		u32 vmolr = rd32(E1000_VMOLR(i));
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

		vf_data = &adapter->vf_data[i];

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
		wr32(E1000_VMOLR(i), vmolr);
	}
}

static void
igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) 5058 { 5059 struct e1000_hw *hw = &adapter->hw; 5060 u32 pool_mask, reg, vid; 5061 int i; 5062 5063 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); 5064 5065 /* Find the vlan filter for this id */ 5066 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5067 reg = rd32(E1000_VLVF(i)); 5068 5069 /* remove the vf from the pool */ 5070 reg &= ~pool_mask; 5071 5072 /* if pool is empty then remove entry from vfta */ 5073 if (!(reg & E1000_VLVF_POOLSEL_MASK) && 5074 (reg & E1000_VLVF_VLANID_ENABLE)) { 5075 reg = 0; 5076 vid = reg & E1000_VLVF_VLANID_MASK; 5077 igb_vfta_set(hw, vid, false); 5078 } 5079 5080 wr32(E1000_VLVF(i), reg); 5081 } 5082 5083 adapter->vf_data[vf].vlans_enabled = 0; 5084 } 5085 5086 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) 5087 { 5088 struct e1000_hw *hw = &adapter->hw; 5089 u32 reg, i; 5090 5091 /* The vlvf table only exists on 82576 hardware and newer */ 5092 if (hw->mac.type < e1000_82576) 5093 return -1; 5094 5095 /* we only need to do this if VMDq is enabled */ 5096 if (!adapter->vfs_allocated_count) 5097 return -1; 5098 5099 /* Find the vlan filter for this id */ 5100 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { 5101 reg = rd32(E1000_VLVF(i)); 5102 if ((reg & E1000_VLVF_VLANID_ENABLE) && 5103 vid == (reg & E1000_VLVF_VLANID_MASK)) 5104 break; 5105 } 5106 5107 if (add) { 5108 if (i == E1000_VLVF_ARRAY_SIZE) { 5109 /* Did not find a matching VLAN ID entry that was 5110 * enabled. Search for a free filter entry, i.e. 
* one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/*
			 * first VLAN for this VF: grow the RLPML field by 4
			 * (presumably room for the VLAN tag - confirm)
			 */
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/* last VLAN removed: shrink the RLPML field again */
			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}

/*
 * igb_set_vmvir - program the VMVIR register of a VF
 *
 * A non-zero @vid is written together with E1000_VMVIR_VLANA_DEFAULT;
 * zero clears the register.
 */
static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;

	if (vid)
		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
	else
		wr32(E1000_VMVIR(vf), 0);
}

/*
 * igb_ndo_set_vf_vlan - set or clear the administratively assigned VLAN of a VF
 * @netdev: network interface device structure
 * @vf: VF index
 * @vlan: VLAN ID, 0 to 4095
 * @qos: priority, 0 to 7
 *
 * Returns -EINVAL for an out-of-range argument, the igb_vlvf_set() error
 * when adding the filter fails, 0 otherwise.  With both @vlan and @qos
 * zero the previously stored pf_vlan is removed.
 */
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos)
{
	int err = 0;
	struct igb_adapter *adapter = netdev_priv(netdev);

	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
		return -EINVAL;
	if (vlan || qos) {
		err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
		if (err)
			goto out;
		igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
		igb_set_vmolr(adapter, vf, !vlan);
		adapter->vf_data[vf].pf_vlan = vlan;
		adapter->vf_data[vf].pf_qos = qos;
		dev_info(&adapter->pdev->dev,
			 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
		if (test_bit(__IGB_DOWN, &adapter->state)) {
			dev_warn(&adapter->pdev->dev,
				 "The VF VLAN has been set,"
				 " but the PF device is not up.\n");
			dev_warn(&adapter->pdev->dev,
				 "Bring the PF device up before"
				 " attempting to use the VF device.\n");
		}
	} else {
		igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
				   false, vf);
		igb_set_vmvir(adapter, vlan, vf);
		igb_set_vmolr(adapter, vf, true);
		adapter->vf_data[vf].pf_vlan = 0;
		adapter->vf_data[vf].pf_qos = 0;
	}
out:
	return err;
}

/*
 * igb_set_vf_vlan - handle a VLAN add/remove request sent by a VF
 *
 * The add flag comes from the MSGINFO field of word 0 and the VLAN ID
 * from word 1 of @msgbuf.  Returns the result of igb_vlvf_set().
 */
static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);

	return igb_vlvf_set(adapter, vid, add, vf);
}

/*
 * igb_vf_reset - return the PF-side state kept for a VF to its defaults
 */
static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
{
	/* clear flags - except flag that indicates PF has set the MAC */
	adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
	adapter->vf_data[vf].last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf, true);

	/* reset
vlans for device */
	igb_clear_vf_vfta(adapter, vf);
	/* re-apply the VLAN assigned by the PF, if there is one */
	if (adapter->vf_data[vf].pf_vlan)
		igb_ndo_set_vf_vlan(adapter->netdev, vf,
				    adapter->vf_data[vf].pf_vlan,
				    adapter->vf_data[vf].pf_qos);
	else
		/*
		 * NOTE(review): igb_clear_vf_vfta() already ran just above,
		 * so this second call looks redundant - confirm.
		 */
		igb_clear_vf_vfta(adapter, vf);

	/* reset multicast table array for vf */
	adapter->vf_data[vf].num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
}

/*
 * igb_vf_reset_event - handle a reset event detected for a VF
 *
 * Picks a new random MAC unless the PF has set one, then performs the
 * common igb_vf_reset() work.
 */
static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
{
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;

	/* generate a new mac address as we were hotplug removed/added */
	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
		random_ether_addr(vf_mac);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);
}

/*
 * igb_vf_reset_msg - handle an E1000_VF_RESET mailbox request
 *
 * Resets the VF state, programs its MAC into a receive address entry,
 * enables Tx and Rx for the VF, marks it clear-to-send and replies with
 * an ACK carrying the MAC address.
 */
static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	/* VF entries are taken from the end of the receive address table */
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	u32 reg, msgbuf[3];
	u8 *addr = (u8 *)(&msgbuf[1]);

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
	memcpy(addr, vf_mac, 6);
	igb_write_mbx(hw, msgbuf, 3, vf);
}

/*
 * igb_set_vf_mac_addr - handle a VF request to change its MAC address
 *
 * Returns -1 when the address in the message is not a valid Ethernet
 * address, otherwise the result of igb_set_vf_mac().
 */
static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
{
	/*
	 * The VF MAC Address is stored in a packed array of bytes
	 *
starting at the second 32 bit word of the msg array 5305 */ 5306 unsigned char *addr = (char *)&msg[1]; 5307 int err = -1; 5308 5309 if (is_valid_ether_addr(addr)) 5310 err = igb_set_vf_mac(adapter, vf, addr); 5311 5312 return err; 5313 } 5314 5315 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) 5316 { 5317 struct e1000_hw *hw = &adapter->hw; 5318 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5319 u32 msg = E1000_VT_MSGTYPE_NACK; 5320 5321 /* if device isn't clear to send it shouldn't be reading either */ 5322 if (!(vf_data->flags & IGB_VF_FLAG_CTS) && 5323 time_after(jiffies, vf_data->last_nack + (2 * HZ))) { 5324 igb_write_mbx(hw, &msg, 1, vf); 5325 vf_data->last_nack = jiffies; 5326 } 5327 } 5328 5329 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) 5330 { 5331 struct pci_dev *pdev = adapter->pdev; 5332 u32 msgbuf[E1000_VFMAILBOX_SIZE]; 5333 struct e1000_hw *hw = &adapter->hw; 5334 struct vf_data_storage *vf_data = &adapter->vf_data[vf]; 5335 s32 retval; 5336 5337 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); 5338 5339 if (retval) { 5340 /* if receive failed revoke VF CTS stats and restart init */ 5341 dev_err(&pdev->dev, "Error receiving message from VF\n"); 5342 vf_data->flags &= ~IGB_VF_FLAG_CTS; 5343 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5344 return; 5345 goto out; 5346 } 5347 5348 /* this is a message we already processed, do nothing */ 5349 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) 5350 return; 5351 5352 /* 5353 * until the vf completes a reset it should not be 5354 * allowed to start any configuration. 
5355 */ 5356 5357 if (msgbuf[0] == E1000_VF_RESET) { 5358 igb_vf_reset_msg(adapter, vf); 5359 return; 5360 } 5361 5362 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { 5363 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ))) 5364 return; 5365 retval = -1; 5366 goto out; 5367 } 5368 5369 switch ((msgbuf[0] & 0xFFFF)) { 5370 case E1000_VF_SET_MAC_ADDR: 5371 retval = -EINVAL; 5372 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) 5373 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); 5374 else 5375 dev_warn(&pdev->dev, 5376 "VF %d attempted to override administratively " 5377 "set MAC address\nReload the VF driver to " 5378 "resume operations\n", vf); 5379 break; 5380 case E1000_VF_SET_PROMISC: 5381 retval = igb_set_vf_promisc(adapter, msgbuf, vf); 5382 break; 5383 case E1000_VF_SET_MULTICAST: 5384 retval = igb_set_vf_multicasts(adapter, msgbuf, vf); 5385 break; 5386 case E1000_VF_SET_LPE: 5387 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); 5388 break; 5389 case E1000_VF_SET_VLAN: 5390 retval = -1; 5391 if (vf_data->pf_vlan) 5392 dev_warn(&pdev->dev, 5393 "VF %d attempted to override administratively " 5394 "set VLAN tag\nReload the VF driver to " 5395 "resume operations\n", vf); 5396 else 5397 retval = igb_set_vf_vlan(adapter, msgbuf, vf); 5398 break; 5399 default: 5400 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]); 5401 retval = -1; 5402 break; 5403 } 5404 5405 msgbuf[0] |= E1000_VT_MSGTYPE_CTS; 5406 out: 5407 /* notify the VF of the results of what it sent us */ 5408 if (retval) 5409 msgbuf[0] |= E1000_VT_MSGTYPE_NACK; 5410 else 5411 msgbuf[0] |= E1000_VT_MSGTYPE_ACK; 5412 5413 igb_write_mbx(hw, msgbuf, 1, vf); 5414 } 5415 5416 static void igb_msg_task(struct igb_adapter *adapter) 5417 { 5418 struct e1000_hw *hw = &adapter->hw; 5419 u32 vf; 5420 5421 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { 5422 /* process any reset requests */ 5423 if (!igb_check_for_rst(hw, vf)) 5424 igb_vf_reset_event(adapter, vf); 5425 5426 /* process any messages 
pending */ 5427 if (!igb_check_for_msg(hw, vf)) 5428 igb_rcv_msg_from_vf(adapter, vf); 5429 5430 /* process any acks */ 5431 if (!igb_check_for_ack(hw, vf)) 5432 igb_rcv_ack_from_vf(adapter, vf); 5433 } 5434 } 5435 5436 /** 5437 * igb_set_uta - Set unicast filter table address 5438 * @adapter: board private structure 5439 * 5440 * The unicast table address is a register array of 32-bit registers. 5441 * The table is meant to be used in a way similar to how the MTA is used 5442 * however due to certain limitations in the hardware it is necessary to 5443 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous 5444 * enable bit to allow vlan tag stripping when promiscuous mode is enabled 5445 **/ 5446 static void igb_set_uta(struct igb_adapter *adapter) 5447 { 5448 struct e1000_hw *hw = &adapter->hw; 5449 int i; 5450 5451 /* The UTA table only exists on 82576 hardware and newer */ 5452 if (hw->mac.type < e1000_82576) 5453 return; 5454 5455 /* we only need to do this if VMDq is enabled */ 5456 if (!adapter->vfs_allocated_count) 5457 return; 5458 5459 for (i = 0; i < hw->mac.uta_reg_count; i++) 5460 array_wr32(E1000_UTA, i, ~0); 5461 } 5462 5463 /** 5464 * igb_intr_msi - Interrupt Handler 5465 * @irq: interrupt number 5466 * @data: pointer to a network interface device structure 5467 **/ 5468 static irqreturn_t igb_intr_msi(int irq, void *data) 5469 { 5470 struct igb_adapter *adapter = data; 5471 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5472 struct e1000_hw *hw = &adapter->hw; 5473 /* read ICR disables interrupts using IAM */ 5474 u32 icr = rd32(E1000_ICR); 5475 5476 igb_write_itr(q_vector); 5477 5478 if (icr & E1000_ICR_DRSTA) 5479 schedule_work(&adapter->reset_task); 5480 5481 if (icr & E1000_ICR_DOUTSYNC) { 5482 /* HW is reporting DMA is out of sync */ 5483 adapter->stats.doosync++; 5484 } 5485 5486 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5487 hw->mac.get_link_status = 1; 5488 if (!test_bit(__IGB_DOWN, &adapter->state)) 5489 
mod_timer(&adapter->watchdog_timer, jiffies + 1); 5490 } 5491 5492 napi_schedule(&q_vector->napi); 5493 5494 return IRQ_HANDLED; 5495 } 5496 5497 /** 5498 * igb_intr - Legacy Interrupt Handler 5499 * @irq: interrupt number 5500 * @data: pointer to a network interface device structure 5501 **/ 5502 static irqreturn_t igb_intr(int irq, void *data) 5503 { 5504 struct igb_adapter *adapter = data; 5505 struct igb_q_vector *q_vector = adapter->q_vector[0]; 5506 struct e1000_hw *hw = &adapter->hw; 5507 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No 5508 * need for the IMC write */ 5509 u32 icr = rd32(E1000_ICR); 5510 5511 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is 5512 * not set, then the adapter didn't send an interrupt */ 5513 if (!(icr & E1000_ICR_INT_ASSERTED)) 5514 return IRQ_NONE; 5515 5516 igb_write_itr(q_vector); 5517 5518 if (icr & E1000_ICR_DRSTA) 5519 schedule_work(&adapter->reset_task); 5520 5521 if (icr & E1000_ICR_DOUTSYNC) { 5522 /* HW is reporting DMA is out of sync */ 5523 adapter->stats.doosync++; 5524 } 5525 5526 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { 5527 hw->mac.get_link_status = 1; 5528 /* guard against interrupt when we're going down */ 5529 if (!test_bit(__IGB_DOWN, &adapter->state)) 5530 mod_timer(&adapter->watchdog_timer, jiffies + 1); 5531 } 5532 5533 napi_schedule(&q_vector->napi); 5534 5535 return IRQ_HANDLED; 5536 } 5537 5538 static void igb_ring_irq_enable(struct igb_q_vector *q_vector) 5539 { 5540 struct igb_adapter *adapter = q_vector->adapter; 5541 struct e1000_hw *hw = &adapter->hw; 5542 5543 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || 5544 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { 5545 if ((adapter->num_q_vectors == 1) && !adapter->vf_data) 5546 igb_set_itr(q_vector); 5547 else 5548 igb_update_ring_itr(q_vector); 5549 } 5550 5551 if (!test_bit(__IGB_DOWN, &adapter->state)) { 5552 if (adapter->msix_entries) 5553 wr32(E1000_EIMS, q_vector->eims_value); 
5554 else 5555 igb_irq_enable(adapter); 5556 } 5557 } 5558 5559 /** 5560 * igb_poll - NAPI Rx polling callback 5561 * @napi: napi polling structure 5562 * @budget: count of how many packets we should handle 5563 **/ 5564 static int igb_poll(struct napi_struct *napi, int budget) 5565 { 5566 struct igb_q_vector *q_vector = container_of(napi, 5567 struct igb_q_vector, 5568 napi); 5569 bool clean_complete = true; 5570 5571 #ifdef CONFIG_IGB_DCA 5572 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) 5573 igb_update_dca(q_vector); 5574 #endif 5575 if (q_vector->tx.ring) 5576 clean_complete = igb_clean_tx_irq(q_vector); 5577 5578 if (q_vector->rx.ring) 5579 clean_complete &= igb_clean_rx_irq(q_vector, budget); 5580 5581 /* If all work not completed, return budget and keep polling */ 5582 if (!clean_complete) 5583 return budget; 5584 5585 /* If not enough Rx work done, exit the polling mode */ 5586 napi_complete(napi); 5587 igb_ring_irq_enable(q_vector); 5588 5589 return 0; 5590 } 5591 5592 #ifdef CONFIG_IGB_PTP 5593 /** 5594 * igb_tx_hwtstamp - utility function which checks for TX time stamp 5595 * @q_vector: pointer to q_vector containing needed info 5596 * @buffer: pointer to igb_tx_buffer structure 5597 * 5598 * If we were asked to do hardware stamping and such a time stamp is 5599 * available, then it must have been for this skb here because we only 5600 * allow only one such packet into the queue. 
5601 */ 5602 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, 5603 struct igb_tx_buffer *buffer_info) 5604 { 5605 struct igb_adapter *adapter = q_vector->adapter; 5606 struct e1000_hw *hw = &adapter->hw; 5607 struct skb_shared_hwtstamps shhwtstamps; 5608 u64 regval; 5609 5610 /* if skb does not support hw timestamp or TX stamp not valid exit */ 5611 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || 5612 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) 5613 return; 5614 5615 regval = rd32(E1000_TXSTMPL); 5616 regval |= (u64)rd32(E1000_TXSTMPH) << 32; 5617 5618 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); 5619 skb_tstamp_tx(buffer_info->skb, &shhwtstamps); 5620 } 5621 5622 #endif 5623 /** 5624 * igb_clean_tx_irq - Reclaim resources after transmit completes 5625 * @q_vector: pointer to q_vector containing needed info 5626 * returns true if ring is completely cleaned 5627 **/ 5628 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) 5629 { 5630 struct igb_adapter *adapter = q_vector->adapter; 5631 struct igb_ring *tx_ring = q_vector->tx.ring; 5632 struct igb_tx_buffer *tx_buffer; 5633 union e1000_adv_tx_desc *tx_desc, *eop_desc; 5634 unsigned int total_bytes = 0, total_packets = 0; 5635 unsigned int budget = q_vector->tx.work_limit; 5636 unsigned int i = tx_ring->next_to_clean; 5637 5638 if (test_bit(__IGB_DOWN, &adapter->state)) 5639 return true; 5640 5641 tx_buffer = &tx_ring->tx_buffer_info[i]; 5642 tx_desc = IGB_TX_DESC(tx_ring, i); 5643 i -= tx_ring->count; 5644 5645 for (; budget; budget--) { 5646 eop_desc = tx_buffer->next_to_watch; 5647 5648 /* prevent any other reads prior to eop_desc */ 5649 rmb(); 5650 5651 /* if next_to_watch is not set then there is no work pending */ 5652 if (!eop_desc) 5653 break; 5654 5655 /* if DD is not set pending work has not been completed */ 5656 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) 5657 break; 5658 5659 /* clear next_to_watch to prevent false hangs */ 5660 
tx_buffer->next_to_watch = NULL; 5661 5662 /* update the statistics for this packet */ 5663 total_bytes += tx_buffer->bytecount; 5664 total_packets += tx_buffer->gso_segs; 5665 5666 #ifdef CONFIG_IGB_PTP 5667 /* retrieve hardware timestamp */ 5668 igb_tx_hwtstamp(q_vector, tx_buffer); 5669 5670 #endif 5671 /* free the skb */ 5672 dev_kfree_skb_any(tx_buffer->skb); 5673 tx_buffer->skb = NULL; 5674 5675 /* unmap skb header data */ 5676 dma_unmap_single(tx_ring->dev, 5677 tx_buffer->dma, 5678 tx_buffer->length, 5679 DMA_TO_DEVICE); 5680 5681 /* clear last DMA location and unmap remaining buffers */ 5682 while (tx_desc != eop_desc) { 5683 tx_buffer->dma = 0; 5684 5685 tx_buffer++; 5686 tx_desc++; 5687 i++; 5688 if (unlikely(!i)) { 5689 i -= tx_ring->count; 5690 tx_buffer = tx_ring->tx_buffer_info; 5691 tx_desc = IGB_TX_DESC(tx_ring, 0); 5692 } 5693 5694 /* unmap any remaining paged data */ 5695 if (tx_buffer->dma) { 5696 dma_unmap_page(tx_ring->dev, 5697 tx_buffer->dma, 5698 tx_buffer->length, 5699 DMA_TO_DEVICE); 5700 } 5701 } 5702 5703 /* clear last DMA location */ 5704 tx_buffer->dma = 0; 5705 5706 /* move us one more past the eop_desc for start of next pkt */ 5707 tx_buffer++; 5708 tx_desc++; 5709 i++; 5710 if (unlikely(!i)) { 5711 i -= tx_ring->count; 5712 tx_buffer = tx_ring->tx_buffer_info; 5713 tx_desc = IGB_TX_DESC(tx_ring, 0); 5714 } 5715 } 5716 5717 netdev_tx_completed_queue(txring_txq(tx_ring), 5718 total_packets, total_bytes); 5719 i += tx_ring->count; 5720 tx_ring->next_to_clean = i; 5721 u64_stats_update_begin(&tx_ring->tx_syncp); 5722 tx_ring->tx_stats.bytes += total_bytes; 5723 tx_ring->tx_stats.packets += total_packets; 5724 u64_stats_update_end(&tx_ring->tx_syncp); 5725 q_vector->tx.total_bytes += total_bytes; 5726 q_vector->tx.total_packets += total_packets; 5727 5728 if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { 5729 struct e1000_hw *hw = &adapter->hw; 5730 5731 eop_desc = tx_buffer->next_to_watch; 5732 5733 /* Detect a transmit 
hang in hardware, this serializes the 5734 * check with the clearing of time_stamp and movement of i */ 5735 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); 5736 if (eop_desc && 5737 time_after(jiffies, tx_buffer->time_stamp + 5738 (adapter->tx_timeout_factor * HZ)) && 5739 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { 5740 5741 /* detected Tx unit hang */ 5742 dev_err(tx_ring->dev, 5743 "Detected Tx Unit Hang\n" 5744 " Tx Queue <%d>\n" 5745 " TDH <%x>\n" 5746 " TDT <%x>\n" 5747 " next_to_use <%x>\n" 5748 " next_to_clean <%x>\n" 5749 "buffer_info[next_to_clean]\n" 5750 " time_stamp <%lx>\n" 5751 " next_to_watch <%p>\n" 5752 " jiffies <%lx>\n" 5753 " desc.status <%x>\n", 5754 tx_ring->queue_index, 5755 rd32(E1000_TDH(tx_ring->reg_idx)), 5756 readl(tx_ring->tail), 5757 tx_ring->next_to_use, 5758 tx_ring->next_to_clean, 5759 tx_buffer->time_stamp, 5760 eop_desc, 5761 jiffies, 5762 eop_desc->wb.status); 5763 netif_stop_subqueue(tx_ring->netdev, 5764 tx_ring->queue_index); 5765 5766 /* we are about to reset, no point in enabling stuff */ 5767 return true; 5768 } 5769 } 5770 5771 if (unlikely(total_packets && 5772 netif_carrier_ok(tx_ring->netdev) && 5773 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { 5774 /* Make sure that anybody stopping the queue after this 5775 * sees the new next_to_clean. 
5776 */ 5777 smp_mb(); 5778 if (__netif_subqueue_stopped(tx_ring->netdev, 5779 tx_ring->queue_index) && 5780 !(test_bit(__IGB_DOWN, &adapter->state))) { 5781 netif_wake_subqueue(tx_ring->netdev, 5782 tx_ring->queue_index); 5783 5784 u64_stats_update_begin(&tx_ring->tx_syncp); 5785 tx_ring->tx_stats.restart_queue++; 5786 u64_stats_update_end(&tx_ring->tx_syncp); 5787 } 5788 } 5789 5790 return !!budget; 5791 } 5792 5793 static inline void igb_rx_checksum(struct igb_ring *ring, 5794 union e1000_adv_rx_desc *rx_desc, 5795 struct sk_buff *skb) 5796 { 5797 skb_checksum_none_assert(skb); 5798 5799 /* Ignore Checksum bit is set */ 5800 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) 5801 return; 5802 5803 /* Rx checksum disabled via ethtool */ 5804 if (!(ring->netdev->features & NETIF_F_RXCSUM)) 5805 return; 5806 5807 /* TCP/UDP checksum error bit is set */ 5808 if (igb_test_staterr(rx_desc, 5809 E1000_RXDEXT_STATERR_TCPE | 5810 E1000_RXDEXT_STATERR_IPE)) { 5811 /* 5812 * work around errata with sctp packets where the TCPE aka 5813 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) 5814 * packets, (aka let the stack check the crc32c) 5815 */ 5816 if (!((skb->len == 60) && 5817 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) { 5818 u64_stats_update_begin(&ring->rx_syncp); 5819 ring->rx_stats.csum_err++; 5820 u64_stats_update_end(&ring->rx_syncp); 5821 } 5822 /* let the stack verify checksum errors */ 5823 return; 5824 } 5825 /* It must be a TCP or UDP packet with a valid checksum */ 5826 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | 5827 E1000_RXD_STAT_UDPCS)) 5828 skb->ip_summed = CHECKSUM_UNNECESSARY; 5829 5830 dev_dbg(ring->dev, "cksum success: bits %08X\n", 5831 le32_to_cpu(rx_desc->wb.upper.status_error)); 5832 } 5833 5834 static inline void igb_rx_hash(struct igb_ring *ring, 5835 union e1000_adv_rx_desc *rx_desc, 5836 struct sk_buff *skb) 5837 { 5838 if (ring->netdev->features & NETIF_F_RXHASH) 5839 skb->rxhash = 
le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); 5840 } 5841 5842 #ifdef CONFIG_IGB_PTP 5843 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, 5844 union e1000_adv_rx_desc *rx_desc, 5845 struct sk_buff *skb) 5846 { 5847 struct igb_adapter *adapter = q_vector->adapter; 5848 struct e1000_hw *hw = &adapter->hw; 5849 u64 regval; 5850 5851 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | 5852 E1000_RXDADV_STAT_TS)) 5853 return; 5854 5855 /* 5856 * If this bit is set, then the RX registers contain the time stamp. No 5857 * other packet will be time stamped until we read these registers, so 5858 * read the registers to make them available again. Because only one 5859 * packet can be time stamped at a time, we know that the register 5860 * values must belong to this one here and therefore we don't need to 5861 * compare any of the additional attributes stored for it. 5862 * 5863 * If nothing went wrong, then it should have a shared tx_flags that we 5864 * can turn into a skb_shared_hwtstamps. 
5865 */ 5866 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { 5867 u32 *stamp = (u32 *)skb->data; 5868 regval = le32_to_cpu(*(stamp + 2)); 5869 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; 5870 skb_pull(skb, IGB_TS_HDR_LEN); 5871 } else { 5872 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) 5873 return; 5874 5875 regval = rd32(E1000_RXSTMPL); 5876 regval |= (u64)rd32(E1000_RXSTMPH) << 32; 5877 } 5878 5879 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); 5880 } 5881 5882 #endif 5883 static void igb_rx_vlan(struct igb_ring *ring, 5884 union e1000_adv_rx_desc *rx_desc, 5885 struct sk_buff *skb) 5886 { 5887 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { 5888 u16 vid; 5889 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && 5890 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags)) 5891 vid = be16_to_cpu(rx_desc->wb.upper.vlan); 5892 else 5893 vid = le16_to_cpu(rx_desc->wb.upper.vlan); 5894 5895 __vlan_hwaccel_put_tag(skb, vid); 5896 } 5897 } 5898 5899 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc) 5900 { 5901 /* HW will not DMA in data larger than the given buffer, even if it 5902 * parses the (NFS, of course) header to be larger. In that case, it 5903 * fills the header buffer and spills the rest into the page. 
5904 */ 5905 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & 5906 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; 5907 if (hlen > IGB_RX_HDR_LEN) 5908 hlen = IGB_RX_HDR_LEN; 5909 return hlen; 5910 } 5911 5912 static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) 5913 { 5914 struct igb_ring *rx_ring = q_vector->rx.ring; 5915 union e1000_adv_rx_desc *rx_desc; 5916 const int current_node = numa_node_id(); 5917 unsigned int total_bytes = 0, total_packets = 0; 5918 u16 cleaned_count = igb_desc_unused(rx_ring); 5919 u16 i = rx_ring->next_to_clean; 5920 5921 rx_desc = IGB_RX_DESC(rx_ring, i); 5922 5923 while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { 5924 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; 5925 struct sk_buff *skb = buffer_info->skb; 5926 union e1000_adv_rx_desc *next_rxd; 5927 5928 buffer_info->skb = NULL; 5929 prefetch(skb->data); 5930 5931 i++; 5932 if (i == rx_ring->count) 5933 i = 0; 5934 5935 next_rxd = IGB_RX_DESC(rx_ring, i); 5936 prefetch(next_rxd); 5937 5938 /* 5939 * This memory barrier is needed to keep us from reading 5940 * any other fields out of the rx_desc until we know the 5941 * RXD_STAT_DD bit is set 5942 */ 5943 rmb(); 5944 5945 if (!skb_is_nonlinear(skb)) { 5946 __skb_put(skb, igb_get_hlen(rx_desc)); 5947 dma_unmap_single(rx_ring->dev, buffer_info->dma, 5948 IGB_RX_HDR_LEN, 5949 DMA_FROM_DEVICE); 5950 buffer_info->dma = 0; 5951 } 5952 5953 if (rx_desc->wb.upper.length) { 5954 u16 length = le16_to_cpu(rx_desc->wb.upper.length); 5955 5956 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, 5957 buffer_info->page, 5958 buffer_info->page_offset, 5959 length); 5960 5961 skb->len += length; 5962 skb->data_len += length; 5963 skb->truesize += PAGE_SIZE / 2; 5964 5965 if ((page_count(buffer_info->page) != 1) || 5966 (page_to_nid(buffer_info->page) != current_node)) 5967 buffer_info->page = NULL; 5968 else 5969 get_page(buffer_info->page); 5970 5971 dma_unmap_page(rx_ring->dev, 
buffer_info->page_dma, 5972 PAGE_SIZE / 2, DMA_FROM_DEVICE); 5973 buffer_info->page_dma = 0; 5974 } 5975 5976 if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) { 5977 struct igb_rx_buffer *next_buffer; 5978 next_buffer = &rx_ring->rx_buffer_info[i]; 5979 buffer_info->skb = next_buffer->skb; 5980 buffer_info->dma = next_buffer->dma; 5981 next_buffer->skb = skb; 5982 next_buffer->dma = 0; 5983 goto next_desc; 5984 } 5985 5986 if (unlikely((igb_test_staterr(rx_desc, 5987 E1000_RXDEXT_ERR_FRAME_ERR_MASK)) 5988 && !(rx_ring->netdev->features & NETIF_F_RXALL))) { 5989 dev_kfree_skb_any(skb); 5990 goto next_desc; 5991 } 5992 5993 #ifdef CONFIG_IGB_PTP 5994 igb_rx_hwtstamp(q_vector, rx_desc, skb); 5995 #endif 5996 igb_rx_hash(rx_ring, rx_desc, skb); 5997 igb_rx_checksum(rx_ring, rx_desc, skb); 5998 igb_rx_vlan(rx_ring, rx_desc, skb); 5999 6000 total_bytes += skb->len; 6001 total_packets++; 6002 6003 skb->protocol = eth_type_trans(skb, rx_ring->netdev); 6004 6005 napi_gro_receive(&q_vector->napi, skb); 6006 6007 budget--; 6008 next_desc: 6009 if (!budget) 6010 break; 6011 6012 cleaned_count++; 6013 /* return some buffers to hardware, one at a time is too slow */ 6014 if (cleaned_count >= IGB_RX_BUFFER_WRITE) { 6015 igb_alloc_rx_buffers(rx_ring, cleaned_count); 6016 cleaned_count = 0; 6017 } 6018 6019 /* use prefetched values */ 6020 rx_desc = next_rxd; 6021 } 6022 6023 rx_ring->next_to_clean = i; 6024 u64_stats_update_begin(&rx_ring->rx_syncp); 6025 rx_ring->rx_stats.packets += total_packets; 6026 rx_ring->rx_stats.bytes += total_bytes; 6027 u64_stats_update_end(&rx_ring->rx_syncp); 6028 q_vector->rx.total_packets += total_packets; 6029 q_vector->rx.total_bytes += total_bytes; 6030 6031 if (cleaned_count) 6032 igb_alloc_rx_buffers(rx_ring, cleaned_count); 6033 6034 return !!budget; 6035 } 6036 6037 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, 6038 struct igb_rx_buffer *bi) 6039 { 6040 struct sk_buff *skb = bi->skb; 6041 dma_addr_t dma = bi->dma; 6042 6043 if 
(dma) 6044 return true; 6045 6046 if (likely(!skb)) { 6047 skb = netdev_alloc_skb_ip_align(rx_ring->netdev, 6048 IGB_RX_HDR_LEN); 6049 bi->skb = skb; 6050 if (!skb) { 6051 rx_ring->rx_stats.alloc_failed++; 6052 return false; 6053 } 6054 6055 /* initialize skb for ring */ 6056 skb_record_rx_queue(skb, rx_ring->queue_index); 6057 } 6058 6059 dma = dma_map_single(rx_ring->dev, skb->data, 6060 IGB_RX_HDR_LEN, DMA_FROM_DEVICE); 6061 6062 if (dma_mapping_error(rx_ring->dev, dma)) { 6063 rx_ring->rx_stats.alloc_failed++; 6064 return false; 6065 } 6066 6067 bi->dma = dma; 6068 return true; 6069 } 6070 6071 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, 6072 struct igb_rx_buffer *bi) 6073 { 6074 struct page *page = bi->page; 6075 dma_addr_t page_dma = bi->page_dma; 6076 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); 6077 6078 if (page_dma) 6079 return true; 6080 6081 if (!page) { 6082 page = alloc_page(GFP_ATOMIC | __GFP_COLD); 6083 bi->page = page; 6084 if (unlikely(!page)) { 6085 rx_ring->rx_stats.alloc_failed++; 6086 return false; 6087 } 6088 } 6089 6090 page_dma = dma_map_page(rx_ring->dev, page, 6091 page_offset, PAGE_SIZE / 2, 6092 DMA_FROM_DEVICE); 6093 6094 if (dma_mapping_error(rx_ring->dev, page_dma)) { 6095 rx_ring->rx_stats.alloc_failed++; 6096 return false; 6097 } 6098 6099 bi->page_dma = page_dma; 6100 bi->page_offset = page_offset; 6101 return true; 6102 } 6103 6104 /** 6105 * igb_alloc_rx_buffers - Replace used receive buffers; packet split 6106 * @adapter: address of board private structure 6107 **/ 6108 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) 6109 { 6110 union e1000_adv_rx_desc *rx_desc; 6111 struct igb_rx_buffer *bi; 6112 u16 i = rx_ring->next_to_use; 6113 6114 rx_desc = IGB_RX_DESC(rx_ring, i); 6115 bi = &rx_ring->rx_buffer_info[i]; 6116 i -= rx_ring->count; 6117 6118 while (cleaned_count--) { 6119 if (!igb_alloc_mapped_skb(rx_ring, bi)) 6120 break; 6121 6122 /* Refresh the desc even if 
buffer_addrs didn't change 6123 * because each write-back erases this info. */ 6124 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); 6125 6126 if (!igb_alloc_mapped_page(rx_ring, bi)) 6127 break; 6128 6129 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); 6130 6131 rx_desc++; 6132 bi++; 6133 i++; 6134 if (unlikely(!i)) { 6135 rx_desc = IGB_RX_DESC(rx_ring, 0); 6136 bi = rx_ring->rx_buffer_info; 6137 i -= rx_ring->count; 6138 } 6139 6140 /* clear the hdr_addr for the next_to_use descriptor */ 6141 rx_desc->read.hdr_addr = 0; 6142 } 6143 6144 i += rx_ring->count; 6145 6146 if (rx_ring->next_to_use != i) { 6147 rx_ring->next_to_use = i; 6148 6149 /* Force memory writes to complete before letting h/w 6150 * know there are new descriptors to fetch. (Only 6151 * applicable for weak-ordered memory model archs, 6152 * such as IA-64). */ 6153 wmb(); 6154 writel(i, rx_ring->tail); 6155 } 6156 } 6157 6158 /** 6159 * igb_mii_ioctl - 6160 * @netdev: 6161 * @ifreq: 6162 * @cmd: 6163 **/ 6164 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6165 { 6166 struct igb_adapter *adapter = netdev_priv(netdev); 6167 struct mii_ioctl_data *data = if_mii(ifr); 6168 6169 if (adapter->hw.phy.media_type != e1000_media_type_copper) 6170 return -EOPNOTSUPP; 6171 6172 switch (cmd) { 6173 case SIOCGMIIPHY: 6174 data->phy_id = adapter->hw.phy.addr; 6175 break; 6176 case SIOCGMIIREG: 6177 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, 6178 &data->val_out)) 6179 return -EIO; 6180 break; 6181 case SIOCSMIIREG: 6182 default: 6183 return -EOPNOTSUPP; 6184 } 6185 return 0; 6186 } 6187 6188 /** 6189 * igb_hwtstamp_ioctl - control hardware time stamping 6190 * @netdev: 6191 * @ifreq: 6192 * @cmd: 6193 * 6194 * Outgoing time stamping can be enabled and disabled. Play nice and 6195 * disable it when requested, although it shouldn't case any overhead 6196 * when no packet needs it. 
At most one packet in the queue may be 6197 * marked for time stamping, otherwise it would be impossible to tell 6198 * for sure to which packet the hardware time stamp belongs. 6199 * 6200 * Incoming time stamping has to be configured via the hardware 6201 * filters. Not all combinations are supported, in particular event 6202 * type has to be specified. Matching the kind of event packet is 6203 * not supported, with the exception of "all V2 events regardless of 6204 * level 2 or 4". 6205 * 6206 **/ 6207 static int igb_hwtstamp_ioctl(struct net_device *netdev, 6208 struct ifreq *ifr, int cmd) 6209 { 6210 struct igb_adapter *adapter = netdev_priv(netdev); 6211 struct e1000_hw *hw = &adapter->hw; 6212 struct hwtstamp_config config; 6213 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; 6214 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; 6215 u32 tsync_rx_cfg = 0; 6216 bool is_l4 = false; 6217 bool is_l2 = false; 6218 u32 regval; 6219 6220 if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) 6221 return -EFAULT; 6222 6223 /* reserved for future extensions */ 6224 if (config.flags) 6225 return -EINVAL; 6226 6227 switch (config.tx_type) { 6228 case HWTSTAMP_TX_OFF: 6229 tsync_tx_ctl = 0; 6230 case HWTSTAMP_TX_ON: 6231 break; 6232 default: 6233 return -ERANGE; 6234 } 6235 6236 switch (config.rx_filter) { 6237 case HWTSTAMP_FILTER_NONE: 6238 tsync_rx_ctl = 0; 6239 break; 6240 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 6241 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 6242 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 6243 case HWTSTAMP_FILTER_ALL: 6244 /* 6245 * register TSYNCRXCFG must be set, therefore it is not 6246 * possible to time stamp both Sync and Delay_Req messages 6247 * => fall back to time stamping all packets 6248 */ 6249 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; 6250 config.rx_filter = HWTSTAMP_FILTER_ALL; 6251 break; 6252 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 6253 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6254 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; 6255 
is_l4 = true; 6256 break; 6257 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 6258 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; 6259 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; 6260 is_l4 = true; 6261 break; 6262 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 6263 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 6264 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6265 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; 6266 is_l2 = true; 6267 is_l4 = true; 6268 config.rx_filter = HWTSTAMP_FILTER_SOME; 6269 break; 6270 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 6271 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 6272 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; 6273 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; 6274 is_l2 = true; 6275 is_l4 = true; 6276 config.rx_filter = HWTSTAMP_FILTER_SOME; 6277 break; 6278 case HWTSTAMP_FILTER_PTP_V2_EVENT: 6279 case HWTSTAMP_FILTER_PTP_V2_SYNC: 6280 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 6281 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; 6282 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; 6283 is_l2 = true; 6284 is_l4 = true; 6285 break; 6286 default: 6287 return -ERANGE; 6288 } 6289 6290 if (hw->mac.type == e1000_82575) { 6291 if (tsync_rx_ctl | tsync_tx_ctl) 6292 return -EINVAL; 6293 return 0; 6294 } 6295 6296 /* 6297 * Per-packet timestamping only works if all packets are 6298 * timestamped, so enable timestamping in all packets as 6299 * long as one rx filter was configured. 
6300 */ 6301 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { 6302 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; 6303 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; 6304 } 6305 6306 /* enable/disable TX */ 6307 regval = rd32(E1000_TSYNCTXCTL); 6308 regval &= ~E1000_TSYNCTXCTL_ENABLED; 6309 regval |= tsync_tx_ctl; 6310 wr32(E1000_TSYNCTXCTL, regval); 6311 6312 /* enable/disable RX */ 6313 regval = rd32(E1000_TSYNCRXCTL); 6314 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); 6315 regval |= tsync_rx_ctl; 6316 wr32(E1000_TSYNCRXCTL, regval); 6317 6318 /* define which PTP packets are time stamped */ 6319 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); 6320 6321 /* define ethertype filter for timestamped packets */ 6322 if (is_l2) 6323 wr32(E1000_ETQF(3), 6324 (E1000_ETQF_FILTER_ENABLE | /* enable filter */ 6325 E1000_ETQF_1588 | /* enable timestamping */ 6326 ETH_P_1588)); /* 1588 eth protocol type */ 6327 else 6328 wr32(E1000_ETQF(3), 0); 6329 6330 #define PTP_PORT 319 6331 /* L4 Queue Filter[3]: filter by destination port and protocol */ 6332 if (is_l4) { 6333 u32 ftqf = (IPPROTO_UDP /* UDP */ 6334 | E1000_FTQF_VF_BP /* VF not compared */ 6335 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ 6336 | E1000_FTQF_MASK); /* mask all inputs */ 6337 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ 6338 6339 wr32(E1000_IMIR(3), htons(PTP_PORT)); 6340 wr32(E1000_IMIREXT(3), 6341 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); 6342 if (hw->mac.type == e1000_82576) { 6343 /* enable source port check */ 6344 wr32(E1000_SPQF(3), htons(PTP_PORT)); 6345 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; 6346 } 6347 wr32(E1000_FTQF(3), ftqf); 6348 } else { 6349 wr32(E1000_FTQF(3), E1000_FTQF_MASK); 6350 } 6351 wrfl(); 6352 6353 adapter->hwtstamp_config = config; 6354 6355 /* clear TX/RX time stamp registers, just to be sure */ 6356 regval = rd32(E1000_TXSTMPH); 6357 regval = rd32(E1000_RXSTMPH); 6358 6359 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
6360 -EFAULT : 0; 6361 } 6362 6363 /** 6364 * igb_ioctl - 6365 * @netdev: 6366 * @ifreq: 6367 * @cmd: 6368 **/ 6369 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) 6370 { 6371 switch (cmd) { 6372 case SIOCGMIIPHY: 6373 case SIOCGMIIREG: 6374 case SIOCSMIIREG: 6375 return igb_mii_ioctl(netdev, ifr, cmd); 6376 case SIOCSHWTSTAMP: 6377 return igb_hwtstamp_ioctl(netdev, ifr, cmd); 6378 default: 6379 return -EOPNOTSUPP; 6380 } 6381 } 6382 6383 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 6384 { 6385 struct igb_adapter *adapter = hw->back; 6386 u16 cap_offset; 6387 6388 cap_offset = adapter->pdev->pcie_cap; 6389 if (!cap_offset) 6390 return -E1000_ERR_CONFIG; 6391 6392 pci_read_config_word(adapter->pdev, cap_offset + reg, value); 6393 6394 return 0; 6395 } 6396 6397 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) 6398 { 6399 struct igb_adapter *adapter = hw->back; 6400 u16 cap_offset; 6401 6402 cap_offset = adapter->pdev->pcie_cap; 6403 if (!cap_offset) 6404 return -E1000_ERR_CONFIG; 6405 6406 pci_write_config_word(adapter->pdev, cap_offset + reg, *value); 6407 6408 return 0; 6409 } 6410 6411 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) 6412 { 6413 struct igb_adapter *adapter = netdev_priv(netdev); 6414 struct e1000_hw *hw = &adapter->hw; 6415 u32 ctrl, rctl; 6416 bool enable = !!(features & NETIF_F_HW_VLAN_RX); 6417 6418 if (enable) { 6419 /* enable VLAN tag insert/strip */ 6420 ctrl = rd32(E1000_CTRL); 6421 ctrl |= E1000_CTRL_VME; 6422 wr32(E1000_CTRL, ctrl); 6423 6424 /* Disable CFI check */ 6425 rctl = rd32(E1000_RCTL); 6426 rctl &= ~E1000_RCTL_CFIEN; 6427 wr32(E1000_RCTL, rctl); 6428 } else { 6429 /* disable VLAN tag insert/strip */ 6430 ctrl = rd32(E1000_CTRL); 6431 ctrl &= ~E1000_CTRL_VME; 6432 wr32(E1000_CTRL, ctrl); 6433 } 6434 6435 igb_rlpml_set(adapter); 6436 } 6437 6438 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) 6439 { 6440 
struct igb_adapter *adapter = netdev_priv(netdev); 6441 struct e1000_hw *hw = &adapter->hw; 6442 int pf_id = adapter->vfs_allocated_count; 6443 6444 /* attempt to add filter to vlvf array */ 6445 igb_vlvf_set(adapter, vid, true, pf_id); 6446 6447 /* add the filter since PF can receive vlans w/o entry in vlvf */ 6448 igb_vfta_set(hw, vid, true); 6449 6450 set_bit(vid, adapter->active_vlans); 6451 6452 return 0; 6453 } 6454 6455 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) 6456 { 6457 struct igb_adapter *adapter = netdev_priv(netdev); 6458 struct e1000_hw *hw = &adapter->hw; 6459 int pf_id = adapter->vfs_allocated_count; 6460 s32 err; 6461 6462 /* remove vlan from VLVF table array */ 6463 err = igb_vlvf_set(adapter, vid, false, pf_id); 6464 6465 /* if vid was not present in VLVF just remove it from table */ 6466 if (err) 6467 igb_vfta_set(hw, vid, false); 6468 6469 clear_bit(vid, adapter->active_vlans); 6470 6471 return 0; 6472 } 6473 6474 static void igb_restore_vlan(struct igb_adapter *adapter) 6475 { 6476 u16 vid; 6477 6478 igb_vlan_mode(adapter->netdev, adapter->netdev->features); 6479 6480 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) 6481 igb_vlan_rx_add_vid(adapter->netdev, vid); 6482 } 6483 6484 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) 6485 { 6486 struct pci_dev *pdev = adapter->pdev; 6487 struct e1000_mac_info *mac = &adapter->hw.mac; 6488 6489 mac->autoneg = 0; 6490 6491 /* Make sure dplx is at most 1 bit and lsb of speed is not set 6492 * for the switch() below to work */ 6493 if ((spd & 1) || (dplx & ~1)) 6494 goto err_inval; 6495 6496 /* Fiber NIC's only allow 1000 Gbps Full duplex */ 6497 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) && 6498 spd != SPEED_1000 && 6499 dplx != DUPLEX_FULL) 6500 goto err_inval; 6501 6502 switch (spd + dplx) { 6503 case SPEED_10 + DUPLEX_HALF: 6504 mac->forced_speed_duplex = ADVERTISE_10_HALF; 6505 break; 6506 case SPEED_10 + DUPLEX_FULL: 
6507 mac->forced_speed_duplex = ADVERTISE_10_FULL; 6508 break; 6509 case SPEED_100 + DUPLEX_HALF: 6510 mac->forced_speed_duplex = ADVERTISE_100_HALF; 6511 break; 6512 case SPEED_100 + DUPLEX_FULL: 6513 mac->forced_speed_duplex = ADVERTISE_100_FULL; 6514 break; 6515 case SPEED_1000 + DUPLEX_FULL: 6516 mac->autoneg = 1; 6517 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; 6518 break; 6519 case SPEED_1000 + DUPLEX_HALF: /* not supported */ 6520 default: 6521 goto err_inval; 6522 } 6523 return 0; 6524 6525 err_inval: 6526 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); 6527 return -EINVAL; 6528 } 6529 6530 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, 6531 bool runtime) 6532 { 6533 struct net_device *netdev = pci_get_drvdata(pdev); 6534 struct igb_adapter *adapter = netdev_priv(netdev); 6535 struct e1000_hw *hw = &adapter->hw; 6536 u32 ctrl, rctl, status; 6537 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; 6538 #ifdef CONFIG_PM 6539 int retval = 0; 6540 #endif 6541 6542 netif_device_detach(netdev); 6543 6544 if (netif_running(netdev)) 6545 __igb_close(netdev, true); 6546 6547 igb_clear_interrupt_scheme(adapter); 6548 6549 #ifdef CONFIG_PM 6550 retval = pci_save_state(pdev); 6551 if (retval) 6552 return retval; 6553 #endif 6554 6555 status = rd32(E1000_STATUS); 6556 if (status & E1000_STATUS_LU) 6557 wufc &= ~E1000_WUFC_LNKC; 6558 6559 if (wufc) { 6560 igb_setup_rctl(adapter); 6561 igb_set_rx_mode(netdev); 6562 6563 /* turn on all-multi mode if wake on multicast is enabled */ 6564 if (wufc & E1000_WUFC_MC) { 6565 rctl = rd32(E1000_RCTL); 6566 rctl |= E1000_RCTL_MPE; 6567 wr32(E1000_RCTL, rctl); 6568 } 6569 6570 ctrl = rd32(E1000_CTRL); 6571 /* advertise wake from D3Cold */ 6572 #define E1000_CTRL_ADVD3WUC 0x00100000 6573 /* phy power management enable */ 6574 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 6575 ctrl |= E1000_CTRL_ADVD3WUC; 6576 wr32(E1000_CTRL, ctrl); 6577 6578 /* Allow time for pending master requests 
to run */ 6579 igb_disable_pcie_master(hw); 6580 6581 wr32(E1000_WUC, E1000_WUC_PME_EN); 6582 wr32(E1000_WUFC, wufc); 6583 } else { 6584 wr32(E1000_WUC, 0); 6585 wr32(E1000_WUFC, 0); 6586 } 6587 6588 *enable_wake = wufc || adapter->en_mng_pt; 6589 if (!*enable_wake) 6590 igb_power_down_link(adapter); 6591 else 6592 igb_power_up_link(adapter); 6593 6594 /* Release control of h/w to f/w. If f/w is AMT enabled, this 6595 * would have already happened in close and is redundant. */ 6596 igb_release_hw_control(adapter); 6597 6598 pci_disable_device(pdev); 6599 6600 return 0; 6601 } 6602 6603 #ifdef CONFIG_PM 6604 #ifdef CONFIG_PM_SLEEP 6605 static int igb_suspend(struct device *dev) 6606 { 6607 int retval; 6608 bool wake; 6609 struct pci_dev *pdev = to_pci_dev(dev); 6610 6611 retval = __igb_shutdown(pdev, &wake, 0); 6612 if (retval) 6613 return retval; 6614 6615 if (wake) { 6616 pci_prepare_to_sleep(pdev); 6617 } else { 6618 pci_wake_from_d3(pdev, false); 6619 pci_set_power_state(pdev, PCI_D3hot); 6620 } 6621 6622 return 0; 6623 } 6624 #endif /* CONFIG_PM_SLEEP */ 6625 6626 static int igb_resume(struct device *dev) 6627 { 6628 struct pci_dev *pdev = to_pci_dev(dev); 6629 struct net_device *netdev = pci_get_drvdata(pdev); 6630 struct igb_adapter *adapter = netdev_priv(netdev); 6631 struct e1000_hw *hw = &adapter->hw; 6632 u32 err; 6633 6634 pci_set_power_state(pdev, PCI_D0); 6635 pci_restore_state(pdev); 6636 pci_save_state(pdev); 6637 6638 err = pci_enable_device_mem(pdev); 6639 if (err) { 6640 dev_err(&pdev->dev, 6641 "igb: Cannot enable PCI device from suspend\n"); 6642 return err; 6643 } 6644 pci_set_master(pdev); 6645 6646 pci_enable_wake(pdev, PCI_D3hot, 0); 6647 pci_enable_wake(pdev, PCI_D3cold, 0); 6648 6649 if (!rtnl_is_locked()) { 6650 /* 6651 * shut up ASSERT_RTNL() warning in 6652 * netif_set_real_num_tx/rx_queues. 
6653 */ 6654 rtnl_lock(); 6655 err = igb_init_interrupt_scheme(adapter); 6656 rtnl_unlock(); 6657 } else { 6658 err = igb_init_interrupt_scheme(adapter); 6659 } 6660 if (err) { 6661 dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); 6662 return -ENOMEM; 6663 } 6664 6665 igb_reset(adapter); 6666 6667 /* let the f/w know that the h/w is now under the control of the 6668 * driver. */ 6669 igb_get_hw_control(adapter); 6670 6671 wr32(E1000_WUS, ~0); 6672 6673 if (netdev->flags & IFF_UP) { 6674 err = __igb_open(netdev, true); 6675 if (err) 6676 return err; 6677 } 6678 6679 netif_device_attach(netdev); 6680 return 0; 6681 } 6682 6683 #ifdef CONFIG_PM_RUNTIME 6684 static int igb_runtime_idle(struct device *dev) 6685 { 6686 struct pci_dev *pdev = to_pci_dev(dev); 6687 struct net_device *netdev = pci_get_drvdata(pdev); 6688 struct igb_adapter *adapter = netdev_priv(netdev); 6689 6690 if (!igb_has_link(adapter)) 6691 pm_schedule_suspend(dev, MSEC_PER_SEC * 5); 6692 6693 return -EBUSY; 6694 } 6695 6696 static int igb_runtime_suspend(struct device *dev) 6697 { 6698 struct pci_dev *pdev = to_pci_dev(dev); 6699 int retval; 6700 bool wake; 6701 6702 retval = __igb_shutdown(pdev, &wake, 1); 6703 if (retval) 6704 return retval; 6705 6706 if (wake) { 6707 pci_prepare_to_sleep(pdev); 6708 } else { 6709 pci_wake_from_d3(pdev, false); 6710 pci_set_power_state(pdev, PCI_D3hot); 6711 } 6712 6713 return 0; 6714 } 6715 6716 static int igb_runtime_resume(struct device *dev) 6717 { 6718 return igb_resume(dev); 6719 } 6720 #endif /* CONFIG_PM_RUNTIME */ 6721 #endif 6722 6723 static void igb_shutdown(struct pci_dev *pdev) 6724 { 6725 bool wake; 6726 6727 __igb_shutdown(pdev, &wake, 0); 6728 6729 if (system_state == SYSTEM_POWER_OFF) { 6730 pci_wake_from_d3(pdev, wake); 6731 pci_set_power_state(pdev, PCI_D3hot); 6732 } 6733 } 6734 6735 #ifdef CONFIG_NET_POLL_CONTROLLER 6736 /* 6737 * Polling 'interrupt' - used by things like netconsole to send skbs 6738 * without having to re-enable 
interrupts. It's not called while 6739 * the interrupt routine is executing. 6740 */ 6741 static void igb_netpoll(struct net_device *netdev) 6742 { 6743 struct igb_adapter *adapter = netdev_priv(netdev); 6744 struct e1000_hw *hw = &adapter->hw; 6745 struct igb_q_vector *q_vector; 6746 int i; 6747 6748 for (i = 0; i < adapter->num_q_vectors; i++) { 6749 q_vector = adapter->q_vector[i]; 6750 if (adapter->msix_entries) 6751 wr32(E1000_EIMC, q_vector->eims_value); 6752 else 6753 igb_irq_disable(adapter); 6754 napi_schedule(&q_vector->napi); 6755 } 6756 } 6757 #endif /* CONFIG_NET_POLL_CONTROLLER */ 6758 6759 /** 6760 * igb_io_error_detected - called when PCI error is detected 6761 * @pdev: Pointer to PCI device 6762 * @state: The current pci connection state 6763 * 6764 * This function is called after a PCI bus error affecting 6765 * this device has been detected. 6766 */ 6767 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, 6768 pci_channel_state_t state) 6769 { 6770 struct net_device *netdev = pci_get_drvdata(pdev); 6771 struct igb_adapter *adapter = netdev_priv(netdev); 6772 6773 netif_device_detach(netdev); 6774 6775 if (state == pci_channel_io_perm_failure) 6776 return PCI_ERS_RESULT_DISCONNECT; 6777 6778 if (netif_running(netdev)) 6779 igb_down(adapter); 6780 pci_disable_device(pdev); 6781 6782 /* Request a slot slot reset. */ 6783 return PCI_ERS_RESULT_NEED_RESET; 6784 } 6785 6786 /** 6787 * igb_io_slot_reset - called after the pci bus has been reset. 6788 * @pdev: Pointer to PCI device 6789 * 6790 * Restart the card from scratch, as if from a cold-boot. Implementation 6791 * resembles the first-half of the igb_resume routine. 
6792 */ 6793 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) 6794 { 6795 struct net_device *netdev = pci_get_drvdata(pdev); 6796 struct igb_adapter *adapter = netdev_priv(netdev); 6797 struct e1000_hw *hw = &adapter->hw; 6798 pci_ers_result_t result; 6799 int err; 6800 6801 if (pci_enable_device_mem(pdev)) { 6802 dev_err(&pdev->dev, 6803 "Cannot re-enable PCI device after reset.\n"); 6804 result = PCI_ERS_RESULT_DISCONNECT; 6805 } else { 6806 pci_set_master(pdev); 6807 pci_restore_state(pdev); 6808 pci_save_state(pdev); 6809 6810 pci_enable_wake(pdev, PCI_D3hot, 0); 6811 pci_enable_wake(pdev, PCI_D3cold, 0); 6812 6813 igb_reset(adapter); 6814 wr32(E1000_WUS, ~0); 6815 result = PCI_ERS_RESULT_RECOVERED; 6816 } 6817 6818 err = pci_cleanup_aer_uncorrect_error_status(pdev); 6819 if (err) { 6820 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status " 6821 "failed 0x%0x\n", err); 6822 /* non-fatal, continue */ 6823 } 6824 6825 return result; 6826 } 6827 6828 /** 6829 * igb_io_resume - called when traffic can start flowing again. 6830 * @pdev: Pointer to PCI device 6831 * 6832 * This callback is called when the error recovery driver tells us that 6833 * its OK to resume normal operation. Implementation resembles the 6834 * second-half of the igb_resume routine. 6835 */ 6836 static void igb_io_resume(struct pci_dev *pdev) 6837 { 6838 struct net_device *netdev = pci_get_drvdata(pdev); 6839 struct igb_adapter *adapter = netdev_priv(netdev); 6840 6841 if (netif_running(netdev)) { 6842 if (igb_up(adapter)) { 6843 dev_err(&pdev->dev, "igb_up failed after reset\n"); 6844 return; 6845 } 6846 } 6847 6848 netif_device_attach(netdev); 6849 6850 /* let the f/w know that the h/w is now under the control of the 6851 * driver. 
*/ 6852 igb_get_hw_control(adapter); 6853 } 6854 6855 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, 6856 u8 qsel) 6857 { 6858 u32 rar_low, rar_high; 6859 struct e1000_hw *hw = &adapter->hw; 6860 6861 /* HW expects these in little endian so we reverse the byte order 6862 * from network order (big endian) to little endian 6863 */ 6864 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | 6865 ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); 6866 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); 6867 6868 /* Indicate to hardware the Address is Valid. */ 6869 rar_high |= E1000_RAH_AV; 6870 6871 if (hw->mac.type == e1000_82575) 6872 rar_high |= E1000_RAH_POOL_1 * qsel; 6873 else 6874 rar_high |= E1000_RAH_POOL_1 << qsel; 6875 6876 wr32(E1000_RAL(index), rar_low); 6877 wrfl(); 6878 wr32(E1000_RAH(index), rar_high); 6879 wrfl(); 6880 } 6881 6882 static int igb_set_vf_mac(struct igb_adapter *adapter, 6883 int vf, unsigned char *mac_addr) 6884 { 6885 struct e1000_hw *hw = &adapter->hw; 6886 /* VF MAC addresses start at end of receive addresses and moves 6887 * torwards the first, as a result a collision should not be possible */ 6888 int rar_entry = hw->mac.rar_entry_count - (vf + 1); 6889 6890 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); 6891 6892 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf); 6893 6894 return 0; 6895 } 6896 6897 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) 6898 { 6899 struct igb_adapter *adapter = netdev_priv(netdev); 6900 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) 6901 return -EINVAL; 6902 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; 6903 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); 6904 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" 6905 " change effective."); 6906 if (test_bit(__IGB_DOWN, &adapter->state)) { 6907 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," 6908 " but 
the PF device is not up.\n"); 6909 dev_warn(&adapter->pdev->dev, "Bring the PF device up before" 6910 " attempting to use the VF device.\n"); 6911 } 6912 return igb_set_vf_mac(adapter, vf, mac); 6913 } 6914 6915 static int igb_link_mbps(int internal_link_speed) 6916 { 6917 switch (internal_link_speed) { 6918 case SPEED_100: 6919 return 100; 6920 case SPEED_1000: 6921 return 1000; 6922 default: 6923 return 0; 6924 } 6925 } 6926 6927 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, 6928 int link_speed) 6929 { 6930 int rf_dec, rf_int; 6931 u32 bcnrc_val; 6932 6933 if (tx_rate != 0) { 6934 /* Calculate the rate factor values to set */ 6935 rf_int = link_speed / tx_rate; 6936 rf_dec = (link_speed - (rf_int * tx_rate)); 6937 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; 6938 6939 bcnrc_val = E1000_RTTBCNRC_RS_ENA; 6940 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) & 6941 E1000_RTTBCNRC_RF_INT_MASK); 6942 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); 6943 } else { 6944 bcnrc_val = 0; 6945 } 6946 6947 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */ 6948 wr32(E1000_RTTBCNRC, bcnrc_val); 6949 } 6950 6951 static void igb_check_vf_rate_limit(struct igb_adapter *adapter) 6952 { 6953 int actual_link_speed, i; 6954 bool reset_rate = false; 6955 6956 /* VF TX rate limit was not set or not supported */ 6957 if ((adapter->vf_rate_link_speed == 0) || 6958 (adapter->hw.mac.type != e1000_82576)) 6959 return; 6960 6961 actual_link_speed = igb_link_mbps(adapter->link_speed); 6962 if (actual_link_speed != adapter->vf_rate_link_speed) { 6963 reset_rate = true; 6964 adapter->vf_rate_link_speed = 0; 6965 dev_info(&adapter->pdev->dev, 6966 "Link speed has been changed. 
VF Transmit " 6967 "rate is disabled\n"); 6968 } 6969 6970 for (i = 0; i < adapter->vfs_allocated_count; i++) { 6971 if (reset_rate) 6972 adapter->vf_data[i].tx_rate = 0; 6973 6974 igb_set_vf_rate_limit(&adapter->hw, i, 6975 adapter->vf_data[i].tx_rate, 6976 actual_link_speed); 6977 } 6978 } 6979 6980 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) 6981 { 6982 struct igb_adapter *adapter = netdev_priv(netdev); 6983 struct e1000_hw *hw = &adapter->hw; 6984 int actual_link_speed; 6985 6986 if (hw->mac.type != e1000_82576) 6987 return -EOPNOTSUPP; 6988 6989 actual_link_speed = igb_link_mbps(adapter->link_speed); 6990 if ((vf >= adapter->vfs_allocated_count) || 6991 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || 6992 (tx_rate < 0) || (tx_rate > actual_link_speed)) 6993 return -EINVAL; 6994 6995 adapter->vf_rate_link_speed = actual_link_speed; 6996 adapter->vf_data[vf].tx_rate = (u16)tx_rate; 6997 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); 6998 6999 return 0; 7000 } 7001 7002 static int igb_ndo_get_vf_config(struct net_device *netdev, 7003 int vf, struct ifla_vf_info *ivi) 7004 { 7005 struct igb_adapter *adapter = netdev_priv(netdev); 7006 if (vf >= adapter->vfs_allocated_count) 7007 return -EINVAL; 7008 ivi->vf = vf; 7009 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); 7010 ivi->tx_rate = adapter->vf_data[vf].tx_rate; 7011 ivi->vlan = adapter->vf_data[vf].pf_vlan; 7012 ivi->qos = adapter->vf_data[vf].pf_qos; 7013 return 0; 7014 } 7015 7016 static void igb_vmm_control(struct igb_adapter *adapter) 7017 { 7018 struct e1000_hw *hw = &adapter->hw; 7019 u32 reg; 7020 7021 switch (hw->mac.type) { 7022 case e1000_82575: 7023 default: 7024 /* replication is not supported for 82575 */ 7025 return; 7026 case e1000_82576: 7027 /* notify HW that the MAC is adding vlan tags */ 7028 reg = rd32(E1000_DTXCTL); 7029 reg |= E1000_DTXCTL_VLAN_ADDED; 7030 wr32(E1000_DTXCTL, reg); 7031 case e1000_82580: 7032 /* enable 
replication vlan tag stripping */ 7033 reg = rd32(E1000_RPLOLR); 7034 reg |= E1000_RPLOLR_STRVLAN; 7035 wr32(E1000_RPLOLR, reg); 7036 case e1000_i350: 7037 /* none of the above registers are supported by i350 */ 7038 break; 7039 } 7040 7041 if (adapter->vfs_allocated_count) { 7042 igb_vmdq_set_loopback_pf(hw, true); 7043 igb_vmdq_set_replication_pf(hw, true); 7044 igb_vmdq_set_anti_spoofing_pf(hw, true, 7045 adapter->vfs_allocated_count); 7046 } else { 7047 igb_vmdq_set_loopback_pf(hw, false); 7048 igb_vmdq_set_replication_pf(hw, false); 7049 } 7050 } 7051 7052 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) 7053 { 7054 struct e1000_hw *hw = &adapter->hw; 7055 u32 dmac_thr; 7056 u16 hwm; 7057 7058 if (hw->mac.type > e1000_82580) { 7059 if (adapter->flags & IGB_FLAG_DMAC) { 7060 u32 reg; 7061 7062 /* force threshold to 0. */ 7063 wr32(E1000_DMCTXTH, 0); 7064 7065 /* 7066 * DMA Coalescing high water mark needs to be greater 7067 * than the Rx threshold. Set hwm to PBA - max frame 7068 * size in 16B units, capping it at PBA - 6KB. 7069 */ 7070 hwm = 64 * pba - adapter->max_frame_size / 16; 7071 if (hwm < 64 * (pba - 6)) 7072 hwm = 64 * (pba - 6); 7073 reg = rd32(E1000_FCRTC); 7074 reg &= ~E1000_FCRTC_RTH_COAL_MASK; 7075 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) 7076 & E1000_FCRTC_RTH_COAL_MASK); 7077 wr32(E1000_FCRTC, reg); 7078 7079 /* 7080 * Set the DMA Coalescing Rx threshold to PBA - 2 * max 7081 * frame size, capping it at PBA - 10KB. 
7082 */ 7083 dmac_thr = pba - adapter->max_frame_size / 512; 7084 if (dmac_thr < pba - 10) 7085 dmac_thr = pba - 10; 7086 reg = rd32(E1000_DMACR); 7087 reg &= ~E1000_DMACR_DMACTHR_MASK; 7088 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) 7089 & E1000_DMACR_DMACTHR_MASK); 7090 7091 /* transition to L0x or L1 if available..*/ 7092 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 7093 7094 /* watchdog timer= +-1000 usec in 32usec intervals */ 7095 reg |= (1000 >> 5); 7096 wr32(E1000_DMACR, reg); 7097 7098 /* 7099 * no lower threshold to disable 7100 * coalescing(smart fifb)-UTRESH=0 7101 */ 7102 wr32(E1000_DMCRTRH, 0); 7103 7104 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4); 7105 7106 wr32(E1000_DMCTLX, reg); 7107 7108 /* 7109 * free space in tx packet buffer to wake from 7110 * DMA coal 7111 */ 7112 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - 7113 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); 7114 7115 /* 7116 * make low power state decision controlled 7117 * by DMA coal 7118 */ 7119 reg = rd32(E1000_PCIEMISC); 7120 reg &= ~E1000_PCIEMISC_LX_DECISION; 7121 wr32(E1000_PCIEMISC, reg); 7122 } /* endif adapter->dmac is not disabled */ 7123 } else if (hw->mac.type == e1000_82580) { 7124 u32 reg = rd32(E1000_PCIEMISC); 7125 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); 7126 wr32(E1000_DMACR, 0); 7127 } 7128 } 7129 7130 /* igb_main.c */ 7131