1 /**************************************************************************** 2 * Driver for Solarflare network controllers and boards 3 * Copyright 2005-2006 Fen Systems Ltd. 4 * Copyright 2005-2013 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include <linux/module.h> 12 #include <linux/pci.h> 13 #include <linux/netdevice.h> 14 #include <linux/etherdevice.h> 15 #include <linux/delay.h> 16 #include <linux/notifier.h> 17 #include <linux/ip.h> 18 #include <linux/tcp.h> 19 #include <linux/in.h> 20 #include <linux/ethtool.h> 21 #include <linux/topology.h> 22 #include <linux/gfp.h> 23 #include <linux/aer.h> 24 #include <linux/interrupt.h> 25 #include "net_driver.h" 26 #include <net/gre.h> 27 #include <net/udp_tunnel.h> 28 #include "efx.h" 29 #include "nic.h" 30 #include "io.h" 31 #include "selftest.h" 32 #include "sriov.h" 33 34 #include "mcdi.h" 35 #include "mcdi_pcol.h" 36 #include "workarounds.h" 37 38 /************************************************************************** 39 * 40 * Type name strings 41 * 42 ************************************************************************** 43 */ 44 45 /* Loopback mode names (see LOOPBACK_MODE()) */ 46 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 47 const char *const efx_loopback_mode_names[] = { 48 [LOOPBACK_NONE] = "NONE", 49 [LOOPBACK_DATA] = "DATAPATH", 50 [LOOPBACK_GMAC] = "GMAC", 51 [LOOPBACK_XGMII] = "XGMII", 52 [LOOPBACK_XGXS] = "XGXS", 53 [LOOPBACK_XAUI] = "XAUI", 54 [LOOPBACK_GMII] = "GMII", 55 [LOOPBACK_SGMII] = "SGMII", 56 [LOOPBACK_XGBR] = "XGBR", 57 [LOOPBACK_XFI] = "XFI", 58 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 59 [LOOPBACK_GMII_FAR] = "GMII_FAR", 60 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 61 [LOOPBACK_XFI_FAR] = "XFI_FAR", 62 [LOOPBACK_GPHY] = "GPHY", 63 [LOOPBACK_PHYXS] = "PHYXS", 64 [LOOPBACK_PCS] = "PCS", 65 [LOOPBACK_PMAPMD] = "PMA/PMD", 66 [LOOPBACK_XPORT] = "XPORT", 67 [LOOPBACK_XGMII_WS] = "XGMII_WS", 68 [LOOPBACK_XAUI_WS] = "XAUI_WS", 69 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 70 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 71 [LOOPBACK_GMII_WS] = "GMII_WS", 72 [LOOPBACK_XFI_WS] = "XFI_WS", 73 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 74 [LOOPBACK_PHYXS_WS] = "PHYXS_WS", 75 }; 76 77 const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 78 const char *const efx_reset_type_names[] = { 79 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 80 [RESET_TYPE_ALL] = "ALL", 81 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 82 [RESET_TYPE_WORLD] = "WORLD", 83 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 84 [RESET_TYPE_DATAPATH] = "DATAPATH", 85 [RESET_TYPE_MC_BIST] = "MC_BIST", 86 [RESET_TYPE_DISABLE] = "DISABLE", 87 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 88 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 89 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 90 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 91 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 92 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", 93 }; 94 95 /* UDP tunnel type names */ 96 static const char *const efx_udp_tunnel_type_names[] = { 97 [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", 98 [TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve", 99 }; 100 101 void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) 102 { 103 if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) && 104 efx_udp_tunnel_type_names[type] != NULL) 105 snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]); 106 else 107 snprintf(buf, buflen, "type %d", type); 108 } 109 110 /* Reset workqueue. If any NIC has a hardware failure then a reset will be 111 * queued onto this work queue. This is not a per-nic work queue, because 112 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 113 */ 114 static struct workqueue_struct *reset_workqueue; 115 116 /* How often and how many times to poll for a reset while waiting for a 117 * BIST that another function started to complete. 118 */ 119 #define BIST_WAIT_DELAY_MS 100 120 #define BIST_WAIT_DELAY_COUNT 100 121 122 /************************************************************************** 123 * 124 * Configurable values 125 * 126 *************************************************************************/ 127 128 /* 129 * Use separate channels for TX and RX events 130 * 131 * Set this to 1 to use separate channels for TX and RX. It allows us 132 * to control interrupt affinity separately for TX and RX. 133 * 134 * This is only used in MSI-X interrupt mode 135 */ 136 bool efx_separate_tx_channels; 137 module_param(efx_separate_tx_channels, bool, 0444); 138 MODULE_PARM_DESC(efx_separate_tx_channels, 139 "Use separate channels for TX and RX"); 140 141 /* This is the weight assigned to each of the (per-channel) virtual 142 * NAPI devices. 143 */ 144 static int napi_weight = 64; 145 146 /* This is the time (in jiffies) between invocations of the hardware 147 * monitor. 148 * On Falcon-based NICs, this will: 149 * - Check the on-board hardware monitor; 150 * - Poll the link state and reconfigure the hardware as necessary. 151 * On Siena-based NICs for power systems with EEH support, this will give EEH a 152 * chance to start. 153 */ 154 static unsigned int efx_monitor_interval = 1 * HZ; 155 156 /* Initial interrupt moderation settings. They can be modified after 157 * module load with ethtool. 158 * 159 * The default for RX should strike a balance between increasing the 160 * round-trip latency and reducing overhead. 161 */ 162 static unsigned int rx_irq_mod_usec = 60; 163 164 /* Initial interrupt moderation settings. They can be modified after 165 * module load with ethtool. 166 * 167 * This default is chosen to ensure that a 10G link does not go idle 168 * while a TX queue is stopped after it has become full. A queue is 169 * restarted when it drops below half full. The time this takes (assuming 170 * worst case 3 descriptors per packet and 1024 descriptors) is 171 * 512 / 3 * 1.2 = 205 usec. 172 */ 173 static unsigned int tx_irq_mod_usec = 150; 174 175 /* This is the first interrupt mode to try out of: 176 * 0 => MSI-X 177 * 1 => MSI 178 * 2 => legacy 179 */ 180 static unsigned int interrupt_mode; 181 182 /* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), 183 * i.e. the number of CPUs among which we may distribute simultaneous 184 * interrupt handling. 185 * 186 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. 187 * The default (0) means to assign an interrupt to each core. 188 */ 189 static unsigned int rss_cpus; 190 module_param(rss_cpus, uint, 0444); 191 MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); 192 193 static bool phy_flash_cfg; 194 module_param(phy_flash_cfg, bool, 0644); 195 MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially"); 196 197 static unsigned irq_adapt_low_thresh = 8000; 198 module_param(irq_adapt_low_thresh, uint, 0644); 199 MODULE_PARM_DESC(irq_adapt_low_thresh, 200 "Threshold score for reducing IRQ moderation"); 201 202 static unsigned irq_adapt_high_thresh = 16000; 203 module_param(irq_adapt_high_thresh, uint, 0644); 204 MODULE_PARM_DESC(irq_adapt_high_thresh, 205 "Threshold score for increasing IRQ moderation"); 206 207 static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | 208 NETIF_MSG_LINK | NETIF_MSG_IFDOWN | 209 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | 210 NETIF_MSG_TX_ERR | NETIF_MSG_HW); 211 module_param(debug, uint, 0); 212 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); 213 214 /************************************************************************** 215 * 216 * Utility functions and prototypes 217 * 218 *************************************************************************/ 219 220 static int efx_soft_enable_interrupts(struct efx_nic *efx); 221 static void efx_soft_disable_interrupts(struct efx_nic *efx); 222 static void efx_remove_channel(struct efx_channel *channel); 223 static void efx_remove_channels(struct efx_nic *efx); 224 static const struct efx_channel_type efx_default_channel_type; 225 static void efx_remove_port(struct efx_nic *efx); 226 static void efx_init_napi_channel(struct efx_channel *channel); 227 static void efx_fini_napi(struct efx_nic *efx); 228 static void efx_fini_napi_channel(struct efx_channel *channel); 229 static void efx_fini_struct(struct efx_nic *efx); 230 static void efx_start_all(struct efx_nic *efx); 231 static void efx_stop_all(struct efx_nic *efx); 232 233 #define EFX_ASSERT_RESET_SERIALISED(efx) \ 234 do { \ 235 if ((efx->state == STATE_READY) || \ 236 (efx->state == STATE_RECOVERY) || \ 237 (efx->state == STATE_DISABLED)) \ 238 ASSERT_RTNL(); \ 239 } while (0) 240 241 static int efx_check_disabled(struct efx_nic *efx) 242 { 243 if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) { 244 netif_err(efx, drv, efx->net_dev, 245 "device is disabled due to earlier errors\n"); 246 return -EIO; 247 } 248 return 0; 249 } 250 251 /************************************************************************** 252 * 253 * Event queue processing 254 * 255 *************************************************************************/ 256 257 /* Process channel's event queue 258 * 259 * This function is responsible for processing the event queue of a 260 * single channel. The caller must guarantee that this function will 261 * never be concurrently called more than once on the same channel, 262 * though different channels may be being processed concurrently. 263 */ 264 static int efx_process_channel(struct efx_channel *channel, int budget) 265 { 266 struct efx_tx_queue *tx_queue; 267 int spent; 268 269 if (unlikely(!channel->enabled)) 270 return 0; 271 272 efx_for_each_channel_tx_queue(tx_queue, channel) { 273 tx_queue->pkts_compl = 0; 274 tx_queue->bytes_compl = 0; 275 } 276 277 spent = efx_nic_process_eventq(channel, budget); 278 if (spent && efx_channel_has_rx_queue(channel)) { 279 struct efx_rx_queue *rx_queue = 280 efx_channel_get_rx_queue(channel); 281 282 efx_rx_flush_packet(channel); 283 efx_fast_push_rx_descriptors(rx_queue, true); 284 } 285 286 /* Update BQL */ 287 efx_for_each_channel_tx_queue(tx_queue, channel) { 288 if (tx_queue->bytes_compl) { 289 netdev_tx_completed_queue(tx_queue->core_txq, 290 tx_queue->pkts_compl, tx_queue->bytes_compl); 291 } 292 } 293 294 return spent; 295 } 296 297 /* NAPI poll handler 298 * 299 * NAPI guarantees serialisation of polls of the same device, which 300 * provides the guarantee required by efx_process_channel(). 301 */ 302 static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) 303 { 304 int step = efx->irq_mod_step_us; 305 306 if (channel->irq_mod_score < irq_adapt_low_thresh) { 307 if (channel->irq_moderation_us > step) { 308 channel->irq_moderation_us -= step; 309 efx->type->push_irq_moderation(channel); 310 } 311 } else if (channel->irq_mod_score > irq_adapt_high_thresh) { 312 if (channel->irq_moderation_us < 313 efx->irq_rx_moderation_us) { 314 channel->irq_moderation_us += step; 315 efx->type->push_irq_moderation(channel); 316 } 317 } 318 319 channel->irq_count = 0; 320 channel->irq_mod_score = 0; 321 } 322 323 static int efx_poll(struct napi_struct *napi, int budget) 324 { 325 struct efx_channel *channel = 326 container_of(napi, struct efx_channel, napi_str); 327 struct efx_nic *efx = channel->efx; 328 int spent; 329 330 netif_vdbg(efx, intr, efx->net_dev, 331 "channel %d NAPI poll executing on CPU %d\n", 332 channel->channel, raw_smp_processor_id()); 333 334 spent = efx_process_channel(channel, budget); 335 336 if (spent < budget) { 337 if (efx_channel_has_rx_queue(channel) && 338 efx->irq_rx_adaptive && 339 unlikely(++channel->irq_count == 1000)) { 340 efx_update_irq_mod(efx, channel); 341 } 342 343 #ifdef CONFIG_RFS_ACCEL 344 /* Perhaps expire some ARFS filters */ 345 schedule_work(&channel->filter_work); 346 #endif 347 348 /* There is no race here; although napi_disable() will 349 * only wait for napi_complete(), this isn't a problem 350 * since efx_nic_eventq_read_ack() will have no effect if 351 * interrupts have already been disabled. 352 */ 353 if (napi_complete_done(napi, spent)) 354 efx_nic_eventq_read_ack(channel); 355 } 356 357 return spent; 358 } 359 360 /* Create event queue 361 * Event queue memory allocations are done only once. If the channel 362 * is reset, the memory buffer will be reused; this guards against 363 * errors during channel reset and also simplifies interrupt handling. 364 */ 365 static int efx_probe_eventq(struct efx_channel *channel) 366 { 367 struct efx_nic *efx = channel->efx; 368 unsigned long entries; 369 370 netif_dbg(efx, probe, efx->net_dev, 371 "chan %d create event queue\n", channel->channel); 372 373 /* Build an event queue with room for one event per tx and rx buffer, 374 * plus some extra for link state events and MCDI completions. */ 375 entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); 376 EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); 377 channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; 378 379 return efx_nic_probe_eventq(channel); 380 } 381 382 /* Prepare channel's event queue */ 383 static int efx_init_eventq(struct efx_channel *channel) 384 { 385 struct efx_nic *efx = channel->efx; 386 int rc; 387 388 EFX_WARN_ON_PARANOID(channel->eventq_init); 389 390 netif_dbg(efx, drv, efx->net_dev, 391 "chan %d init event queue\n", channel->channel); 392 393 rc = efx_nic_init_eventq(channel); 394 if (rc == 0) { 395 efx->type->push_irq_moderation(channel); 396 channel->eventq_read_ptr = 0; 397 channel->eventq_init = true; 398 } 399 return rc; 400 } 401 402 /* Enable event queue processing and NAPI */ 403 void efx_start_eventq(struct efx_channel *channel) 404 { 405 netif_dbg(channel->efx, ifup, channel->efx->net_dev, 406 "chan %d start event queue\n", channel->channel); 407 408 /* Make sure the NAPI handler sees the enabled flag set */ 409 channel->enabled = true; 410 smp_wmb(); 411 412 napi_enable(&channel->napi_str); 413 efx_nic_eventq_read_ack(channel); 414 } 415 416 /* Disable event queue processing and NAPI */ 417 void efx_stop_eventq(struct efx_channel *channel) 418 { 419 if (!channel->enabled) 420 return; 421 422 napi_disable(&channel->napi_str); 423 channel->enabled = false; 424 } 425 426 static void efx_fini_eventq(struct efx_channel *channel) 427 { 428 if (!channel->eventq_init) 429 return; 430 431 netif_dbg(channel->efx, drv, channel->efx->net_dev, 432 "chan %d fini event queue\n", channel->channel); 433 434 efx_nic_fini_eventq(channel); 435 channel->eventq_init = false; 436 } 437 438 static void efx_remove_eventq(struct efx_channel *channel) 439 { 440 netif_dbg(channel->efx, drv, channel->efx->net_dev, 441 "chan %d remove event queue\n", channel->channel); 442 443 efx_nic_remove_eventq(channel); 444 } 445 446 /************************************************************************** 447 * 448 * Channel handling 449 * 450 *************************************************************************/ 451 452 /* Allocate and initialise a channel structure. */ 453 static struct efx_channel * 454 efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) 455 { 456 struct efx_channel *channel; 457 struct efx_rx_queue *rx_queue; 458 struct efx_tx_queue *tx_queue; 459 int j; 460 461 channel = kzalloc(sizeof(*channel), GFP_KERNEL); 462 if (!channel) 463 return NULL; 464 465 channel->efx = efx; 466 channel->channel = i; 467 channel->type = &efx_default_channel_type; 468 469 for (j = 0; j < EFX_TXQ_TYPES; j++) { 470 tx_queue = &channel->tx_queue[j]; 471 tx_queue->efx = efx; 472 tx_queue->queue = i * EFX_TXQ_TYPES + j; 473 tx_queue->channel = channel; 474 } 475 476 #ifdef CONFIG_RFS_ACCEL 477 INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); 478 #endif 479 480 rx_queue = &channel->rx_queue; 481 rx_queue->efx = efx; 482 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); 483 484 return channel; 485 } 486 487 /* Allocate and initialise a channel structure, copying parameters 488 * (but not resources) from an old channel structure. 489 */ 490 static struct efx_channel * 491 efx_copy_channel(const struct efx_channel *old_channel) 492 { 493 struct efx_channel *channel; 494 struct efx_rx_queue *rx_queue; 495 struct efx_tx_queue *tx_queue; 496 int j; 497 498 channel = kmalloc(sizeof(*channel), GFP_KERNEL); 499 if (!channel) 500 return NULL; 501 502 *channel = *old_channel; 503 504 channel->napi_dev = NULL; 505 INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); 506 channel->napi_str.napi_id = 0; 507 channel->napi_str.state = 0; 508 memset(&channel->eventq, 0, sizeof(channel->eventq)); 509 510 for (j = 0; j < EFX_TXQ_TYPES; j++) { 511 tx_queue = &channel->tx_queue[j]; 512 if (tx_queue->channel) 513 tx_queue->channel = channel; 514 tx_queue->buffer = NULL; 515 memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); 516 } 517 518 rx_queue = &channel->rx_queue; 519 rx_queue->buffer = NULL; 520 memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); 521 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); 522 #ifdef CONFIG_RFS_ACCEL 523 INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); 524 #endif 525 526 return channel; 527 } 528 529 static int efx_probe_channel(struct efx_channel *channel) 530 { 531 struct efx_tx_queue *tx_queue; 532 struct efx_rx_queue *rx_queue; 533 int rc; 534 535 netif_dbg(channel->efx, probe, channel->efx->net_dev, 536 "creating channel %d\n", channel->channel); 537 538 rc = channel->type->pre_probe(channel); 539 if (rc) 540 goto fail; 541 542 rc = efx_probe_eventq(channel); 543 if (rc) 544 goto fail; 545 546 efx_for_each_channel_tx_queue(tx_queue, channel) { 547 rc = efx_probe_tx_queue(tx_queue); 548 if (rc) 549 goto fail; 550 } 551 552 efx_for_each_channel_rx_queue(rx_queue, channel) { 553 rc = efx_probe_rx_queue(rx_queue); 554 if (rc) 555 goto fail; 556 } 557 558 return 0; 559 560 fail: 561 efx_remove_channel(channel); 562 return rc; 563 } 564 565 static void 566 efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) 567 { 568 struct efx_nic *efx = channel->efx; 569 const char *type; 570 int number; 571 572 number = channel->channel; 573 if (efx->tx_channel_offset == 0) { 574 type = ""; 575 } else if (channel->channel < efx->tx_channel_offset) { 576 type = "-rx"; 577 } else { 578 type = "-tx"; 579 number -= efx->tx_channel_offset; 580 } 581 snprintf(buf, len, "%s%s-%d", efx->name, type, number); 582 } 583 584 static void efx_set_channel_names(struct efx_nic *efx) 585 { 586 struct efx_channel *channel; 587 588 efx_for_each_channel(channel, efx) 589 channel->type->get_name(channel, 590 efx->msi_context[channel->channel].name, 591 sizeof(efx->msi_context[0].name)); 592 } 593 594 static int efx_probe_channels(struct efx_nic *efx) 595 { 596 struct efx_channel *channel; 597 int rc; 598 599 /* Restart special buffer allocation */ 600 efx->next_buffer_table = 0; 601 602 /* Probe channels in reverse, so that any 'extra' channels 603 * use the start of the buffer table. This allows the traffic 604 * channels to be resized without moving them or wasting the 605 * entries before them. 606 */ 607 efx_for_each_channel_rev(channel, efx) { 608 rc = efx_probe_channel(channel); 609 if (rc) { 610 netif_err(efx, probe, efx->net_dev, 611 "failed to create channel %d\n", 612 channel->channel); 613 goto fail; 614 } 615 } 616 efx_set_channel_names(efx); 617 618 return 0; 619 620 fail: 621 efx_remove_channels(efx); 622 return rc; 623 } 624 625 /* Channels are shutdown and reinitialised whilst the NIC is running 626 * to propagate configuration changes (mtu, checksum offload), or 627 * to clear hardware error conditions 628 */ 629 static void efx_start_datapath(struct efx_nic *efx) 630 { 631 netdev_features_t old_features = efx->net_dev->features; 632 bool old_rx_scatter = efx->rx_scatter; 633 struct efx_tx_queue *tx_queue; 634 struct efx_rx_queue *rx_queue; 635 struct efx_channel *channel; 636 size_t rx_buf_len; 637 638 /* Calculate the rx buffer allocation parameters required to 639 * support the current MTU, including padding for header 640 * alignment and overruns. 641 */ 642 efx->rx_dma_len = (efx->rx_prefix_size + 643 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 644 efx->type->rx_buffer_padding); 645 rx_buf_len = (sizeof(struct efx_rx_page_state) + 646 efx->rx_ip_align + efx->rx_dma_len); 647 if (rx_buf_len <= PAGE_SIZE) { 648 efx->rx_scatter = efx->type->always_rx_scatter; 649 efx->rx_buffer_order = 0; 650 } else if (efx->type->can_rx_scatter) { 651 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); 652 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + 653 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, 654 EFX_RX_BUF_ALIGNMENT) > 655 PAGE_SIZE); 656 efx->rx_scatter = true; 657 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; 658 efx->rx_buffer_order = 0; 659 } else { 660 efx->rx_scatter = false; 661 efx->rx_buffer_order = get_order(rx_buf_len); 662 } 663 664 efx_rx_config_page_split(efx); 665 if (efx->rx_buffer_order) 666 netif_dbg(efx, drv, efx->net_dev, 667 "RX buf len=%u; page order=%u batch=%u\n", 668 efx->rx_dma_len, efx->rx_buffer_order, 669 efx->rx_pages_per_batch); 670 else 671 netif_dbg(efx, drv, efx->net_dev, 672 "RX buf len=%u step=%u bpp=%u; page batch=%u\n", 673 efx->rx_dma_len, efx->rx_page_buf_step, 674 efx->rx_bufs_per_page, efx->rx_pages_per_batch); 675 676 /* Restore previously fixed features in hw_features and remove 677 * features which are fixed now 678 */ 679 efx->net_dev->hw_features |= efx->net_dev->features; 680 efx->net_dev->hw_features &= ~efx->fixed_features; 681 efx->net_dev->features |= efx->fixed_features; 682 if (efx->net_dev->features != old_features) 683 netdev_features_change(efx->net_dev); 684 685 /* RX filters may also have scatter-enabled flags */ 686 if (efx->rx_scatter != old_rx_scatter) 687 efx->type->filter_update_rx_scatter(efx); 688 689 /* We must keep at least one descriptor in a TX ring empty. 690 * We could avoid this when the queue size does not exactly 691 * match the hardware ring size, but it's not that important. 692 * Therefore we stop the queue when one more skb might fill 693 * the ring completely. We wake it when half way back to 694 * empty. 695 */ 696 efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); 697 efx->txq_wake_thresh = efx->txq_stop_thresh / 2; 698 699 /* Initialise the channels */ 700 efx_for_each_channel(channel, efx) { 701 efx_for_each_channel_tx_queue(tx_queue, channel) { 702 efx_init_tx_queue(tx_queue); 703 atomic_inc(&efx->active_queues); 704 } 705 706 efx_for_each_channel_rx_queue(rx_queue, channel) { 707 efx_init_rx_queue(rx_queue); 708 atomic_inc(&efx->active_queues); 709 efx_stop_eventq(channel); 710 efx_fast_push_rx_descriptors(rx_queue, false); 711 efx_start_eventq(channel); 712 } 713 714 WARN_ON(channel->rx_pkt_n_frags); 715 } 716 717 efx_ptp_start_datapath(efx); 718 719 if (netif_device_present(efx->net_dev)) 720 netif_tx_wake_all_queues(efx->net_dev); 721 } 722 723 static void efx_stop_datapath(struct efx_nic *efx) 724 { 725 struct efx_channel *channel; 726 struct efx_tx_queue *tx_queue; 727 struct efx_rx_queue *rx_queue; 728 int rc; 729 730 EFX_ASSERT_RESET_SERIALISED(efx); 731 BUG_ON(efx->port_enabled); 732 733 efx_ptp_stop_datapath(efx); 734 735 /* Stop RX refill */ 736 efx_for_each_channel(channel, efx) { 737 efx_for_each_channel_rx_queue(rx_queue, channel) 738 rx_queue->refill_enabled = false; 739 } 740 741 efx_for_each_channel(channel, efx) { 742 /* RX packet processing is pipelined, so wait for the 743 * NAPI handler to complete. At least event queue 0 744 * might be kept active by non-data events, so don't 745 * use napi_synchronize() but actually disable NAPI 746 * temporarily. 747 */ 748 if (efx_channel_has_rx_queue(channel)) { 749 efx_stop_eventq(channel); 750 efx_start_eventq(channel); 751 } 752 } 753 754 rc = efx->type->fini_dmaq(efx); 755 if (rc) { 756 netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); 757 } else { 758 netif_dbg(efx, drv, efx->net_dev, 759 "successfully flushed all queues\n"); 760 } 761 762 efx_for_each_channel(channel, efx) { 763 efx_for_each_channel_rx_queue(rx_queue, channel) 764 efx_fini_rx_queue(rx_queue); 765 efx_for_each_possible_channel_tx_queue(tx_queue, channel) 766 efx_fini_tx_queue(tx_queue); 767 } 768 } 769 770 static void efx_remove_channel(struct efx_channel *channel) 771 { 772 struct efx_tx_queue *tx_queue; 773 struct efx_rx_queue *rx_queue; 774 775 netif_dbg(channel->efx, drv, channel->efx->net_dev, 776 "destroy chan %d\n", channel->channel); 777 778 efx_for_each_channel_rx_queue(rx_queue, channel) 779 efx_remove_rx_queue(rx_queue); 780 efx_for_each_possible_channel_tx_queue(tx_queue, channel) 781 efx_remove_tx_queue(tx_queue); 782 efx_remove_eventq(channel); 783 channel->type->post_remove(channel); 784 } 785 786 static void efx_remove_channels(struct efx_nic *efx) 787 { 788 struct efx_channel *channel; 789 790 efx_for_each_channel(channel, efx) 791 efx_remove_channel(channel); 792 } 793 794 int 795 efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) 796 { 797 struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; 798 u32 old_rxq_entries, old_txq_entries; 799 unsigned i, next_buffer_table = 0; 800 int rc, rc2; 801 802 rc = efx_check_disabled(efx); 803 if (rc) 804 return rc; 805 806 /* Not all channels should be reallocated. We must avoid 807 * reallocating their buffer table entries. 808 */ 809 efx_for_each_channel(channel, efx) { 810 struct efx_rx_queue *rx_queue; 811 struct efx_tx_queue *tx_queue; 812 813 if (channel->type->copy) 814 continue; 815 next_buffer_table = max(next_buffer_table, 816 channel->eventq.index + 817 channel->eventq.entries); 818 efx_for_each_channel_rx_queue(rx_queue, channel) 819 next_buffer_table = max(next_buffer_table, 820 rx_queue->rxd.index + 821 rx_queue->rxd.entries); 822 efx_for_each_channel_tx_queue(tx_queue, channel) 823 next_buffer_table = max(next_buffer_table, 824 tx_queue->txd.index + 825 tx_queue->txd.entries); 826 } 827 828 efx_device_detach_sync(efx); 829 efx_stop_all(efx); 830 efx_soft_disable_interrupts(efx); 831 832 /* Clone channels (where possible) */ 833 memset(other_channel, 0, sizeof(other_channel)); 834 for (i = 0; i < efx->n_channels; i++) { 835 channel = efx->channel[i]; 836 if (channel->type->copy) 837 channel = channel->type->copy(channel); 838 if (!channel) { 839 rc = -ENOMEM; 840 goto out; 841 } 842 other_channel[i] = channel; 843 } 844 845 /* Swap entry counts and channel pointers */ 846 old_rxq_entries = efx->rxq_entries; 847 old_txq_entries = efx->txq_entries; 848 efx->rxq_entries = rxq_entries; 849 efx->txq_entries = txq_entries; 850 for (i = 0; i < efx->n_channels; i++) { 851 channel = efx->channel[i]; 852 efx->channel[i] = other_channel[i]; 853 other_channel[i] = channel; 854 } 855 856 /* Restart buffer table allocation */ 857 efx->next_buffer_table = next_buffer_table; 858 859 for (i = 0; i < efx->n_channels; i++) { 860 channel = efx->channel[i]; 861 if (!channel->type->copy) 862 continue; 863 rc = efx_probe_channel(channel); 864 if (rc) 865 goto rollback; 866 efx_init_napi_channel(efx->channel[i]); 867 } 868 869 out: 870 /* Destroy unused channel structures */ 871 for (i = 0; i < efx->n_channels; i++) { 872 channel = other_channel[i]; 873 if (channel && channel->type->copy) { 874 efx_fini_napi_channel(channel); 875 efx_remove_channel(channel); 876 kfree(channel); 877 } 878 } 879 880 rc2 = efx_soft_enable_interrupts(efx); 881 if (rc2) { 882 rc = rc ? rc : rc2; 883 netif_err(efx, drv, efx->net_dev, 884 "unable to restart interrupts on channel reallocation\n"); 885 efx_schedule_reset(efx, RESET_TYPE_DISABLE); 886 } else { 887 efx_start_all(efx); 888 efx_device_attach_if_not_resetting(efx); 889 } 890 return rc; 891 892 rollback: 893 /* Swap back */ 894 efx->rxq_entries = old_rxq_entries; 895 efx->txq_entries = old_txq_entries; 896 for (i = 0; i < efx->n_channels; i++) { 897 channel = efx->channel[i]; 898 efx->channel[i] = other_channel[i]; 899 other_channel[i] = channel; 900 } 901 goto out; 902 } 903 904 void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) 905 { 906 mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); 907 } 908 909 static bool efx_default_channel_want_txqs(struct efx_channel *channel) 910 { 911 return channel->channel - channel->efx->tx_channel_offset < 912 channel->efx->n_tx_channels; 913 } 914 915 static const struct efx_channel_type efx_default_channel_type = { 916 .pre_probe = efx_channel_dummy_op_int, 917 .post_remove = efx_channel_dummy_op_void, 918 .get_name = efx_get_channel_name, 919 .copy = efx_copy_channel, 920 .want_txqs = efx_default_channel_want_txqs, 921 .keep_eventq = false, 922 .want_pio = true, 923 }; 924 925 int efx_channel_dummy_op_int(struct efx_channel *channel) 926 { 927 return 0; 928 } 929 930 void efx_channel_dummy_op_void(struct efx_channel *channel) 931 { 932 } 933 934 /************************************************************************** 935 * 936 * Port handling 937 * 938 **************************************************************************/ 939 940 /* This ensures that the kernel is kept informed (via 941 * netif_carrier_on/off) of the link status, and also maintains the 942 * link status's stop on the port's TX queue. 943 */ 944 void efx_link_status_changed(struct efx_nic *efx) 945 { 946 struct efx_link_state *link_state = &efx->link_state; 947 948 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 949 * that no events are triggered between unregister_netdev() and the 950 * driver unloading. A more general condition is that NETDEV_CHANGE 951 * can only be generated between NETDEV_UP and NETDEV_DOWN */ 952 if (!netif_running(efx->net_dev)) 953 return; 954 955 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 956 efx->n_link_state_changes++; 957 958 if (link_state->up) 959 netif_carrier_on(efx->net_dev); 960 else 961 netif_carrier_off(efx->net_dev); 962 } 963 964 /* Status message for kernel log */ 965 if (link_state->up) 966 netif_info(efx, link, efx->net_dev, 967 "link up at %uMbps %s-duplex (MTU %d)\n", 968 link_state->speed, link_state->fd ? "full" : "half", 969 efx->net_dev->mtu); 970 else 971 netif_info(efx, link, efx->net_dev, "link down\n"); 972 } 973 974 void efx_link_set_advertising(struct efx_nic *efx, 975 const unsigned long *advertising) 976 { 977 memcpy(efx->link_advertising, advertising, 978 sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); 979 980 efx->link_advertising[0] |= ADVERTISED_Autoneg; 981 if (advertising[0] & ADVERTISED_Pause) 982 efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); 983 else 984 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 985 if (advertising[0] & ADVERTISED_Asym_Pause) 986 efx->wanted_fc ^= EFX_FC_TX; 987 } 988 989 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not 990 * force the Autoneg bit on. 991 */ 992 void efx_link_clear_advertising(struct efx_nic *efx) 993 { 994 bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); 995 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 996 } 997 998 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) 999 { 1000 efx->wanted_fc = wanted_fc; 1001 if (efx->link_advertising[0]) { 1002 if (wanted_fc & EFX_FC_RX) 1003 efx->link_advertising[0] |= (ADVERTISED_Pause | 1004 ADVERTISED_Asym_Pause); 1005 else 1006 efx->link_advertising[0] &= ~(ADVERTISED_Pause | 1007 ADVERTISED_Asym_Pause); 1008 if (wanted_fc & EFX_FC_TX) 1009 efx->link_advertising[0] ^= ADVERTISED_Asym_Pause; 1010 } 1011 } 1012 1013 static void efx_fini_port(struct efx_nic *efx); 1014 1015 /* We assume that efx->type->reconfigure_mac will always try to sync RX 1016 * filters and therefore needs to read-lock the filter table against freeing 1017 */ 1018 void efx_mac_reconfigure(struct efx_nic *efx) 1019 { 1020 down_read(&efx->filter_sem); 1021 efx->type->reconfigure_mac(efx); 1022 up_read(&efx->filter_sem); 1023 } 1024 1025 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 1026 * the MAC appropriately. All other PHY configuration changes are pushed 1027 * through phy_op->set_settings(), and pushed asynchronously to the MAC 1028 * through efx_monitor(). 1029 * 1030 * Callers must hold the mac_lock 1031 */ 1032 int __efx_reconfigure_port(struct efx_nic *efx) 1033 { 1034 enum efx_phy_mode phy_mode; 1035 int rc; 1036 1037 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 1038 1039 /* Disable PHY transmit in mac level loopbacks */ 1040 phy_mode = efx->phy_mode; 1041 if (LOOPBACK_INTERNAL(efx)) 1042 efx->phy_mode |= PHY_MODE_TX_DISABLED; 1043 else 1044 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 1045 1046 rc = efx->type->reconfigure_port(efx); 1047 1048 if (rc) 1049 efx->phy_mode = phy_mode; 1050 1051 return rc; 1052 } 1053 1054 /* Reinitialise the MAC to pick up new PHY settings, even if the port is 1055 * disabled. */ 1056 int efx_reconfigure_port(struct efx_nic *efx) 1057 { 1058 int rc; 1059 1060 EFX_ASSERT_RESET_SERIALISED(efx); 1061 1062 mutex_lock(&efx->mac_lock); 1063 rc = __efx_reconfigure_port(efx); 1064 mutex_unlock(&efx->mac_lock); 1065 1066 return rc; 1067 } 1068 1069 /* Asynchronous work item for changing MAC promiscuity and multicast 1070 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 1071 * MAC directly. */ 1072 static void efx_mac_work(struct work_struct *data) 1073 { 1074 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 1075 1076 mutex_lock(&efx->mac_lock); 1077 if (efx->port_enabled) 1078 efx_mac_reconfigure(efx); 1079 mutex_unlock(&efx->mac_lock); 1080 } 1081 1082 static int efx_probe_port(struct efx_nic *efx) 1083 { 1084 int rc; 1085 1086 netif_dbg(efx, probe, efx->net_dev, "create port\n"); 1087 1088 if (phy_flash_cfg) 1089 efx->phy_mode = PHY_MODE_SPECIAL; 1090 1091 /* Connect up MAC/PHY operations table */ 1092 rc = efx->type->probe_port(efx); 1093 if (rc) 1094 return rc; 1095 1096 /* Initialise MAC address to permanent address */ 1097 ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr); 1098 1099 return 0; 1100 } 1101 1102 static int efx_init_port(struct efx_nic *efx) 1103 { 1104 int rc; 1105 1106 netif_dbg(efx, drv, efx->net_dev, "init port\n"); 1107 1108 mutex_lock(&efx->mac_lock); 1109 1110 rc = efx->phy_op->init(efx); 1111 if (rc) 1112 goto fail1; 1113 1114 efx->port_initialized = true; 1115 1116 /* Reconfigure the MAC before creating dma queues (required for 1117 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */ 1118 efx_mac_reconfigure(efx); 1119 1120 /* Ensure the PHY advertises the correct flow control settings */ 1121 rc = efx->phy_op->reconfigure(efx); 1122 if (rc && rc != -EPERM) 1123 goto fail2; 1124 1125 mutex_unlock(&efx->mac_lock); 1126 return 0; 1127 1128 fail2: 1129 efx->phy_op->fini(efx); 1130 fail1: 1131 mutex_unlock(&efx->mac_lock); 1132 return rc; 1133 } 1134 1135 static void efx_start_port(struct efx_nic *efx) 1136 { 1137 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 1138 BUG_ON(efx->port_enabled); 1139 1140 mutex_lock(&efx->mac_lock); 1141 efx->port_enabled = true; 1142 1143 /* Ensure MAC ingress/egress is enabled */ 1144 efx_mac_reconfigure(efx); 1145 1146 mutex_unlock(&efx->mac_lock); 1147 } 1148 1149 /* Cancel work for MAC reconfiguration, periodic hardware monitoring 1150 * and the async self-test, wait for them to finish and prevent them 1151 * being scheduled again. This doesn't cover online resets, which 1152 * should only be cancelled when removing the device. 1153 */ 1154 static void efx_stop_port(struct efx_nic *efx) 1155 { 1156 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 1157 1158 EFX_ASSERT_RESET_SERIALISED(efx); 1159 1160 mutex_lock(&efx->mac_lock); 1161 efx->port_enabled = false; 1162 mutex_unlock(&efx->mac_lock); 1163 1164 /* Serialise against efx_set_multicast_list() */ 1165 netif_addr_lock_bh(efx->net_dev); 1166 netif_addr_unlock_bh(efx->net_dev); 1167 1168 cancel_delayed_work_sync(&efx->monitor_work); 1169 efx_selftest_async_cancel(efx); 1170 cancel_work_sync(&efx->mac_work); 1171 } 1172 1173 static void efx_fini_port(struct efx_nic *efx) 1174 { 1175 netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); 1176 1177 if (!efx->port_initialized) 1178 return; 1179 1180 efx->phy_op->fini(efx); 1181 efx->port_initialized = false; 1182 1183 efx->link_state.up = false; 1184 efx_link_status_changed(efx); 1185 } 1186 1187 static void efx_remove_port(struct efx_nic *efx) 1188 { 1189 netif_dbg(efx, drv, efx->net_dev, "destroying port\n"); 1190 1191 efx->type->remove_port(efx); 1192 } 1193 1194 /************************************************************************** 1195 * 1196 * NIC handling 1197 * 1198 **************************************************************************/ 1199 1200 static LIST_HEAD(efx_primary_list); 1201 static LIST_HEAD(efx_unassociated_list); 1202 1203 static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right) 1204 { 1205 return left->type == right->type && 1206 left->vpd_sn && right->vpd_sn && 1207 !strcmp(left->vpd_sn, right->vpd_sn); 1208 } 1209 1210 static void efx_associate(struct efx_nic *efx) 1211 { 1212 struct efx_nic *other, *next; 1213 1214 if (efx->primary == efx) { 1215 /* Adding primary function; look for secondaries */ 1216 1217 netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n"); 1218 list_add_tail(&efx->node, &efx_primary_list); 1219 1220 list_for_each_entry_safe(other, next, &efx_unassociated_list, 1221 node) { 1222 if (efx_same_controller(efx, other)) { 1223 list_del(&other->node); 1224 netif_dbg(other, probe, other->net_dev, 1225 "moving to secondary list of %s %s\n", 1226 pci_name(efx->pci_dev), 1227 efx->net_dev->name); 1228 list_add_tail(&other->node, 1229 &efx->secondary_list); 1230 other->primary = efx; 1231 } 1232 } 1233 } else { 1234 /* Adding secondary function; look for primary */ 1235 1236 list_for_each_entry(other, &efx_primary_list, node) { 1237 if (efx_same_controller(efx, other)) { 1238 netif_dbg(efx, probe, efx->net_dev, 1239 "adding to secondary list of %s %s\n", 1240 pci_name(other->pci_dev), 1241 other->net_dev->name); 1242 list_add_tail(&efx->node, 1243 &other->secondary_list); 1244 efx->primary = other; 1245 return; 1246 } 1247 } 1248 1249 netif_dbg(efx, probe, efx->net_dev, 1250 "adding to unassociated list\n"); 1251 list_add_tail(&efx->node, &efx_unassociated_list); 1252 } 1253 } 1254 1255 static void efx_dissociate(struct efx_nic *efx) 1256 { 1257 struct efx_nic *other, *next; 1258 1259 list_del(&efx->node); 1260 efx->primary = NULL; 1261 1262 list_for_each_entry_safe(other, next, &efx->secondary_list, node) { 1263 list_del(&other->node); 1264 netif_dbg(other, probe, other->net_dev, 1265 "moving to unassociated list\n"); 1266 list_add_tail(&other->node, &efx_unassociated_list); 1267 other->primary = NULL; 1268 } 1269 } 1270 1271 /* This configures the PCI device to enable I/O and DMA. */ 1272 static int efx_init_io(struct efx_nic *efx) 1273 { 1274 struct pci_dev *pci_dev = efx->pci_dev; 1275 dma_addr_t dma_mask = efx->type->max_dma_mask; 1276 unsigned int mem_map_size = efx->type->mem_map_size(efx); 1277 int rc, bar; 1278 1279 netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); 1280 1281 bar = efx->type->mem_bar(efx); 1282 1283 rc = pci_enable_device(pci_dev); 1284 if (rc) { 1285 netif_err(efx, probe, efx->net_dev, 1286 "failed to enable PCI device\n"); 1287 goto fail1; 1288 } 1289 1290 pci_set_master(pci_dev); 1291 1292 /* Set the PCI DMA mask. Try all possibilities from our 1293 * genuine mask down to 32 bits, because some architectures 1294 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit 1295 * masks event though they reject 46 bit masks. 1296 */ 1297 while (dma_mask > 0x7fffffffUL) { 1298 rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); 1299 if (rc == 0) 1300 break; 1301 dma_mask >>= 1; 1302 } 1303 if (rc) { 1304 netif_err(efx, probe, efx->net_dev, 1305 "could not find a suitable DMA mask\n"); 1306 goto fail2; 1307 } 1308 netif_dbg(efx, probe, efx->net_dev, 1309 "using DMA mask %llx\n", (unsigned long long) dma_mask); 1310 1311 efx->membase_phys = pci_resource_start(efx->pci_dev, bar); 1312 rc = pci_request_region(pci_dev, bar, "sfc"); 1313 if (rc) { 1314 netif_err(efx, probe, efx->net_dev, 1315 "request for memory BAR failed\n"); 1316 rc = -EIO; 1317 goto fail3; 1318 } 1319 efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size); 1320 if (!efx->membase) { 1321 netif_err(efx, probe, efx->net_dev, 1322 "could not map memory BAR at %llx+%x\n", 1323 (unsigned long long)efx->membase_phys, mem_map_size); 1324 rc = -ENOMEM; 1325 goto fail4; 1326 } 1327 netif_dbg(efx, probe, efx->net_dev, 1328 "memory BAR at %llx+%x (virtual %p)\n", 1329 (unsigned long long)efx->membase_phys, mem_map_size, 1330 efx->membase); 1331 1332 return 0; 1333 1334 fail4: 1335 pci_release_region(efx->pci_dev, bar); 1336 fail3: 1337 efx->membase_phys = 0; 1338 fail2: 1339 pci_disable_device(efx->pci_dev); 1340 fail1: 1341 return rc; 1342 } 1343 1344 static void efx_fini_io(struct efx_nic *efx) 1345 { 1346 int bar; 1347 1348 netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); 1349 1350 if (efx->membase) { 1351 iounmap(efx->membase); 1352 efx->membase = NULL; 1353 } 1354 1355 if (efx->membase_phys) { 1356 bar = efx->type->mem_bar(efx); 1357 pci_release_region(efx->pci_dev, bar); 1358 efx->membase_phys = 0; 1359 } 1360 1361 /* Don't disable bus-mastering if VFs are assigned */ 1362 if (!pci_vfs_assigned(efx->pci_dev)) 1363 pci_disable_device(efx->pci_dev); 1364 } 1365 1366 void efx_set_default_rx_indir_table(struct efx_nic *efx, 1367 struct efx_rss_context *ctx) 1368 { 1369 size_t i; 1370 1371 for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) 1372 ctx->rx_indir_table[i] = 1373 ethtool_rxfh_indir_default(i, efx->rss_spread); 1374 } 1375 1376 static unsigned int efx_wanted_parallelism(struct efx_nic *efx) 1377 { 1378 cpumask_var_t thread_mask; 1379 unsigned int count; 1380 int cpu; 1381 1382 if (rss_cpus) { 1383 count = rss_cpus; 1384 } else { 1385 if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) { 1386 netif_warn(efx, probe, efx->net_dev, 1387 "RSS disabled due to allocation failure\n"); 1388 return 1; 1389 } 1390 1391 count = 0; 1392 for_each_online_cpu(cpu) { 1393 if (!cpumask_test_cpu(cpu, thread_mask)) { 1394 ++count; 1395 cpumask_or(thread_mask, thread_mask, 1396 topology_sibling_cpumask(cpu)); 1397 } 1398 } 1399 1400 free_cpumask_var(thread_mask); 1401 } 1402 1403 if (count > EFX_MAX_RX_QUEUES) { 1404 netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, 1405 "Reducing number of rx queues from %u to %u.\n", 1406 count, EFX_MAX_RX_QUEUES); 1407 count = EFX_MAX_RX_QUEUES; 1408 } 1409 1410 /* If RSS is requested for the PF *and* VFs then we can't write RSS 1411 * table entries that are inaccessible to VFs 1412 */ 1413 #ifdef CONFIG_SFC_SRIOV 1414 if (efx->type->sriov_wanted) { 1415 if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && 1416 count > efx_vf_size(efx)) { 1417 netif_warn(efx, probe, efx->net_dev, 1418 "Reducing number of RSS channels from %u to %u for " 1419 "VF support. Increase vf-msix-limit to use more " 1420 "channels on the PF.\n", 1421 count, efx_vf_size(efx)); 1422 count = efx_vf_size(efx); 1423 } 1424 } 1425 #endif 1426 1427 return count; 1428 } 1429 1430 /* Probe the number and type of interrupts we are able to obtain, and 1431 * the resulting numbers of channels and RX queues. 1432 */ 1433 static int efx_probe_interrupts(struct efx_nic *efx) 1434 { 1435 unsigned int extra_channels = 0; 1436 unsigned int i, j; 1437 int rc; 1438 1439 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) 1440 if (efx->extra_channel_type[i]) 1441 ++extra_channels; 1442 1443 if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { 1444 struct msix_entry xentries[EFX_MAX_CHANNELS]; 1445 unsigned int n_channels; 1446 1447 n_channels = efx_wanted_parallelism(efx); 1448 if (efx_separate_tx_channels) 1449 n_channels *= 2; 1450 n_channels += extra_channels; 1451 n_channels = min(n_channels, efx->max_channels); 1452 1453 for (i = 0; i < n_channels; i++) 1454 xentries[i].entry = i; 1455 rc = pci_enable_msix_range(efx->pci_dev, 1456 xentries, 1, n_channels); 1457 if (rc < 0) { 1458 /* Fall back to single channel MSI */ 1459 netif_err(efx, drv, efx->net_dev, 1460 "could not enable MSI-X\n"); 1461 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) 1462 efx->interrupt_mode = EFX_INT_MODE_MSI; 1463 else 1464 return rc; 1465 } else if (rc < n_channels) { 1466 netif_err(efx, drv, efx->net_dev, 1467 "WARNING: Insufficient MSI-X vectors" 1468 " available (%d < %u).\n", rc, n_channels); 1469 netif_err(efx, drv, efx->net_dev, 1470 "WARNING: Performance may be reduced.\n"); 1471 n_channels = rc; 1472 } 1473 1474 if (rc > 0) { 1475 efx->n_channels = n_channels; 1476 if (n_channels > extra_channels) 1477 n_channels -= extra_channels; 1478 if (efx_separate_tx_channels) { 1479 efx->n_tx_channels = min(max(n_channels / 2, 1480 1U), 1481 efx->max_tx_channels); 1482 efx->n_rx_channels = max(n_channels - 1483 efx->n_tx_channels, 1484 1U); 1485 } else { 1486 efx->n_tx_channels = min(n_channels, 1487 efx->max_tx_channels); 1488 efx->n_rx_channels = n_channels; 1489 } 1490 for (i = 0; i < efx->n_channels; i++) 1491 efx_get_channel(efx, i)->irq = 1492 xentries[i].vector; 1493 } 1494 } 1495 1496 /* Try single interrupt MSI */ 1497 if (efx->interrupt_mode == EFX_INT_MODE_MSI) { 1498 efx->n_channels = 1; 1499 efx->n_rx_channels = 1; 1500 efx->n_tx_channels = 1; 1501 rc = pci_enable_msi(efx->pci_dev); 1502 if (rc == 0) { 1503 efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; 1504 } else { 1505 netif_err(efx, drv, efx->net_dev, 1506 "could not enable MSI\n"); 1507 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) 1508 efx->interrupt_mode = EFX_INT_MODE_LEGACY; 1509 else 1510 return rc; 1511 } 1512 } 1513 1514 /* Assume legacy interrupts */ 1515 if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { 1516 efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); 1517 efx->n_rx_channels = 1; 1518 efx->n_tx_channels = 1; 1519 efx->legacy_irq = efx->pci_dev->irq; 1520 } 1521 1522 /* Assign extra channels if possible */ 1523 efx->n_extra_tx_channels = 0; 1524 j = efx->n_channels; 1525 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { 1526 if (!efx->extra_channel_type[i]) 1527 continue; 1528 if (efx->interrupt_mode != EFX_INT_MODE_MSIX || 1529 efx->n_channels <= extra_channels) { 1530 efx->extra_channel_type[i]->handle_no_channel(efx); 1531 } else { 1532 --j; 1533 efx_get_channel(efx, j)->type = 1534 efx->extra_channel_type[i]; 1535 if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) 1536 efx->n_extra_tx_channels++; 1537 } 1538 } 1539 1540 /* RSS might be usable on VFs even if it is disabled on the PF */ 1541 #ifdef CONFIG_SFC_SRIOV 1542 if (efx->type->sriov_wanted) { 1543 efx->rss_spread = ((efx->n_rx_channels > 1 || 1544 !efx->type->sriov_wanted(efx)) ? 1545 efx->n_rx_channels : efx_vf_size(efx)); 1546 return 0; 1547 } 1548 #endif 1549 efx->rss_spread = efx->n_rx_channels; 1550 1551 return 0; 1552 } 1553 1554 static int efx_soft_enable_interrupts(struct efx_nic *efx) 1555 { 1556 struct efx_channel *channel, *end_channel; 1557 int rc; 1558 1559 BUG_ON(efx->state == STATE_DISABLED); 1560 1561 efx->irq_soft_enabled = true; 1562 smp_wmb(); 1563 1564 efx_for_each_channel(channel, efx) { 1565 if (!channel->type->keep_eventq) { 1566 rc = efx_init_eventq(channel); 1567 if (rc) 1568 goto fail; 1569 } 1570 efx_start_eventq(channel); 1571 } 1572 1573 efx_mcdi_mode_event(efx); 1574 1575 return 0; 1576 fail: 1577 end_channel = channel; 1578 efx_for_each_channel(channel, efx) { 1579 if (channel == end_channel) 1580 break; 1581 efx_stop_eventq(channel); 1582 if (!channel->type->keep_eventq) 1583 efx_fini_eventq(channel); 1584 } 1585 1586 return rc; 1587 } 1588 1589 static void efx_soft_disable_interrupts(struct efx_nic *efx) 1590 { 1591 struct efx_channel *channel; 1592 1593 if (efx->state == STATE_DISABLED) 1594 return; 1595 1596 efx_mcdi_mode_poll(efx); 1597 1598 efx->irq_soft_enabled = false; 1599 smp_wmb(); 1600 1601 if (efx->legacy_irq) 1602 synchronize_irq(efx->legacy_irq); 1603 1604 efx_for_each_channel(channel, efx) { 1605 if (channel->irq) 1606 synchronize_irq(channel->irq); 1607 1608 efx_stop_eventq(channel); 1609 if (!channel->type->keep_eventq) 1610 efx_fini_eventq(channel); 1611 } 1612 1613 /* Flush the asynchronous MCDI request queue */ 1614 efx_mcdi_flush_async(efx); 1615 } 1616 1617 static int efx_enable_interrupts(struct efx_nic *efx) 1618 { 1619 struct efx_channel *channel, *end_channel; 1620 int rc; 1621 1622 BUG_ON(efx->state == STATE_DISABLED); 1623 1624 if (efx->eeh_disabled_legacy_irq) { 1625 enable_irq(efx->legacy_irq); 1626 efx->eeh_disabled_legacy_irq = false; 1627 } 1628 1629 efx->type->irq_enable_master(efx); 1630 1631 efx_for_each_channel(channel, efx) { 1632 if (channel->type->keep_eventq) { 1633 rc = efx_init_eventq(channel); 1634 if (rc) 1635 goto fail; 1636 } 1637 } 1638 1639 rc = efx_soft_enable_interrupts(efx); 1640 if (rc) 1641 goto fail; 1642 1643 return 0; 1644 1645 fail: 1646 end_channel = channel; 1647 efx_for_each_channel(channel, efx) { 1648 if (channel == end_channel) 1649 break; 1650 if (channel->type->keep_eventq) 1651 efx_fini_eventq(channel); 1652 } 1653 1654 efx->type->irq_disable_non_ev(efx); 1655 1656 return rc; 1657 } 1658 1659 static void efx_disable_interrupts(struct efx_nic *efx) 1660 { 1661 struct efx_channel *channel; 1662 1663 efx_soft_disable_interrupts(efx); 1664 1665 efx_for_each_channel(channel, efx) { 1666 if (channel->type->keep_eventq) 1667 efx_fini_eventq(channel); 1668 } 1669 1670 efx->type->irq_disable_non_ev(efx); 1671 } 1672 1673 static void efx_remove_interrupts(struct efx_nic *efx) 1674 { 1675 struct efx_channel *channel; 1676 1677 /* Remove MSI/MSI-X interrupts */ 1678 efx_for_each_channel(channel, efx) 1679 channel->irq = 0; 1680 pci_disable_msi(efx->pci_dev); 1681 pci_disable_msix(efx->pci_dev); 1682 1683 /* Remove legacy interrupt */ 1684 efx->legacy_irq = 0; 1685 } 1686 1687 static void efx_set_channels(struct efx_nic *efx) 1688 { 1689 struct efx_channel *channel; 1690 struct efx_tx_queue *tx_queue; 1691 1692 efx->tx_channel_offset = 1693 efx_separate_tx_channels ? 1694 efx->n_channels - efx->n_tx_channels : 0; 1695 1696 /* We need to mark which channels really have RX and TX 1697 * queues, and adjust the TX queue numbers if we have separate 1698 * RX-only and TX-only channels. 1699 */ 1700 efx_for_each_channel(channel, efx) { 1701 if (channel->channel < efx->n_rx_channels) 1702 channel->rx_queue.core_index = channel->channel; 1703 else 1704 channel->rx_queue.core_index = -1; 1705 1706 efx_for_each_channel_tx_queue(tx_queue, channel) 1707 tx_queue->queue -= (efx->tx_channel_offset * 1708 EFX_TXQ_TYPES); 1709 } 1710 } 1711 1712 static int efx_probe_nic(struct efx_nic *efx) 1713 { 1714 int rc; 1715 1716 netif_dbg(efx, probe, efx->net_dev, "creating NIC\n"); 1717 1718 /* Carry out hardware-type specific initialisation */ 1719 rc = efx->type->probe(efx); 1720 if (rc) 1721 return rc; 1722 1723 do { 1724 if (!efx->max_channels || !efx->max_tx_channels) { 1725 netif_err(efx, drv, efx->net_dev, 1726 "Insufficient resources to allocate" 1727 " any channels\n"); 1728 rc = -ENOSPC; 1729 goto fail1; 1730 } 1731 1732 /* Determine the number of channels and queues by trying 1733 * to hook in MSI-X interrupts. 1734 */ 1735 rc = efx_probe_interrupts(efx); 1736 if (rc) 1737 goto fail1; 1738 1739 efx_set_channels(efx); 1740 1741 /* dimension_resources can fail with EAGAIN */ 1742 rc = efx->type->dimension_resources(efx); 1743 if (rc != 0 && rc != -EAGAIN) 1744 goto fail2; 1745 1746 if (rc == -EAGAIN) 1747 /* try again with new max_channels */ 1748 efx_remove_interrupts(efx); 1749 1750 } while (rc == -EAGAIN); 1751 1752 if (efx->n_channels > 1) 1753 netdev_rss_key_fill(efx->rss_context.rx_hash_key, 1754 sizeof(efx->rss_context.rx_hash_key)); 1755 efx_set_default_rx_indir_table(efx, &efx->rss_context); 1756 1757 netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); 1758 netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); 1759 1760 /* Initialise the interrupt moderation settings */ 1761 efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); 1762 efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true, 1763 true); 1764 1765 return 0; 1766 1767 fail2: 1768 efx_remove_interrupts(efx); 1769 fail1: 1770 efx->type->remove(efx); 1771 return rc; 1772 } 1773 1774 static void efx_remove_nic(struct efx_nic *efx) 1775 { 1776 netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n"); 1777 1778 efx_remove_interrupts(efx); 1779 efx->type->remove(efx); 1780 } 1781 1782 static int efx_probe_filters(struct efx_nic *efx) 1783 { 1784 int rc; 1785 1786 init_rwsem(&efx->filter_sem); 1787 mutex_lock(&efx->mac_lock); 1788 down_write(&efx->filter_sem); 1789 rc = efx->type->filter_table_probe(efx); 1790 if (rc) 1791 goto out_unlock; 1792 1793 #ifdef CONFIG_RFS_ACCEL 1794 if (efx->type->offload_features & NETIF_F_NTUPLE) { 1795 struct efx_channel *channel; 1796 int i, success = 1; 1797 1798 efx_for_each_channel(channel, efx) { 1799 channel->rps_flow_id = 1800 kcalloc(efx->type->max_rx_ip_filters, 1801 sizeof(*channel->rps_flow_id), 1802 GFP_KERNEL); 1803 if (!channel->rps_flow_id) 1804 success = 0; 1805 else 1806 for (i = 0; 1807 i < efx->type->max_rx_ip_filters; 1808 ++i) 1809 channel->rps_flow_id[i] = 1810 RPS_FLOW_ID_INVALID; 1811 } 1812 1813 if (!success) { 1814 efx_for_each_channel(channel, efx) 1815 kfree(channel->rps_flow_id); 1816 efx->type->filter_table_remove(efx); 1817 rc = -ENOMEM; 1818 goto out_unlock; 1819 } 1820 1821 efx->rps_expire_index = efx->rps_expire_channel = 0; 1822 } 1823 #endif 1824 out_unlock: 1825 up_write(&efx->filter_sem); 1826 mutex_unlock(&efx->mac_lock); 1827 return rc; 1828 } 1829 1830 static void efx_remove_filters(struct efx_nic *efx) 1831 { 1832 #ifdef CONFIG_RFS_ACCEL 1833 struct efx_channel *channel; 1834 1835 efx_for_each_channel(channel, efx) 1836 kfree(channel->rps_flow_id); 1837 #endif 1838 down_write(&efx->filter_sem); 1839 efx->type->filter_table_remove(efx); 1840 up_write(&efx->filter_sem); 1841 } 1842 1843 static void efx_restore_filters(struct efx_nic *efx) 1844 { 1845 down_read(&efx->filter_sem); 1846 efx->type->filter_table_restore(efx); 1847 up_read(&efx->filter_sem); 1848 } 1849 1850 /************************************************************************** 1851 * 1852 * NIC startup/shutdown 1853 * 1854 *************************************************************************/ 1855 1856 static int efx_probe_all(struct efx_nic *efx) 1857 { 1858 int rc; 1859 1860 rc = efx_probe_nic(efx); 1861 if (rc) { 1862 netif_err(efx, probe, efx->net_dev, "failed to create NIC\n"); 1863 goto fail1; 1864 } 1865 1866 rc = efx_probe_port(efx); 1867 if (rc) { 1868 netif_err(efx, probe, efx->net_dev, "failed to create port\n"); 1869 goto fail2; 1870 } 1871 1872 BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT); 1873 if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) { 1874 rc = -EINVAL; 1875 goto fail3; 1876 } 1877 efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1878 1879 #ifdef CONFIG_SFC_SRIOV 1880 rc = efx->type->vswitching_probe(efx); 1881 if (rc) /* not fatal; the PF will still work fine */ 1882 netif_warn(efx, probe, efx->net_dev, 1883 "failed to setup vswitching rc=%d;" 1884 " VFs may not function\n", rc); 1885 #endif 1886 1887 rc = efx_probe_filters(efx); 1888 if (rc) { 1889 netif_err(efx, probe, efx->net_dev, 1890 "failed to create filter tables\n"); 1891 goto fail4; 1892 } 1893 1894 rc = efx_probe_channels(efx); 1895 if (rc) 1896 goto fail5; 1897 1898 return 0; 1899 1900 fail5: 1901 efx_remove_filters(efx); 1902 fail4: 1903 #ifdef CONFIG_SFC_SRIOV 1904 efx->type->vswitching_remove(efx); 1905 #endif 1906 fail3: 1907 efx_remove_port(efx); 1908 fail2: 1909 efx_remove_nic(efx); 1910 fail1: 1911 return rc; 1912 } 1913 1914 /* If the interface is supposed to be running but is not, start 1915 * the hardware and software data path, regular activity for the port 1916 * (MAC statistics, link polling, etc.) and schedule the port to be 1917 * reconfigured. Interrupts must already be enabled. This function 1918 * is safe to call multiple times, so long as the NIC is not disabled. 1919 * Requires the RTNL lock. 1920 */ 1921 static void efx_start_all(struct efx_nic *efx) 1922 { 1923 EFX_ASSERT_RESET_SERIALISED(efx); 1924 BUG_ON(efx->state == STATE_DISABLED); 1925 1926 /* Check that it is appropriate to restart the interface. All 1927 * of these flags are safe to read under just the rtnl lock */ 1928 if (efx->port_enabled || !netif_running(efx->net_dev) || 1929 efx->reset_pending) 1930 return; 1931 1932 efx_start_port(efx); 1933 efx_start_datapath(efx); 1934 1935 /* Start the hardware monitor if there is one */ 1936 if (efx->type->monitor != NULL) 1937 queue_delayed_work(efx->workqueue, &efx->monitor_work, 1938 efx_monitor_interval); 1939 1940 /* Link state detection is normally event-driven; we have 1941 * to poll now because we could have missed a change 1942 */ 1943 mutex_lock(&efx->mac_lock); 1944 if (efx->phy_op->poll(efx)) 1945 efx_link_status_changed(efx); 1946 mutex_unlock(&efx->mac_lock); 1947 1948 efx->type->start_stats(efx); 1949 efx->type->pull_stats(efx); 1950 spin_lock_bh(&efx->stats_lock); 1951 efx->type->update_stats(efx, NULL, NULL); 1952 spin_unlock_bh(&efx->stats_lock); 1953 } 1954 1955 /* Quiesce the hardware and software data path, and regular activity 1956 * for the port without bringing the link down. Safe to call multiple 1957 * times with the NIC in almost any state, but interrupts should be 1958 * enabled. Requires the RTNL lock. 1959 */ 1960 static void efx_stop_all(struct efx_nic *efx) 1961 { 1962 EFX_ASSERT_RESET_SERIALISED(efx); 1963 1964 /* port_enabled can be read safely under the rtnl lock */ 1965 if (!efx->port_enabled) 1966 return; 1967 1968 /* update stats before we go down so we can accurately count 1969 * rx_nodesc_drops 1970 */ 1971 efx->type->pull_stats(efx); 1972 spin_lock_bh(&efx->stats_lock); 1973 efx->type->update_stats(efx, NULL, NULL); 1974 spin_unlock_bh(&efx->stats_lock); 1975 efx->type->stop_stats(efx); 1976 efx_stop_port(efx); 1977 1978 /* Stop the kernel transmit interface. This is only valid if 1979 * the device is stopped or detached; otherwise the watchdog 1980 * may fire immediately. 1981 */ 1982 WARN_ON(netif_running(efx->net_dev) && 1983 netif_device_present(efx->net_dev)); 1984 netif_tx_disable(efx->net_dev); 1985 1986 efx_stop_datapath(efx); 1987 } 1988 1989 static void efx_remove_all(struct efx_nic *efx) 1990 { 1991 efx_remove_channels(efx); 1992 efx_remove_filters(efx); 1993 #ifdef CONFIG_SFC_SRIOV 1994 efx->type->vswitching_remove(efx); 1995 #endif 1996 efx_remove_port(efx); 1997 efx_remove_nic(efx); 1998 } 1999 2000 /************************************************************************** 2001 * 2002 * Interrupt moderation 2003 * 2004 **************************************************************************/ 2005 unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs) 2006 { 2007 if (usecs == 0) 2008 return 0; 2009 if (usecs * 1000 < efx->timer_quantum_ns) 2010 return 1; /* never round down to 0 */ 2011 return usecs * 1000 / efx->timer_quantum_ns; 2012 } 2013 2014 unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks) 2015 { 2016 /* We must round up when converting ticks to microseconds 2017 * because we round down when converting the other way. 2018 */ 2019 return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000); 2020 } 2021 2022 /* Set interrupt moderation parameters */ 2023 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, 2024 unsigned int rx_usecs, bool rx_adaptive, 2025 bool rx_may_override_tx) 2026 { 2027 struct efx_channel *channel; 2028 unsigned int timer_max_us; 2029 2030 EFX_ASSERT_RESET_SERIALISED(efx); 2031 2032 timer_max_us = efx->timer_max_ns / 1000; 2033 2034 if (tx_usecs > timer_max_us || rx_usecs > timer_max_us) 2035 return -EINVAL; 2036 2037 if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && 2038 !rx_may_override_tx) { 2039 netif_err(efx, drv, efx->net_dev, "Channels are shared. " 2040 "RX and TX IRQ moderation must be equal\n"); 2041 return -EINVAL; 2042 } 2043 2044 efx->irq_rx_adaptive = rx_adaptive; 2045 efx->irq_rx_moderation_us = rx_usecs; 2046 efx_for_each_channel(channel, efx) { 2047 if (efx_channel_has_rx_queue(channel)) 2048 channel->irq_moderation_us = rx_usecs; 2049 else if (efx_channel_has_tx_queues(channel)) 2050 channel->irq_moderation_us = tx_usecs; 2051 } 2052 2053 return 0; 2054 } 2055 2056 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, 2057 unsigned int *rx_usecs, bool *rx_adaptive) 2058 { 2059 *rx_adaptive = efx->irq_rx_adaptive; 2060 *rx_usecs = efx->irq_rx_moderation_us; 2061 2062 /* If channels are shared between RX and TX, so is IRQ 2063 * moderation. Otherwise, IRQ moderation is the same for all 2064 * TX channels and is not adaptive. 2065 */ 2066 if (efx->tx_channel_offset == 0) { 2067 *tx_usecs = *rx_usecs; 2068 } else { 2069 struct efx_channel *tx_channel; 2070 2071 tx_channel = efx->channel[efx->tx_channel_offset]; 2072 *tx_usecs = tx_channel->irq_moderation_us; 2073 } 2074 } 2075 2076 /************************************************************************** 2077 * 2078 * Hardware monitor 2079 * 2080 **************************************************************************/ 2081 2082 /* Run periodically off the general workqueue */ 2083 static void efx_monitor(struct work_struct *data) 2084 { 2085 struct efx_nic *efx = container_of(data, struct efx_nic, 2086 monitor_work.work); 2087 2088 netif_vdbg(efx, timer, efx->net_dev, 2089 "hardware monitor executing on CPU %d\n", 2090 raw_smp_processor_id()); 2091 BUG_ON(efx->type->monitor == NULL); 2092 2093 /* If the mac_lock is already held then it is likely a port 2094 * reconfiguration is already in place, which will likely do 2095 * most of the work of monitor() anyway. */ 2096 if (mutex_trylock(&efx->mac_lock)) { 2097 if (efx->port_enabled) 2098 efx->type->monitor(efx); 2099 mutex_unlock(&efx->mac_lock); 2100 } 2101 2102 queue_delayed_work(efx->workqueue, &efx->monitor_work, 2103 efx_monitor_interval); 2104 } 2105 2106 /************************************************************************** 2107 * 2108 * ioctls 2109 * 2110 *************************************************************************/ 2111 2112 /* Net device ioctl 2113 * Context: process, rtnl_lock() held. 2114 */ 2115 static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) 2116 { 2117 struct efx_nic *efx = netdev_priv(net_dev); 2118 struct mii_ioctl_data *data = if_mii(ifr); 2119 2120 if (cmd == SIOCSHWTSTAMP) 2121 return efx_ptp_set_ts_config(efx, ifr); 2122 if (cmd == SIOCGHWTSTAMP) 2123 return efx_ptp_get_ts_config(efx, ifr); 2124 2125 /* Convert phy_id from older PRTAD/DEVAD format */ 2126 if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && 2127 (data->phy_id & 0xfc00) == 0x0400) 2128 data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400; 2129 2130 return mdio_mii_ioctl(&efx->mdio, data, cmd); 2131 } 2132 2133 /************************************************************************** 2134 * 2135 * NAPI interface 2136 * 2137 **************************************************************************/ 2138 2139 static void efx_init_napi_channel(struct efx_channel *channel) 2140 { 2141 struct efx_nic *efx = channel->efx; 2142 2143 channel->napi_dev = efx->net_dev; 2144 netif_napi_add(channel->napi_dev, &channel->napi_str, 2145 efx_poll, napi_weight); 2146 } 2147 2148 static void efx_init_napi(struct efx_nic *efx) 2149 { 2150 struct efx_channel *channel; 2151 2152 efx_for_each_channel(channel, efx) 2153 efx_init_napi_channel(channel); 2154 } 2155 2156 static void efx_fini_napi_channel(struct efx_channel *channel) 2157 { 2158 if (channel->napi_dev) 2159 netif_napi_del(&channel->napi_str); 2160 2161 channel->napi_dev = NULL; 2162 } 2163 2164 static void efx_fini_napi(struct efx_nic *efx) 2165 { 2166 struct efx_channel *channel; 2167 2168 efx_for_each_channel(channel, efx) 2169 efx_fini_napi_channel(channel); 2170 } 2171 2172 /************************************************************************** 2173 * 2174 * Kernel netpoll interface 2175 * 2176 *************************************************************************/ 2177 2178 #ifdef CONFIG_NET_POLL_CONTROLLER 2179 2180 /* Although in the common case interrupts will be disabled, this is not 2181 * guaranteed. However, all our work happens inside the NAPI callback, 2182 * so no locking is required. 2183 */ 2184 static void efx_netpoll(struct net_device *net_dev) 2185 { 2186 struct efx_nic *efx = netdev_priv(net_dev); 2187 struct efx_channel *channel; 2188 2189 efx_for_each_channel(channel, efx) 2190 efx_schedule_channel(channel); 2191 } 2192 2193 #endif 2194 2195 /************************************************************************** 2196 * 2197 * Kernel net device interface 2198 * 2199 *************************************************************************/ 2200 2201 /* Context: process, rtnl_lock() held. */ 2202 int efx_net_open(struct net_device *net_dev) 2203 { 2204 struct efx_nic *efx = netdev_priv(net_dev); 2205 int rc; 2206 2207 netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", 2208 raw_smp_processor_id()); 2209 2210 rc = efx_check_disabled(efx); 2211 if (rc) 2212 return rc; 2213 if (efx->phy_mode & PHY_MODE_SPECIAL) 2214 return -EBUSY; 2215 if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) 2216 return -EIO; 2217 2218 /* Notify the kernel of the link state polled during driver load, 2219 * before the monitor starts running */ 2220 efx_link_status_changed(efx); 2221 2222 efx_start_all(efx); 2223 if (efx->state == STATE_DISABLED || efx->reset_pending) 2224 netif_device_detach(efx->net_dev); 2225 efx_selftest_async_start(efx); 2226 return 0; 2227 } 2228 2229 /* Context: process, rtnl_lock() held. 2230 * Note that the kernel will ignore our return code; this method 2231 * should really be a void. 2232 */ 2233 int efx_net_stop(struct net_device *net_dev) 2234 { 2235 struct efx_nic *efx = netdev_priv(net_dev); 2236 2237 netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", 2238 raw_smp_processor_id()); 2239 2240 /* Stop the device and flush all the channels */ 2241 efx_stop_all(efx); 2242 2243 return 0; 2244 } 2245 2246 /* Context: process, dev_base_lock or RTNL held, non-blocking. */ 2247 static void efx_net_stats(struct net_device *net_dev, 2248 struct rtnl_link_stats64 *stats) 2249 { 2250 struct efx_nic *efx = netdev_priv(net_dev); 2251 2252 spin_lock_bh(&efx->stats_lock); 2253 efx->type->update_stats(efx, NULL, stats); 2254 spin_unlock_bh(&efx->stats_lock); 2255 } 2256 2257 /* Context: netif_tx_lock held, BHs disabled. */ 2258 static void efx_watchdog(struct net_device *net_dev) 2259 { 2260 struct efx_nic *efx = netdev_priv(net_dev); 2261 2262 netif_err(efx, tx_err, efx->net_dev, 2263 "TX stuck with port_enabled=%d: resetting channels\n", 2264 efx->port_enabled); 2265 2266 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 2267 } 2268 2269 2270 /* Context: process, rtnl_lock() held. */ 2271 static int efx_change_mtu(struct net_device *net_dev, int new_mtu) 2272 { 2273 struct efx_nic *efx = netdev_priv(net_dev); 2274 int rc; 2275 2276 rc = efx_check_disabled(efx); 2277 if (rc) 2278 return rc; 2279 2280 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 2281 2282 efx_device_detach_sync(efx); 2283 efx_stop_all(efx); 2284 2285 mutex_lock(&efx->mac_lock); 2286 net_dev->mtu = new_mtu; 2287 efx_mac_reconfigure(efx); 2288 mutex_unlock(&efx->mac_lock); 2289 2290 efx_start_all(efx); 2291 efx_device_attach_if_not_resetting(efx); 2292 return 0; 2293 } 2294 2295 static int efx_set_mac_address(struct net_device *net_dev, void *data) 2296 { 2297 struct efx_nic *efx = netdev_priv(net_dev); 2298 struct sockaddr *addr = data; 2299 u8 *new_addr = addr->sa_data; 2300 u8 old_addr[6]; 2301 int rc; 2302 2303 if (!is_valid_ether_addr(new_addr)) { 2304 netif_err(efx, drv, efx->net_dev, 2305 "invalid ethernet MAC address requested: %pM\n", 2306 new_addr); 2307 return -EADDRNOTAVAIL; 2308 } 2309 2310 /* save old address */ 2311 ether_addr_copy(old_addr, net_dev->dev_addr); 2312 ether_addr_copy(net_dev->dev_addr, new_addr); 2313 if (efx->type->set_mac_address) { 2314 rc = efx->type->set_mac_address(efx); 2315 if (rc) { 2316 ether_addr_copy(net_dev->dev_addr, old_addr); 2317 return rc; 2318 } 2319 } 2320 2321 /* Reconfigure the MAC */ 2322 mutex_lock(&efx->mac_lock); 2323 efx_mac_reconfigure(efx); 2324 mutex_unlock(&efx->mac_lock); 2325 2326 return 0; 2327 } 2328 2329 /* Context: netif_addr_lock held, BHs disabled. */ 2330 static void efx_set_rx_mode(struct net_device *net_dev) 2331 { 2332 struct efx_nic *efx = netdev_priv(net_dev); 2333 2334 if (efx->port_enabled) 2335 queue_work(efx->workqueue, &efx->mac_work); 2336 /* Otherwise efx_start_port() will do this */ 2337 } 2338 2339 static int efx_set_features(struct net_device *net_dev, netdev_features_t data) 2340 { 2341 struct efx_nic *efx = netdev_priv(net_dev); 2342 int rc; 2343 2344 /* If disabling RX n-tuple filtering, clear existing filters */ 2345 if (net_dev->features & ~data & NETIF_F_NTUPLE) { 2346 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); 2347 if (rc) 2348 return rc; 2349 } 2350 2351 /* If Rx VLAN filter is changed, update filters via mac_reconfigure. 2352 * If rx-fcs is changed, mac_reconfigure updates that too. 2353 */ 2354 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | 2355 NETIF_F_RXFCS)) { 2356 /* efx_set_rx_mode() will schedule MAC work to update filters 2357 * when a new features are finally set in net_dev. 2358 */ 2359 efx_set_rx_mode(net_dev); 2360 } 2361 2362 return 0; 2363 } 2364 2365 static int efx_get_phys_port_id(struct net_device *net_dev, 2366 struct netdev_phys_item_id *ppid) 2367 { 2368 struct efx_nic *efx = netdev_priv(net_dev); 2369 2370 if (efx->type->get_phys_port_id) 2371 return efx->type->get_phys_port_id(efx, ppid); 2372 else 2373 return -EOPNOTSUPP; 2374 } 2375 2376 static int efx_get_phys_port_name(struct net_device *net_dev, 2377 char *name, size_t len) 2378 { 2379 struct efx_nic *efx = netdev_priv(net_dev); 2380 2381 if (snprintf(name, len, "p%u", efx->port_num) >= len) 2382 return -EINVAL; 2383 return 0; 2384 } 2385 2386 static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2387 { 2388 struct efx_nic *efx = netdev_priv(net_dev); 2389 2390 if (efx->type->vlan_rx_add_vid) 2391 return efx->type->vlan_rx_add_vid(efx, proto, vid); 2392 else 2393 return -EOPNOTSUPP; 2394 } 2395 2396 static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2397 { 2398 struct efx_nic *efx = netdev_priv(net_dev); 2399 2400 if (efx->type->vlan_rx_kill_vid) 2401 return efx->type->vlan_rx_kill_vid(efx, proto, vid); 2402 else 2403 return -EOPNOTSUPP; 2404 } 2405 2406 static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in) 2407 { 2408 switch (in) { 2409 case UDP_TUNNEL_TYPE_VXLAN: 2410 return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN; 2411 case UDP_TUNNEL_TYPE_GENEVE: 2412 return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE; 2413 default: 2414 return -1; 2415 } 2416 } 2417 2418 static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) 2419 { 2420 struct efx_nic *efx = netdev_priv(dev); 2421 struct efx_udp_tunnel tnl; 2422 int efx_tunnel_type; 2423 2424 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2425 if (efx_tunnel_type < 0) 2426 return; 2427 2428 tnl.type = (u16)efx_tunnel_type; 2429 tnl.port = ti->port; 2430 2431 if (efx->type->udp_tnl_add_port) 2432 (void)efx->type->udp_tnl_add_port(efx, tnl); 2433 } 2434 2435 static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) 2436 { 2437 struct efx_nic *efx = netdev_priv(dev); 2438 struct efx_udp_tunnel tnl; 2439 int efx_tunnel_type; 2440 2441 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2442 if (efx_tunnel_type < 0) 2443 return; 2444 2445 tnl.type = (u16)efx_tunnel_type; 2446 tnl.port = ti->port; 2447 2448 if (efx->type->udp_tnl_del_port) 2449 (void)efx->type->udp_tnl_del_port(efx, tnl); 2450 } 2451 2452 static const struct net_device_ops efx_netdev_ops = { 2453 .ndo_open = efx_net_open, 2454 .ndo_stop = efx_net_stop, 2455 .ndo_get_stats64 = efx_net_stats, 2456 .ndo_tx_timeout = efx_watchdog, 2457 .ndo_start_xmit = efx_hard_start_xmit, 2458 .ndo_validate_addr = eth_validate_addr, 2459 .ndo_do_ioctl = efx_ioctl, 2460 .ndo_change_mtu = efx_change_mtu, 2461 .ndo_set_mac_address = efx_set_mac_address, 2462 .ndo_set_rx_mode = efx_set_rx_mode, 2463 .ndo_set_features = efx_set_features, 2464 .ndo_vlan_rx_add_vid = efx_vlan_rx_add_vid, 2465 .ndo_vlan_rx_kill_vid = efx_vlan_rx_kill_vid, 2466 #ifdef CONFIG_SFC_SRIOV 2467 .ndo_set_vf_mac = efx_sriov_set_vf_mac, 2468 .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, 2469 .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, 2470 .ndo_get_vf_config = efx_sriov_get_vf_config, 2471 .ndo_set_vf_link_state = efx_sriov_set_vf_link_state, 2472 #endif 2473 .ndo_get_phys_port_id = efx_get_phys_port_id, 2474 .ndo_get_phys_port_name = efx_get_phys_port_name, 2475 #ifdef CONFIG_NET_POLL_CONTROLLER 2476 .ndo_poll_controller = efx_netpoll, 2477 #endif 2478 .ndo_setup_tc = efx_setup_tc, 2479 #ifdef CONFIG_RFS_ACCEL 2480 .ndo_rx_flow_steer = efx_filter_rfs, 2481 #endif 2482 .ndo_udp_tunnel_add = efx_udp_tunnel_add, 2483 .ndo_udp_tunnel_del = efx_udp_tunnel_del, 2484 }; 2485 2486 static void efx_update_name(struct efx_nic *efx) 2487 { 2488 strcpy(efx->name, efx->net_dev->name); 2489 efx_mtd_rename(efx); 2490 efx_set_channel_names(efx); 2491 } 2492 2493 static int efx_netdev_event(struct notifier_block *this, 2494 unsigned long event, void *ptr) 2495 { 2496 struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); 2497 2498 if ((net_dev->netdev_ops == &efx_netdev_ops) && 2499 event == NETDEV_CHANGENAME) 2500 efx_update_name(netdev_priv(net_dev)); 2501 2502 return NOTIFY_DONE; 2503 } 2504 2505 static struct notifier_block efx_netdev_notifier = { 2506 .notifier_call = efx_netdev_event, 2507 }; 2508 2509 static ssize_t 2510 show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) 2511 { 2512 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2513 return sprintf(buf, "%d\n", efx->phy_type); 2514 } 2515 static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); 2516 2517 #ifdef CONFIG_SFC_MCDI_LOGGING 2518 static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, 2519 char *buf) 2520 { 2521 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2522 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2523 2524 return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); 2525 } 2526 static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, 2527 const char *buf, size_t count) 2528 { 2529 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2530 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2531 bool enable = count > 0 && *buf != '0'; 2532 2533 mcdi->logging_enabled = enable; 2534 return count; 2535 } 2536 static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); 2537 #endif 2538 2539 static int efx_register_netdev(struct efx_nic *efx) 2540 { 2541 struct net_device *net_dev = efx->net_dev; 2542 struct efx_channel *channel; 2543 int rc; 2544 2545 net_dev->watchdog_timeo = 5 * HZ; 2546 net_dev->irq = efx->pci_dev->irq; 2547 net_dev->netdev_ops = &efx_netdev_ops; 2548 if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) 2549 net_dev->priv_flags |= IFF_UNICAST_FLT; 2550 net_dev->ethtool_ops = &efx_ethtool_ops; 2551 net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; 2552 net_dev->min_mtu = EFX_MIN_MTU; 2553 net_dev->max_mtu = EFX_MAX_MTU; 2554 2555 rtnl_lock(); 2556 2557 /* Enable resets to be scheduled and check whether any were 2558 * already requested. If so, the NIC is probably hosed so we 2559 * abort. 2560 */ 2561 efx->state = STATE_READY; 2562 smp_mb(); /* ensure we change state before checking reset_pending */ 2563 if (efx->reset_pending) { 2564 netif_err(efx, probe, efx->net_dev, 2565 "aborting probe due to scheduled reset\n"); 2566 rc = -EIO; 2567 goto fail_locked; 2568 } 2569 2570 rc = dev_alloc_name(net_dev, net_dev->name); 2571 if (rc < 0) 2572 goto fail_locked; 2573 efx_update_name(efx); 2574 2575 /* Always start with carrier off; PHY events will detect the link */ 2576 netif_carrier_off(net_dev); 2577 2578 rc = register_netdevice(net_dev); 2579 if (rc) 2580 goto fail_locked; 2581 2582 efx_for_each_channel(channel, efx) { 2583 struct efx_tx_queue *tx_queue; 2584 efx_for_each_channel_tx_queue(tx_queue, channel) 2585 efx_init_tx_queue_core_txq(tx_queue); 2586 } 2587 2588 efx_associate(efx); 2589 2590 rtnl_unlock(); 2591 2592 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2593 if (rc) { 2594 netif_err(efx, drv, efx->net_dev, 2595 "failed to init net dev attributes\n"); 2596 goto fail_registered; 2597 } 2598 #ifdef CONFIG_SFC_MCDI_LOGGING 2599 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2600 if (rc) { 2601 netif_err(efx, drv, efx->net_dev, 2602 "failed to init net dev attributes\n"); 2603 goto fail_attr_mcdi_logging; 2604 } 2605 #endif 2606 2607 return 0; 2608 2609 #ifdef CONFIG_SFC_MCDI_LOGGING 2610 fail_attr_mcdi_logging: 2611 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2612 #endif 2613 fail_registered: 2614 rtnl_lock(); 2615 efx_dissociate(efx); 2616 unregister_netdevice(net_dev); 2617 fail_locked: 2618 efx->state = STATE_UNINIT; 2619 rtnl_unlock(); 2620 netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); 2621 return rc; 2622 } 2623 2624 static void efx_unregister_netdev(struct efx_nic *efx) 2625 { 2626 if (!efx->net_dev) 2627 return; 2628 2629 BUG_ON(netdev_priv(efx->net_dev) != efx); 2630 2631 if (efx_dev_registered(efx)) { 2632 strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); 2633 #ifdef CONFIG_SFC_MCDI_LOGGING 2634 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2635 #endif 2636 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2637 unregister_netdev(efx->net_dev); 2638 } 2639 } 2640 2641 /************************************************************************** 2642 * 2643 * Device reset and suspend 2644 * 2645 **************************************************************************/ 2646 2647 /* Tears down the entire software state and most of the hardware state 2648 * before reset. */ 2649 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 2650 { 2651 EFX_ASSERT_RESET_SERIALISED(efx); 2652 2653 if (method == RESET_TYPE_MCDI_TIMEOUT) 2654 efx->type->prepare_flr(efx); 2655 2656 efx_stop_all(efx); 2657 efx_disable_interrupts(efx); 2658 2659 mutex_lock(&efx->mac_lock); 2660 mutex_lock(&efx->rss_lock); 2661 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2662 method != RESET_TYPE_DATAPATH) 2663 efx->phy_op->fini(efx); 2664 efx->type->fini(efx); 2665 } 2666 2667 /* This function will always ensure that the locks acquired in 2668 * efx_reset_down() are released. A failure return code indicates 2669 * that we were unable to reinitialise the hardware, and the 2670 * driver should be disabled. If ok is false, then the rx and tx 2671 * engines are not restarted, pending a RESET_DISABLE. */ 2672 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 2673 { 2674 int rc; 2675 2676 EFX_ASSERT_RESET_SERIALISED(efx); 2677 2678 if (method == RESET_TYPE_MCDI_TIMEOUT) 2679 efx->type->finish_flr(efx); 2680 2681 /* Ensure that SRAM is initialised even if we're disabling the device */ 2682 rc = efx->type->init(efx); 2683 if (rc) { 2684 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 2685 goto fail; 2686 } 2687 2688 if (!ok) 2689 goto fail; 2690 2691 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2692 method != RESET_TYPE_DATAPATH) { 2693 rc = efx->phy_op->init(efx); 2694 if (rc) 2695 goto fail; 2696 rc = efx->phy_op->reconfigure(efx); 2697 if (rc && rc != -EPERM) 2698 netif_err(efx, drv, efx->net_dev, 2699 "could not restore PHY settings\n"); 2700 } 2701 2702 rc = efx_enable_interrupts(efx); 2703 if (rc) 2704 goto fail; 2705 2706 #ifdef CONFIG_SFC_SRIOV 2707 rc = efx->type->vswitching_restore(efx); 2708 if (rc) /* not fatal; the PF will still work fine */ 2709 netif_warn(efx, probe, efx->net_dev, 2710 "failed to restore vswitching rc=%d;" 2711 " VFs may not function\n", rc); 2712 #endif 2713 2714 if (efx->type->rx_restore_rss_contexts) 2715 efx->type->rx_restore_rss_contexts(efx); 2716 mutex_unlock(&efx->rss_lock); 2717 down_read(&efx->filter_sem); 2718 efx_restore_filters(efx); 2719 up_read(&efx->filter_sem); 2720 if (efx->type->sriov_reset) 2721 efx->type->sriov_reset(efx); 2722 2723 mutex_unlock(&efx->mac_lock); 2724 2725 efx_start_all(efx); 2726 2727 if (efx->type->udp_tnl_push_ports) 2728 efx->type->udp_tnl_push_ports(efx); 2729 2730 return 0; 2731 2732 fail: 2733 efx->port_initialized = false; 2734 2735 mutex_unlock(&efx->rss_lock); 2736 mutex_unlock(&efx->mac_lock); 2737 2738 return rc; 2739 } 2740 2741 /* Reset the NIC using the specified method. Note that the reset may 2742 * fail, in which case the card will be left in an unusable state. 2743 * 2744 * Caller must hold the rtnl_lock. 2745 */ 2746 int efx_reset(struct efx_nic *efx, enum reset_type method) 2747 { 2748 int rc, rc2; 2749 bool disabled; 2750 2751 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 2752 RESET_TYPE(method)); 2753 2754 efx_device_detach_sync(efx); 2755 efx_reset_down(efx, method); 2756 2757 rc = efx->type->reset(efx, method); 2758 if (rc) { 2759 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 2760 goto out; 2761 } 2762 2763 /* Clear flags for the scopes we covered. We assume the NIC and 2764 * driver are now quiescent so that there is no race here. 2765 */ 2766 if (method < RESET_TYPE_MAX_METHOD) 2767 efx->reset_pending &= -(1 << (method + 1)); 2768 else /* it doesn't fit into the well-ordered scope hierarchy */ 2769 __clear_bit(method, &efx->reset_pending); 2770 2771 /* Reinitialise bus-mastering, which may have been turned off before 2772 * the reset was scheduled. This is still appropriate, even in the 2773 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 2774 * can respond to requests. */ 2775 pci_set_master(efx->pci_dev); 2776 2777 out: 2778 /* Leave device stopped if necessary */ 2779 disabled = rc || 2780 method == RESET_TYPE_DISABLE || 2781 method == RESET_TYPE_RECOVER_OR_DISABLE; 2782 rc2 = efx_reset_up(efx, method, !disabled); 2783 if (rc2) { 2784 disabled = true; 2785 if (!rc) 2786 rc = rc2; 2787 } 2788 2789 if (disabled) { 2790 dev_close(efx->net_dev); 2791 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 2792 efx->state = STATE_DISABLED; 2793 } else { 2794 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 2795 efx_device_attach_if_not_resetting(efx); 2796 } 2797 return rc; 2798 } 2799 2800 /* Try recovery mechanisms. 2801 * For now only EEH is supported. 2802 * Returns 0 if the recovery mechanisms are unsuccessful. 2803 * Returns a non-zero value otherwise. 2804 */ 2805 int efx_try_recovery(struct efx_nic *efx) 2806 { 2807 #ifdef CONFIG_EEH 2808 /* A PCI error can occur and not be seen by EEH because nothing 2809 * happens on the PCI bus. In this case the driver may fail and 2810 * schedule a 'recover or reset', leading to this recovery handler. 2811 * Manually call the eeh failure check function. 2812 */ 2813 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 2814 if (eeh_dev_check_failure(eehdev)) { 2815 /* The EEH mechanisms will handle the error and reset the 2816 * device if necessary. 2817 */ 2818 return 1; 2819 } 2820 #endif 2821 return 0; 2822 } 2823 2824 static void efx_wait_for_bist_end(struct efx_nic *efx) 2825 { 2826 int i; 2827 2828 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 2829 if (efx_mcdi_poll_reboot(efx)) 2830 goto out; 2831 msleep(BIST_WAIT_DELAY_MS); 2832 } 2833 2834 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 2835 out: 2836 /* Either way unset the BIST flag. If we found no reboot we probably 2837 * won't recover, but we should try. 2838 */ 2839 efx->mc_bist_for_other_fn = false; 2840 } 2841 2842 /* The worker thread exists so that code that cannot sleep can 2843 * schedule a reset for later. 2844 */ 2845 static void efx_reset_work(struct work_struct *data) 2846 { 2847 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 2848 unsigned long pending; 2849 enum reset_type method; 2850 2851 pending = READ_ONCE(efx->reset_pending); 2852 method = fls(pending) - 1; 2853 2854 if (method == RESET_TYPE_MC_BIST) 2855 efx_wait_for_bist_end(efx); 2856 2857 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 2858 method == RESET_TYPE_RECOVER_OR_ALL) && 2859 efx_try_recovery(efx)) 2860 return; 2861 2862 if (!pending) 2863 return; 2864 2865 rtnl_lock(); 2866 2867 /* We checked the state in efx_schedule_reset() but it may 2868 * have changed by now. Now that we have the RTNL lock, 2869 * it cannot change again. 2870 */ 2871 if (efx->state == STATE_READY) 2872 (void)efx_reset(efx, method); 2873 2874 rtnl_unlock(); 2875 } 2876 2877 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 2878 { 2879 enum reset_type method; 2880 2881 if (efx->state == STATE_RECOVERY) { 2882 netif_dbg(efx, drv, efx->net_dev, 2883 "recovering: skip scheduling %s reset\n", 2884 RESET_TYPE(type)); 2885 return; 2886 } 2887 2888 switch (type) { 2889 case RESET_TYPE_INVISIBLE: 2890 case RESET_TYPE_ALL: 2891 case RESET_TYPE_RECOVER_OR_ALL: 2892 case RESET_TYPE_WORLD: 2893 case RESET_TYPE_DISABLE: 2894 case RESET_TYPE_RECOVER_OR_DISABLE: 2895 case RESET_TYPE_DATAPATH: 2896 case RESET_TYPE_MC_BIST: 2897 case RESET_TYPE_MCDI_TIMEOUT: 2898 method = type; 2899 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 2900 RESET_TYPE(method)); 2901 break; 2902 default: 2903 method = efx->type->map_reset_reason(type); 2904 netif_dbg(efx, drv, efx->net_dev, 2905 "scheduling %s reset for %s\n", 2906 RESET_TYPE(method), RESET_TYPE(type)); 2907 break; 2908 } 2909 2910 set_bit(method, &efx->reset_pending); 2911 smp_mb(); /* ensure we change reset_pending before checking state */ 2912 2913 /* If we're not READY then just leave the flags set as the cue 2914 * to abort probing or reschedule the reset later. 2915 */ 2916 if (READ_ONCE(efx->state) != STATE_READY) 2917 return; 2918 2919 /* efx_process_channel() will no longer read events once a 2920 * reset is scheduled. So switch back to poll'd MCDI completions. */ 2921 efx_mcdi_mode_poll(efx); 2922 2923 queue_work(reset_workqueue, &efx->reset_work); 2924 } 2925 2926 /************************************************************************** 2927 * 2928 * List of NICs we support 2929 * 2930 **************************************************************************/ 2931 2932 /* PCI device ID table */ 2933 static const struct pci_device_id efx_pci_table[] = { 2934 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803), /* SFC9020 */ 2935 .driver_data = (unsigned long) &siena_a0_nic_type}, 2936 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */ 2937 .driver_data = (unsigned long) &siena_a0_nic_type}, 2938 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ 2939 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2940 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */ 2941 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2942 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ 2943 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2944 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */ 2945 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2946 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */ 2947 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2948 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */ 2949 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2950 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03), /* SFC9250 PF */ 2951 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2952 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03), /* SFC9250 VF */ 2953 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2954 {0} /* end of list */ 2955 }; 2956 2957 /************************************************************************** 2958 * 2959 * Dummy PHY/MAC operations 2960 * 2961 * Can be used for some unimplemented operations 2962 * Needed so all function pointers are valid and do not have to be tested 2963 * before use 2964 * 2965 **************************************************************************/ 2966 int efx_port_dummy_op_int(struct efx_nic *efx) 2967 { 2968 return 0; 2969 } 2970 void efx_port_dummy_op_void(struct efx_nic *efx) {} 2971 2972 static bool efx_port_dummy_op_poll(struct efx_nic *efx) 2973 { 2974 return false; 2975 } 2976 2977 static const struct efx_phy_operations efx_dummy_phy_operations = { 2978 .init = efx_port_dummy_op_int, 2979 .reconfigure = efx_port_dummy_op_int, 2980 .poll = efx_port_dummy_op_poll, 2981 .fini = efx_port_dummy_op_void, 2982 }; 2983 2984 /************************************************************************** 2985 * 2986 * Data housekeeping 2987 * 2988 **************************************************************************/ 2989 2990 /* This zeroes out and then fills in the invariants in a struct 2991 * efx_nic (including all sub-structures). 2992 */ 2993 static int efx_init_struct(struct efx_nic *efx, 2994 struct pci_dev *pci_dev, struct net_device *net_dev) 2995 { 2996 int rc = -ENOMEM, i; 2997 2998 /* Initialise common structures */ 2999 INIT_LIST_HEAD(&efx->node); 3000 INIT_LIST_HEAD(&efx->secondary_list); 3001 spin_lock_init(&efx->biu_lock); 3002 #ifdef CONFIG_SFC_MTD 3003 INIT_LIST_HEAD(&efx->mtd_list); 3004 #endif 3005 INIT_WORK(&efx->reset_work, efx_reset_work); 3006 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 3007 INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); 3008 efx->pci_dev = pci_dev; 3009 efx->msg_enable = debug; 3010 efx->state = STATE_UNINIT; 3011 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 3012 3013 efx->net_dev = net_dev; 3014 efx->rx_prefix_size = efx->type->rx_prefix_size; 3015 efx->rx_ip_align = 3016 NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 3017 efx->rx_packet_hash_offset = 3018 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 3019 efx->rx_packet_ts_offset = 3020 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 3021 INIT_LIST_HEAD(&efx->rss_context.list); 3022 mutex_init(&efx->rss_lock); 3023 spin_lock_init(&efx->stats_lock); 3024 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 3025 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 3026 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 3027 mutex_init(&efx->mac_lock); 3028 #ifdef CONFIG_RFS_ACCEL 3029 mutex_init(&efx->rps_mutex); 3030 spin_lock_init(&efx->rps_hash_lock); 3031 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 3032 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 3033 sizeof(*efx->rps_hash_table), GFP_KERNEL); 3034 #endif 3035 efx->phy_op = &efx_dummy_phy_operations; 3036 efx->mdio.dev = net_dev; 3037 INIT_WORK(&efx->mac_work, efx_mac_work); 3038 init_waitqueue_head(&efx->flush_wq); 3039 3040 for (i = 0; i < EFX_MAX_CHANNELS; i++) { 3041 efx->channel[i] = efx_alloc_channel(efx, i, NULL); 3042 if (!efx->channel[i]) 3043 goto fail; 3044 efx->msi_context[i].efx = efx; 3045 efx->msi_context[i].index = i; 3046 } 3047 3048 /* Higher numbered interrupt modes are less capable! */ 3049 if (WARN_ON_ONCE(efx->type->max_interrupt_mode > 3050 efx->type->min_interrupt_mode)) { 3051 rc = -EIO; 3052 goto fail; 3053 } 3054 efx->interrupt_mode = max(efx->type->max_interrupt_mode, 3055 interrupt_mode); 3056 efx->interrupt_mode = min(efx->type->min_interrupt_mode, 3057 interrupt_mode); 3058 3059 /* Would be good to use the net_dev name, but we're too early */ 3060 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 3061 pci_name(pci_dev)); 3062 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 3063 if (!efx->workqueue) 3064 goto fail; 3065 3066 return 0; 3067 3068 fail: 3069 efx_fini_struct(efx); 3070 return rc; 3071 } 3072 3073 static void efx_fini_struct(struct efx_nic *efx) 3074 { 3075 int i; 3076 3077 #ifdef CONFIG_RFS_ACCEL 3078 kfree(efx->rps_hash_table); 3079 #endif 3080 3081 for (i = 0; i < EFX_MAX_CHANNELS; i++) 3082 kfree(efx->channel[i]); 3083 3084 kfree(efx->vpd_sn); 3085 3086 if (efx->workqueue) { 3087 destroy_workqueue(efx->workqueue); 3088 efx->workqueue = NULL; 3089 } 3090 } 3091 3092 void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) 3093 { 3094 u64 n_rx_nodesc_trunc = 0; 3095 struct efx_channel *channel; 3096 3097 efx_for_each_channel(channel, efx) 3098 n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc; 3099 stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc; 3100 stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); 3101 } 3102 3103 bool efx_filter_spec_equal(const struct efx_filter_spec *left, 3104 const struct efx_filter_spec *right) 3105 { 3106 if ((left->match_flags ^ right->match_flags) | 3107 ((left->flags ^ right->flags) & 3108 (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) 3109 return false; 3110 3111 return memcmp(&left->outer_vid, &right->outer_vid, 3112 sizeof(struct efx_filter_spec) - 3113 offsetof(struct efx_filter_spec, outer_vid)) == 0; 3114 } 3115 3116 u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) 3117 { 3118 BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); 3119 return jhash2((const u32 *)&spec->outer_vid, 3120 (sizeof(struct efx_filter_spec) - 3121 offsetof(struct efx_filter_spec, outer_vid)) / 4, 3122 0); 3123 } 3124 3125 #ifdef CONFIG_RFS_ACCEL 3126 bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, 3127 bool *force) 3128 { 3129 if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { 3130 /* ARFS is currently updating this entry, leave it */ 3131 return false; 3132 } 3133 if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { 3134 /* ARFS tried and failed to update this, so it's probably out 3135 * of date. Remove the filter and the ARFS rule entry. 3136 */ 3137 rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; 3138 *force = true; 3139 return true; 3140 } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ 3141 /* ARFS has moved on, so old filter is not needed. Since we did 3142 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will 3143 * not be removed by efx_rps_hash_del() subsequently. 3144 */ 3145 *force = true; 3146 return true; 3147 } 3148 /* Remove it iff ARFS wants to. */ 3149 return true; 3150 } 3151 3152 struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, 3153 const struct efx_filter_spec *spec) 3154 { 3155 u32 hash = efx_filter_spec_hash(spec); 3156 3157 WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); 3158 if (!efx->rps_hash_table) 3159 return NULL; 3160 return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; 3161 } 3162 3163 struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, 3164 const struct efx_filter_spec *spec) 3165 { 3166 struct efx_arfs_rule *rule; 3167 struct hlist_head *head; 3168 struct hlist_node *node; 3169 3170 head = efx_rps_hash_bucket(efx, spec); 3171 if (!head) 3172 return NULL; 3173 hlist_for_each(node, head) { 3174 rule = container_of(node, struct efx_arfs_rule, node); 3175 if (efx_filter_spec_equal(spec, &rule->spec)) 3176 return rule; 3177 } 3178 return NULL; 3179 } 3180 3181 struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, 3182 const struct efx_filter_spec *spec, 3183 bool *new) 3184 { 3185 struct efx_arfs_rule *rule; 3186 struct hlist_head *head; 3187 struct hlist_node *node; 3188 3189 head = efx_rps_hash_bucket(efx, spec); 3190 if (!head) 3191 return NULL; 3192 hlist_for_each(node, head) { 3193 rule = container_of(node, struct efx_arfs_rule, node); 3194 if (efx_filter_spec_equal(spec, &rule->spec)) { 3195 *new = false; 3196 return rule; 3197 } 3198 } 3199 rule = kmalloc(sizeof(*rule), GFP_ATOMIC); 3200 *new = true; 3201 if (rule) { 3202 memcpy(&rule->spec, spec, sizeof(rule->spec)); 3203 hlist_add_head(&rule->node, head); 3204 } 3205 return rule; 3206 } 3207 3208 void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) 3209 { 3210 struct efx_arfs_rule *rule; 3211 struct hlist_head *head; 3212 struct hlist_node *node; 3213 3214 head = efx_rps_hash_bucket(efx, spec); 3215 if (WARN_ON(!head)) 3216 return; 3217 hlist_for_each(node, head) { 3218 rule = container_of(node, struct efx_arfs_rule, node); 3219 if (efx_filter_spec_equal(spec, &rule->spec)) { 3220 /* Someone already reused the entry. We know that if 3221 * this check doesn't fire (i.e. filter_id == REMOVING) 3222 * then the REMOVING mark was put there by our caller, 3223 * because caller is holding a lock on filter table and 3224 * only holders of that lock set REMOVING. 3225 */ 3226 if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) 3227 return; 3228 hlist_del(node); 3229 kfree(rule); 3230 return; 3231 } 3232 } 3233 /* We didn't find it. */ 3234 WARN_ON(1); 3235 } 3236 #endif 3237 3238 /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because 3239 * (a) this is an infrequent control-plane operation and (b) n is small (max 64) 3240 */ 3241 struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) 3242 { 3243 struct list_head *head = &efx->rss_context.list; 3244 struct efx_rss_context *ctx, *new; 3245 u32 id = 1; /* Don't use zero, that refers to the master RSS context */ 3246 3247 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3248 3249 /* Search for first gap in the numbering */ 3250 list_for_each_entry(ctx, head, list) { 3251 if (ctx->user_id != id) 3252 break; 3253 id++; 3254 /* Check for wrap. If this happens, we have nearly 2^32 3255 * allocated RSS contexts, which seems unlikely. 3256 */ 3257 if (WARN_ON_ONCE(!id)) 3258 return NULL; 3259 } 3260 3261 /* Create the new entry */ 3262 new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); 3263 if (!new) 3264 return NULL; 3265 new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; 3266 new->rx_hash_udp_4tuple = false; 3267 3268 /* Insert the new entry into the gap */ 3269 new->user_id = id; 3270 list_add_tail(&new->list, &ctx->list); 3271 return new; 3272 } 3273 3274 struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) 3275 { 3276 struct list_head *head = &efx->rss_context.list; 3277 struct efx_rss_context *ctx; 3278 3279 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3280 3281 list_for_each_entry(ctx, head, list) 3282 if (ctx->user_id == id) 3283 return ctx; 3284 return NULL; 3285 } 3286 3287 void efx_free_rss_context_entry(struct efx_rss_context *ctx) 3288 { 3289 list_del(&ctx->list); 3290 kfree(ctx); 3291 } 3292 3293 /************************************************************************** 3294 * 3295 * PCI interface 3296 * 3297 **************************************************************************/ 3298 3299 /* Main body of final NIC shutdown code 3300 * This is called only at module unload (or hotplug removal). 3301 */ 3302 static void efx_pci_remove_main(struct efx_nic *efx) 3303 { 3304 /* Flush reset_work. It can no longer be scheduled since we 3305 * are not READY. 3306 */ 3307 BUG_ON(efx->state == STATE_READY); 3308 cancel_work_sync(&efx->reset_work); 3309 3310 efx_disable_interrupts(efx); 3311 efx_nic_fini_interrupt(efx); 3312 efx_fini_port(efx); 3313 efx->type->fini(efx); 3314 efx_fini_napi(efx); 3315 efx_remove_all(efx); 3316 } 3317 3318 /* Final NIC shutdown 3319 * This is called only at module unload (or hotplug removal). A PF can call 3320 * this on its VFs to ensure they are unbound first. 3321 */ 3322 static void efx_pci_remove(struct pci_dev *pci_dev) 3323 { 3324 struct efx_nic *efx; 3325 3326 efx = pci_get_drvdata(pci_dev); 3327 if (!efx) 3328 return; 3329 3330 /* Mark the NIC as fini, then stop the interface */ 3331 rtnl_lock(); 3332 efx_dissociate(efx); 3333 dev_close(efx->net_dev); 3334 efx_disable_interrupts(efx); 3335 efx->state = STATE_UNINIT; 3336 rtnl_unlock(); 3337 3338 if (efx->type->sriov_fini) 3339 efx->type->sriov_fini(efx); 3340 3341 efx_unregister_netdev(efx); 3342 3343 efx_mtd_remove(efx); 3344 3345 efx_pci_remove_main(efx); 3346 3347 efx_fini_io(efx); 3348 netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); 3349 3350 efx_fini_struct(efx); 3351 free_netdev(efx->net_dev); 3352 3353 pci_disable_pcie_error_reporting(pci_dev); 3354 }; 3355 3356 /* NIC VPD information 3357 * Called during probe to display the part number of the 3358 * installed NIC. VPD is potentially very large but this should 3359 * always appear within the first 512 bytes. 3360 */ 3361 #define SFC_VPD_LEN 512 3362 static void efx_probe_vpd_strings(struct efx_nic *efx) 3363 { 3364 struct pci_dev *dev = efx->pci_dev; 3365 char vpd_data[SFC_VPD_LEN]; 3366 ssize_t vpd_size; 3367 int ro_start, ro_size, i, j; 3368 3369 /* Get the vpd data from the device */ 3370 vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data); 3371 if (vpd_size <= 0) { 3372 netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n"); 3373 return; 3374 } 3375 3376 /* Get the Read only section */ 3377 ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); 3378 if (ro_start < 0) { 3379 netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n"); 3380 return; 3381 } 3382 3383 ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]); 3384 j = ro_size; 3385 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3386 if (i + j > vpd_size) 3387 j = vpd_size - i; 3388 3389 /* Get the Part number */ 3390 i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN"); 3391 if (i < 0) { 3392 netif_err(efx, drv, efx->net_dev, "Part number not found\n"); 3393 return; 3394 } 3395 3396 j = pci_vpd_info_field_size(&vpd_data[i]); 3397 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3398 if (i + j > vpd_size) { 3399 netif_err(efx, drv, efx->net_dev, "Incomplete part number\n"); 3400 return; 3401 } 3402 3403 netif_info(efx, drv, efx->net_dev, 3404 "Part Number : %.*s\n", j, &vpd_data[i]); 3405 3406 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3407 j = ro_size; 3408 i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN"); 3409 if (i < 0) { 3410 netif_err(efx, drv, efx->net_dev, "Serial number not found\n"); 3411 return; 3412 } 3413 3414 j = pci_vpd_info_field_size(&vpd_data[i]); 3415 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3416 if (i + j > vpd_size) { 3417 netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n"); 3418 return; 3419 } 3420 3421 efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL); 3422 if (!efx->vpd_sn) 3423 return; 3424 3425 snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]); 3426 } 3427 3428 3429 /* Main body of NIC initialisation 3430 * This is called at module load (or hotplug insertion, theoretically). 3431 */ 3432 static int efx_pci_probe_main(struct efx_nic *efx) 3433 { 3434 int rc; 3435 3436 /* Do start-of-day initialisation */ 3437 rc = efx_probe_all(efx); 3438 if (rc) 3439 goto fail1; 3440 3441 efx_init_napi(efx); 3442 3443 rc = efx->type->init(efx); 3444 if (rc) { 3445 netif_err(efx, probe, efx->net_dev, 3446 "failed to initialise NIC\n"); 3447 goto fail3; 3448 } 3449 3450 rc = efx_init_port(efx); 3451 if (rc) { 3452 netif_err(efx, probe, efx->net_dev, 3453 "failed to initialise port\n"); 3454 goto fail4; 3455 } 3456 3457 rc = efx_nic_init_interrupt(efx); 3458 if (rc) 3459 goto fail5; 3460 rc = efx_enable_interrupts(efx); 3461 if (rc) 3462 goto fail6; 3463 3464 return 0; 3465 3466 fail6: 3467 efx_nic_fini_interrupt(efx); 3468 fail5: 3469 efx_fini_port(efx); 3470 fail4: 3471 efx->type->fini(efx); 3472 fail3: 3473 efx_fini_napi(efx); 3474 efx_remove_all(efx); 3475 fail1: 3476 return rc; 3477 } 3478 3479 static int efx_pci_probe_post_io(struct efx_nic *efx) 3480 { 3481 struct net_device *net_dev = efx->net_dev; 3482 int rc = efx_pci_probe_main(efx); 3483 3484 if (rc) 3485 return rc; 3486 3487 if (efx->type->sriov_init) { 3488 rc = efx->type->sriov_init(efx); 3489 if (rc) 3490 netif_err(efx, probe, efx->net_dev, 3491 "SR-IOV can't be enabled rc %d\n", rc); 3492 } 3493 3494 /* Determine netdevice features */ 3495 net_dev->features |= (efx->type->offload_features | NETIF_F_SG | 3496 NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); 3497 if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 3498 net_dev->features |= NETIF_F_TSO6; 3499 /* Check whether device supports TSO */ 3500 if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) 3501 net_dev->features &= ~NETIF_F_ALL_TSO; 3502 /* Mask for features that also apply to VLAN devices */ 3503 net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | 3504 NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | 3505 NETIF_F_RXCSUM); 3506 3507 net_dev->hw_features |= net_dev->features & ~efx->fixed_features; 3508 3509 /* Disable receiving frames with bad FCS, by default. */ 3510 net_dev->features &= ~NETIF_F_RXALL; 3511 3512 /* Disable VLAN filtering by default. It may be enforced if 3513 * the feature is fixed (i.e. VLAN filters are required to 3514 * receive VLAN tagged packets due to vPort restrictions). 3515 */ 3516 net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; 3517 net_dev->features |= efx->fixed_features; 3518 3519 rc = efx_register_netdev(efx); 3520 if (!rc) 3521 return 0; 3522 3523 efx_pci_remove_main(efx); 3524 return rc; 3525 } 3526 3527 /* NIC initialisation 3528 * 3529 * This is called at module load (or hotplug insertion, 3530 * theoretically). It sets up PCI mappings, resets the NIC, 3531 * sets up and registers the network devices with the kernel and hooks 3532 * the interrupt service routine. It does not prepare the device for 3533 * transmission; this is left to the first time one of the network 3534 * interfaces is brought up (i.e. efx_net_open). 3535 */ 3536 static int efx_pci_probe(struct pci_dev *pci_dev, 3537 const struct pci_device_id *entry) 3538 { 3539 struct net_device *net_dev; 3540 struct efx_nic *efx; 3541 int rc; 3542 3543 /* Allocate and initialise a struct net_device and struct efx_nic */ 3544 net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES, 3545 EFX_MAX_RX_QUEUES); 3546 if (!net_dev) 3547 return -ENOMEM; 3548 efx = netdev_priv(net_dev); 3549 efx->type = (const struct efx_nic_type *) entry->driver_data; 3550 efx->fixed_features |= NETIF_F_HIGHDMA; 3551 3552 pci_set_drvdata(pci_dev, efx); 3553 SET_NETDEV_DEV(net_dev, &pci_dev->dev); 3554 rc = efx_init_struct(efx, pci_dev, net_dev); 3555 if (rc) 3556 goto fail1; 3557 3558 netif_info(efx, probe, efx->net_dev, 3559 "Solarflare NIC detected\n"); 3560 3561 if (!efx->type->is_vf) 3562 efx_probe_vpd_strings(efx); 3563 3564 /* Set up basic I/O (BAR mappings etc) */ 3565 rc = efx_init_io(efx); 3566 if (rc) 3567 goto fail2; 3568 3569 rc = efx_pci_probe_post_io(efx); 3570 if (rc) { 3571 /* On failure, retry once immediately. 3572 * If we aborted probe due to a scheduled reset, dismiss it. 3573 */ 3574 efx->reset_pending = 0; 3575 rc = efx_pci_probe_post_io(efx); 3576 if (rc) { 3577 /* On another failure, retry once more 3578 * after a 50-305ms delay. 3579 */ 3580 unsigned char r; 3581 3582 get_random_bytes(&r, 1); 3583 msleep((unsigned int)r + 50); 3584 efx->reset_pending = 0; 3585 rc = efx_pci_probe_post_io(efx); 3586 } 3587 } 3588 if (rc) 3589 goto fail3; 3590 3591 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 3592 3593 /* Try to create MTDs, but allow this to fail */ 3594 rtnl_lock(); 3595 rc = efx_mtd_probe(efx); 3596 rtnl_unlock(); 3597 if (rc && rc != -EPERM) 3598 netif_warn(efx, probe, efx->net_dev, 3599 "failed to create MTDs (%d)\n", rc); 3600 3601 rc = pci_enable_pcie_error_reporting(pci_dev); 3602 if (rc && rc != -EINVAL) 3603 netif_notice(efx, probe, efx->net_dev, 3604 "PCIE error reporting unavailable (%d).\n", 3605 rc); 3606 3607 if (efx->type->udp_tnl_push_ports) 3608 efx->type->udp_tnl_push_ports(efx); 3609 3610 return 0; 3611 3612 fail3: 3613 efx_fini_io(efx); 3614 fail2: 3615 efx_fini_struct(efx); 3616 fail1: 3617 WARN_ON(rc > 0); 3618 netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc); 3619 free_netdev(net_dev); 3620 return rc; 3621 } 3622 3623 /* efx_pci_sriov_configure returns the actual number of Virtual Functions 3624 * enabled on success 3625 */ 3626 #ifdef CONFIG_SFC_SRIOV 3627 static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs) 3628 { 3629 int rc; 3630 struct efx_nic *efx = pci_get_drvdata(dev); 3631 3632 if (efx->type->sriov_configure) { 3633 rc = efx->type->sriov_configure(efx, num_vfs); 3634 if (rc) 3635 return rc; 3636 else 3637 return num_vfs; 3638 } else 3639 return -EOPNOTSUPP; 3640 } 3641 #endif 3642 3643 static int efx_pm_freeze(struct device *dev) 3644 { 3645 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 3646 3647 rtnl_lock(); 3648 3649 if (efx->state != STATE_DISABLED) { 3650 efx->state = STATE_UNINIT; 3651 3652 efx_device_detach_sync(efx); 3653 3654 efx_stop_all(efx); 3655 efx_disable_interrupts(efx); 3656 } 3657 3658 rtnl_unlock(); 3659 3660 return 0; 3661 } 3662 3663 static int efx_pm_thaw(struct device *dev) 3664 { 3665 int rc; 3666 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 3667 3668 rtnl_lock(); 3669 3670 if (efx->state != STATE_DISABLED) { 3671 rc = efx_enable_interrupts(efx); 3672 if (rc) 3673 goto fail; 3674 3675 mutex_lock(&efx->mac_lock); 3676 efx->phy_op->reconfigure(efx); 3677 mutex_unlock(&efx->mac_lock); 3678 3679 efx_start_all(efx); 3680 3681 efx_device_attach_if_not_resetting(efx); 3682 3683 efx->state = STATE_READY; 3684 3685 efx->type->resume_wol(efx); 3686 } 3687 3688 rtnl_unlock(); 3689 3690 /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ 3691 queue_work(reset_workqueue, &efx->reset_work); 3692 3693 return 0; 3694 3695 fail: 3696 rtnl_unlock(); 3697 3698 return rc; 3699 } 3700 3701 static int efx_pm_poweroff(struct device *dev) 3702 { 3703 struct pci_dev *pci_dev = to_pci_dev(dev); 3704 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3705 3706 efx->type->fini(efx); 3707 3708 efx->reset_pending = 0; 3709 3710 pci_save_state(pci_dev); 3711 return pci_set_power_state(pci_dev, PCI_D3hot); 3712 } 3713 3714 /* Used for both resume and restore */ 3715 static int efx_pm_resume(struct device *dev) 3716 { 3717 struct pci_dev *pci_dev = to_pci_dev(dev); 3718 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3719 int rc; 3720 3721 rc = pci_set_power_state(pci_dev, PCI_D0); 3722 if (rc) 3723 return rc; 3724 pci_restore_state(pci_dev); 3725 rc = pci_enable_device(pci_dev); 3726 if (rc) 3727 return rc; 3728 pci_set_master(efx->pci_dev); 3729 rc = efx->type->reset(efx, RESET_TYPE_ALL); 3730 if (rc) 3731 return rc; 3732 rc = efx->type->init(efx); 3733 if (rc) 3734 return rc; 3735 rc = efx_pm_thaw(dev); 3736 return rc; 3737 } 3738 3739 static int efx_pm_suspend(struct device *dev) 3740 { 3741 int rc; 3742 3743 efx_pm_freeze(dev); 3744 rc = efx_pm_poweroff(dev); 3745 if (rc) 3746 efx_pm_resume(dev); 3747 return rc; 3748 } 3749 3750 static const struct dev_pm_ops efx_pm_ops = { 3751 .suspend = efx_pm_suspend, 3752 .resume = efx_pm_resume, 3753 .freeze = efx_pm_freeze, 3754 .thaw = efx_pm_thaw, 3755 .poweroff = efx_pm_poweroff, 3756 .restore = efx_pm_resume, 3757 }; 3758 3759 /* A PCI error affecting this device was detected. 3760 * At this point MMIO and DMA may be disabled. 3761 * Stop the software path and request a slot reset. 3762 */ 3763 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, 3764 enum pci_channel_state state) 3765 { 3766 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 3767 struct efx_nic *efx = pci_get_drvdata(pdev); 3768 3769 if (state == pci_channel_io_perm_failure) 3770 return PCI_ERS_RESULT_DISCONNECT; 3771 3772 rtnl_lock(); 3773 3774 if (efx->state != STATE_DISABLED) { 3775 efx->state = STATE_RECOVERY; 3776 efx->reset_pending = 0; 3777 3778 efx_device_detach_sync(efx); 3779 3780 efx_stop_all(efx); 3781 efx_disable_interrupts(efx); 3782 3783 status = PCI_ERS_RESULT_NEED_RESET; 3784 } else { 3785 /* If the interface is disabled we don't want to do anything 3786 * with it. 3787 */ 3788 status = PCI_ERS_RESULT_RECOVERED; 3789 } 3790 3791 rtnl_unlock(); 3792 3793 pci_disable_device(pdev); 3794 3795 return status; 3796 } 3797 3798 /* Fake a successful reset, which will be performed later in efx_io_resume. */ 3799 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) 3800 { 3801 struct efx_nic *efx = pci_get_drvdata(pdev); 3802 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 3803 int rc; 3804 3805 if (pci_enable_device(pdev)) { 3806 netif_err(efx, hw, efx->net_dev, 3807 "Cannot re-enable PCI device after reset.\n"); 3808 status = PCI_ERS_RESULT_DISCONNECT; 3809 } 3810 3811 rc = pci_cleanup_aer_uncorrect_error_status(pdev); 3812 if (rc) { 3813 netif_err(efx, hw, efx->net_dev, 3814 "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc); 3815 /* Non-fatal error. Continue. */ 3816 } 3817 3818 return status; 3819 } 3820 3821 /* Perform the actual reset and resume I/O operations. */ 3822 static void efx_io_resume(struct pci_dev *pdev) 3823 { 3824 struct efx_nic *efx = pci_get_drvdata(pdev); 3825 int rc; 3826 3827 rtnl_lock(); 3828 3829 if (efx->state == STATE_DISABLED) 3830 goto out; 3831 3832 rc = efx_reset(efx, RESET_TYPE_ALL); 3833 if (rc) { 3834 netif_err(efx, hw, efx->net_dev, 3835 "efx_reset failed after PCI error (%d)\n", rc); 3836 } else { 3837 efx->state = STATE_READY; 3838 netif_dbg(efx, hw, efx->net_dev, 3839 "Done resetting and resuming IO after PCI error.\n"); 3840 } 3841 3842 out: 3843 rtnl_unlock(); 3844 } 3845 3846 /* For simplicity and reliability, we always require a slot reset and try to 3847 * reset the hardware when a pci error affecting the device is detected. 3848 * We leave both the link_reset and mmio_enabled callback unimplemented: 3849 * with our request for slot reset the mmio_enabled callback will never be 3850 * called, and the link_reset callback is not used by AER or EEH mechanisms. 3851 */ 3852 static const struct pci_error_handlers efx_err_handlers = { 3853 .error_detected = efx_io_error_detected, 3854 .slot_reset = efx_io_slot_reset, 3855 .resume = efx_io_resume, 3856 }; 3857 3858 static struct pci_driver efx_pci_driver = { 3859 .name = KBUILD_MODNAME, 3860 .id_table = efx_pci_table, 3861 .probe = efx_pci_probe, 3862 .remove = efx_pci_remove, 3863 .driver.pm = &efx_pm_ops, 3864 .err_handler = &efx_err_handlers, 3865 #ifdef CONFIG_SFC_SRIOV 3866 .sriov_configure = efx_pci_sriov_configure, 3867 #endif 3868 }; 3869 3870 /************************************************************************** 3871 * 3872 * Kernel module interface 3873 * 3874 *************************************************************************/ 3875 3876 module_param(interrupt_mode, uint, 0444); 3877 MODULE_PARM_DESC(interrupt_mode, 3878 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); 3879 3880 static int __init efx_init_module(void) 3881 { 3882 int rc; 3883 3884 printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n"); 3885 3886 rc = register_netdevice_notifier(&efx_netdev_notifier); 3887 if (rc) 3888 goto err_notifier; 3889 3890 #ifdef CONFIG_SFC_SRIOV 3891 rc = efx_init_sriov(); 3892 if (rc) 3893 goto err_sriov; 3894 #endif 3895 3896 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 3897 if (!reset_workqueue) { 3898 rc = -ENOMEM; 3899 goto err_reset; 3900 } 3901 3902 rc = pci_register_driver(&efx_pci_driver); 3903 if (rc < 0) 3904 goto err_pci; 3905 3906 return 0; 3907 3908 err_pci: 3909 destroy_workqueue(reset_workqueue); 3910 err_reset: 3911 #ifdef CONFIG_SFC_SRIOV 3912 efx_fini_sriov(); 3913 err_sriov: 3914 #endif 3915 unregister_netdevice_notifier(&efx_netdev_notifier); 3916 err_notifier: 3917 return rc; 3918 } 3919 3920 static void __exit efx_exit_module(void) 3921 { 3922 printk(KERN_INFO "Solarflare NET driver unloading\n"); 3923 3924 pci_unregister_driver(&efx_pci_driver); 3925 destroy_workqueue(reset_workqueue); 3926 #ifdef CONFIG_SFC_SRIOV 3927 efx_fini_sriov(); 3928 #endif 3929 unregister_netdevice_notifier(&efx_netdev_notifier); 3930 3931 } 3932 3933 module_init(efx_init_module); 3934 module_exit(efx_exit_module); 3935 3936 MODULE_AUTHOR("Solarflare Communications and " 3937 "Michael Brown <mbrown@fensystems.co.uk>"); 3938 MODULE_DESCRIPTION("Solarflare network driver"); 3939 MODULE_LICENSE("GPL"); 3940 MODULE_DEVICE_TABLE(pci, efx_pci_table); 3941 MODULE_VERSION(EFX_DRIVER_VERSION); 3942