1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2005-2006 Fen Systems Ltd. 5 * Copyright 2005-2013 Solarflare Communications Inc. 6 */ 7 8 #include <linux/module.h> 9 #include <linux/pci.h> 10 #include <linux/netdevice.h> 11 #include <linux/etherdevice.h> 12 #include <linux/delay.h> 13 #include <linux/notifier.h> 14 #include <linux/ip.h> 15 #include <linux/tcp.h> 16 #include <linux/in.h> 17 #include <linux/ethtool.h> 18 #include <linux/topology.h> 19 #include <linux/gfp.h> 20 #include <linux/aer.h> 21 #include <linux/interrupt.h> 22 #include "net_driver.h" 23 #include <net/gre.h> 24 #include <net/udp_tunnel.h> 25 #include "efx.h" 26 #include "nic.h" 27 #include "io.h" 28 #include "selftest.h" 29 #include "sriov.h" 30 31 #include "mcdi.h" 32 #include "mcdi_pcol.h" 33 #include "workarounds.h" 34 35 /************************************************************************** 36 * 37 * Type name strings 38 * 39 ************************************************************************** 40 */ 41 42 /* Loopback mode names (see LOOPBACK_MODE()) */ 43 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 44 const char *const efx_loopback_mode_names[] = { 45 [LOOPBACK_NONE] = "NONE", 46 [LOOPBACK_DATA] = "DATAPATH", 47 [LOOPBACK_GMAC] = "GMAC", 48 [LOOPBACK_XGMII] = "XGMII", 49 [LOOPBACK_XGXS] = "XGXS", 50 [LOOPBACK_XAUI] = "XAUI", 51 [LOOPBACK_GMII] = "GMII", 52 [LOOPBACK_SGMII] = "SGMII", 53 [LOOPBACK_XGBR] = "XGBR", 54 [LOOPBACK_XFI] = "XFI", 55 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 56 [LOOPBACK_GMII_FAR] = "GMII_FAR", 57 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 58 [LOOPBACK_XFI_FAR] = "XFI_FAR", 59 [LOOPBACK_GPHY] = "GPHY", 60 [LOOPBACK_PHYXS] = "PHYXS", 61 [LOOPBACK_PCS] = "PCS", 62 [LOOPBACK_PMAPMD] = "PMA/PMD", 63 [LOOPBACK_XPORT] = "XPORT", 64 [LOOPBACK_XGMII_WS] = "XGMII_WS", 65 [LOOPBACK_XAUI_WS] = "XAUI_WS", 66 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 67 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 68 [LOOPBACK_GMII_WS] = "GMII_WS", 69 [LOOPBACK_XFI_WS] = "XFI_WS", 70 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 71 [LOOPBACK_PHYXS_WS] = "PHYXS_WS", 72 }; 73 74 const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 75 const char *const efx_reset_type_names[] = { 76 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 77 [RESET_TYPE_ALL] = "ALL", 78 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 79 [RESET_TYPE_WORLD] = "WORLD", 80 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 81 [RESET_TYPE_DATAPATH] = "DATAPATH", 82 [RESET_TYPE_MC_BIST] = "MC_BIST", 83 [RESET_TYPE_DISABLE] = "DISABLE", 84 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 85 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 86 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 87 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 88 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 89 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", 90 }; 91 92 /* UDP tunnel type names */ 93 static const char *const efx_udp_tunnel_type_names[] = { 94 [TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan", 95 [TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve", 96 }; 97 98 void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen) 99 { 100 if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) && 101 efx_udp_tunnel_type_names[type] != NULL) 102 snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]); 103 else 104 snprintf(buf, buflen, "type %d", type); 105 } 106 107 /* Reset workqueue. If any NIC has a hardware failure then a reset will be 108 * queued onto this work queue. 
 * This is not a per-nic work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100

/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
 * Set this to 1 to use separate channels for TX and RX. It allows us
 * to control interrupt affinity separately for TX and RX.
 *
 * This is only used in MSI-X interrupt mode
 */
bool efx_separate_tx_channels;
module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
		 "Use separate channels for TX and RX");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.
 * On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full.  A queue is
 * restarted when it drops below half full, i.e. once 512 of the 1024
 * descriptors have been completed.  Assuming a worst case of 3
 * descriptors per packet, that is ~170 packets, and at ~1.2 usec per
 * full-sized packet on a 10G link the time this takes is
 * 512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
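 * For example, loading the module with "rss_cpus=4" (e.g.
 * "modprobe sfc rss_cpus=4") limits RSS to four receive channels
 * regardless of how many CPUs are online.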
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");

static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

static unsigned irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

static unsigned irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/

static int efx_soft_enable_interrupts(struct efx_nic *efx);
static void efx_soft_disable_interrupts(struct efx_nic *efx);
static void efx_remove_channel(struct efx_channel *channel);
static void efx_remove_channels(struct efx_nic *efx);
static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
static void efx_init_napi_channel(struct efx_channel *channel);
static void efx_fini_napi(struct efx_nic *efx);
static void efx_fini_napi_channel(struct efx_channel *channel);
static void efx_fini_struct(struct efx_nic *efx);
static void efx_start_all(struct efx_nic *efx);
static void efx_stop_all(struct efx_nic *efx);

#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_READY) ||	\
		    (efx->state == STATE_RECOVERY) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)

static int efx_check_disabled(struct efx_nic *efx)
{
	if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
		netif_err(efx, drv, efx->net_dev,
			  "device is disabled due to earlier errors\n");
		return -EIO;
	}
	return 0;
}

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
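 *
 * Returns the number of packets processed, which will not exceed
 * @budget; efx_poll() uses this to decide whether the poll is complete.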
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
	struct efx_tx_queue *tx_queue;
	struct list_head rx_list;
	int spent;

	if (unlikely(!channel->enabled))
		return 0;

	/* Prepare the batch receive list */
	EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
	INIT_LIST_HEAD(&rx_list);
	channel->rx_list = &rx_list;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		tx_queue->pkts_compl = 0;
		tx_queue->bytes_compl = 0;
	}

	spent = efx_nic_process_eventq(channel, budget);
	if (spent && efx_channel_has_rx_queue(channel)) {
		struct efx_rx_queue *rx_queue =
			efx_channel_get_rx_queue(channel);

		efx_rx_flush_packet(channel);
		efx_fast_push_rx_descriptors(rx_queue, true);
	}

	/* Update BQL */
	efx_for_each_channel_tx_queue(tx_queue, channel) {
		if (tx_queue->bytes_compl) {
			netdev_tx_completed_queue(tx_queue->core_txq,
				tx_queue->pkts_compl, tx_queue->bytes_compl);
		}
	}

	/* Receive any packets we queued up */
	netif_receive_skb_list(channel->rx_list);
	channel->rx_list = NULL;

	return spent;
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
	int step = efx->irq_mod_step_us;

	if (channel->irq_mod_score < irq_adapt_low_thresh) {
		if (channel->irq_moderation_us > step) {
			channel->irq_moderation_us -= step;
			efx->type->push_irq_moderation(channel);
		}
	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
		if (channel->irq_moderation_us <
		    efx->irq_rx_moderation_us) {
			channel->irq_moderation_us += step;
			efx->type->push_irq_moderation(channel);
		}
	}

	channel->irq_count = 0;
	channel->irq_mod_score = 0;
}

static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	struct efx_nic *efx = channel->efx;
	int spent;

	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());

	spent = efx_process_channel(channel, budget);

	if (spent < budget) {
		if (efx_channel_has_rx_queue(channel) &&
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			efx_update_irq_mod(efx, channel);
		}

#ifdef CONFIG_RFS_ACCEL
		/* Perhaps expire some ARFS filters */
		schedule_work(&channel->filter_work);
#endif

		/* There is no race here; although napi_disable() will
		 * only wait for napi_complete(), this isn't a problem
		 * since efx_nic_eventq_read_ack() will have no effect if
		 * interrupts have already been disabled.
		 */
		if (napi_complete_done(napi, spent))
			efx_nic_eventq_read_ack(channel);
	}

	return spent;
}

/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
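 * For example, with 1024 RX and 1024 TX descriptors the event queue is
 * sized to roundup_pow_of_two(1024 + 1024 + 128) = 4096 entries.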
371 */ 372 static int efx_probe_eventq(struct efx_channel *channel) 373 { 374 struct efx_nic *efx = channel->efx; 375 unsigned long entries; 376 377 netif_dbg(efx, probe, efx->net_dev, 378 "chan %d create event queue\n", channel->channel); 379 380 /* Build an event queue with room for one event per tx and rx buffer, 381 * plus some extra for link state events and MCDI completions. */ 382 entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128); 383 EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE); 384 channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1; 385 386 return efx_nic_probe_eventq(channel); 387 } 388 389 /* Prepare channel's event queue */ 390 static int efx_init_eventq(struct efx_channel *channel) 391 { 392 struct efx_nic *efx = channel->efx; 393 int rc; 394 395 EFX_WARN_ON_PARANOID(channel->eventq_init); 396 397 netif_dbg(efx, drv, efx->net_dev, 398 "chan %d init event queue\n", channel->channel); 399 400 rc = efx_nic_init_eventq(channel); 401 if (rc == 0) { 402 efx->type->push_irq_moderation(channel); 403 channel->eventq_read_ptr = 0; 404 channel->eventq_init = true; 405 } 406 return rc; 407 } 408 409 /* Enable event queue processing and NAPI */ 410 void efx_start_eventq(struct efx_channel *channel) 411 { 412 netif_dbg(channel->efx, ifup, channel->efx->net_dev, 413 "chan %d start event queue\n", channel->channel); 414 415 /* Make sure the NAPI handler sees the enabled flag set */ 416 channel->enabled = true; 417 smp_wmb(); 418 419 napi_enable(&channel->napi_str); 420 efx_nic_eventq_read_ack(channel); 421 } 422 423 /* Disable event queue processing and NAPI */ 424 void efx_stop_eventq(struct efx_channel *channel) 425 { 426 if (!channel->enabled) 427 return; 428 429 napi_disable(&channel->napi_str); 430 channel->enabled = false; 431 } 432 433 static void efx_fini_eventq(struct efx_channel *channel) 434 { 435 if (!channel->eventq_init) 436 return; 437 438 netif_dbg(channel->efx, drv, channel->efx->net_dev, 439 "chan %d fini event queue\n", channel->channel); 440 441 efx_nic_fini_eventq(channel); 442 channel->eventq_init = false; 443 } 444 445 static void efx_remove_eventq(struct efx_channel *channel) 446 { 447 netif_dbg(channel->efx, drv, channel->efx->net_dev, 448 "chan %d remove event queue\n", channel->channel); 449 450 efx_nic_remove_eventq(channel); 451 } 452 453 /************************************************************************** 454 * 455 * Channel handling 456 * 457 *************************************************************************/ 458 459 /* Allocate and initialise a channel structure. 
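 * Returns NULL on allocation failure.  The caller owns the returned
 * structure and eventually frees it with kfree().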
*/ 460 static struct efx_channel * 461 efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) 462 { 463 struct efx_channel *channel; 464 struct efx_rx_queue *rx_queue; 465 struct efx_tx_queue *tx_queue; 466 int j; 467 468 channel = kzalloc(sizeof(*channel), GFP_KERNEL); 469 if (!channel) 470 return NULL; 471 472 channel->efx = efx; 473 channel->channel = i; 474 channel->type = &efx_default_channel_type; 475 476 for (j = 0; j < EFX_TXQ_TYPES; j++) { 477 tx_queue = &channel->tx_queue[j]; 478 tx_queue->efx = efx; 479 tx_queue->queue = i * EFX_TXQ_TYPES + j; 480 tx_queue->channel = channel; 481 } 482 483 #ifdef CONFIG_RFS_ACCEL 484 INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); 485 #endif 486 487 rx_queue = &channel->rx_queue; 488 rx_queue->efx = efx; 489 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); 490 491 return channel; 492 } 493 494 /* Allocate and initialise a channel structure, copying parameters 495 * (but not resources) from an old channel structure. 496 */ 497 static struct efx_channel * 498 efx_copy_channel(const struct efx_channel *old_channel) 499 { 500 struct efx_channel *channel; 501 struct efx_rx_queue *rx_queue; 502 struct efx_tx_queue *tx_queue; 503 int j; 504 505 channel = kmalloc(sizeof(*channel), GFP_KERNEL); 506 if (!channel) 507 return NULL; 508 509 *channel = *old_channel; 510 511 channel->napi_dev = NULL; 512 INIT_HLIST_NODE(&channel->napi_str.napi_hash_node); 513 channel->napi_str.napi_id = 0; 514 channel->napi_str.state = 0; 515 memset(&channel->eventq, 0, sizeof(channel->eventq)); 516 517 for (j = 0; j < EFX_TXQ_TYPES; j++) { 518 tx_queue = &channel->tx_queue[j]; 519 if (tx_queue->channel) 520 tx_queue->channel = channel; 521 tx_queue->buffer = NULL; 522 memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); 523 } 524 525 rx_queue = &channel->rx_queue; 526 rx_queue->buffer = NULL; 527 memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); 528 timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); 529 #ifdef CONFIG_RFS_ACCEL 530 INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); 531 #endif 532 533 return channel; 534 } 535 536 static int efx_probe_channel(struct efx_channel *channel) 537 { 538 struct efx_tx_queue *tx_queue; 539 struct efx_rx_queue *rx_queue; 540 int rc; 541 542 netif_dbg(channel->efx, probe, channel->efx->net_dev, 543 "creating channel %d\n", channel->channel); 544 545 rc = channel->type->pre_probe(channel); 546 if (rc) 547 goto fail; 548 549 rc = efx_probe_eventq(channel); 550 if (rc) 551 goto fail; 552 553 efx_for_each_channel_tx_queue(tx_queue, channel) { 554 rc = efx_probe_tx_queue(tx_queue); 555 if (rc) 556 goto fail; 557 } 558 559 efx_for_each_channel_rx_queue(rx_queue, channel) { 560 rc = efx_probe_rx_queue(rx_queue); 561 if (rc) 562 goto fail; 563 } 564 565 channel->rx_list = NULL; 566 567 return 0; 568 569 fail: 570 efx_remove_channel(channel); 571 return rc; 572 } 573 574 static void 575 efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len) 576 { 577 struct efx_nic *efx = channel->efx; 578 const char *type; 579 int number; 580 581 number = channel->channel; 582 if (efx->tx_channel_offset == 0) { 583 type = ""; 584 } else if (channel->channel < efx->tx_channel_offset) { 585 type = "-rx"; 586 } else { 587 type = "-tx"; 588 number -= efx->tx_channel_offset; 589 } 590 snprintf(buf, len, "%s%s-%d", efx->name, type, number); 591 } 592 593 static void efx_set_channel_names(struct efx_nic *efx) 594 { 595 struct efx_channel *channel; 596 597 efx_for_each_channel(channel, efx) 598 
channel->type->get_name(channel, 599 efx->msi_context[channel->channel].name, 600 sizeof(efx->msi_context[0].name)); 601 } 602 603 static int efx_probe_channels(struct efx_nic *efx) 604 { 605 struct efx_channel *channel; 606 int rc; 607 608 /* Restart special buffer allocation */ 609 efx->next_buffer_table = 0; 610 611 /* Probe channels in reverse, so that any 'extra' channels 612 * use the start of the buffer table. This allows the traffic 613 * channels to be resized without moving them or wasting the 614 * entries before them. 615 */ 616 efx_for_each_channel_rev(channel, efx) { 617 rc = efx_probe_channel(channel); 618 if (rc) { 619 netif_err(efx, probe, efx->net_dev, 620 "failed to create channel %d\n", 621 channel->channel); 622 goto fail; 623 } 624 } 625 efx_set_channel_names(efx); 626 627 return 0; 628 629 fail: 630 efx_remove_channels(efx); 631 return rc; 632 } 633 634 /* Channels are shutdown and reinitialised whilst the NIC is running 635 * to propagate configuration changes (mtu, checksum offload), or 636 * to clear hardware error conditions 637 */ 638 static void efx_start_datapath(struct efx_nic *efx) 639 { 640 netdev_features_t old_features = efx->net_dev->features; 641 bool old_rx_scatter = efx->rx_scatter; 642 struct efx_tx_queue *tx_queue; 643 struct efx_rx_queue *rx_queue; 644 struct efx_channel *channel; 645 size_t rx_buf_len; 646 647 /* Calculate the rx buffer allocation parameters required to 648 * support the current MTU, including padding for header 649 * alignment and overruns. 650 */ 651 efx->rx_dma_len = (efx->rx_prefix_size + 652 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 653 efx->type->rx_buffer_padding); 654 rx_buf_len = (sizeof(struct efx_rx_page_state) + 655 efx->rx_ip_align + efx->rx_dma_len); 656 if (rx_buf_len <= PAGE_SIZE) { 657 efx->rx_scatter = efx->type->always_rx_scatter; 658 efx->rx_buffer_order = 0; 659 } else if (efx->type->can_rx_scatter) { 660 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); 661 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + 662 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, 663 EFX_RX_BUF_ALIGNMENT) > 664 PAGE_SIZE); 665 efx->rx_scatter = true; 666 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; 667 efx->rx_buffer_order = 0; 668 } else { 669 efx->rx_scatter = false; 670 efx->rx_buffer_order = get_order(rx_buf_len); 671 } 672 673 efx_rx_config_page_split(efx); 674 if (efx->rx_buffer_order) 675 netif_dbg(efx, drv, efx->net_dev, 676 "RX buf len=%u; page order=%u batch=%u\n", 677 efx->rx_dma_len, efx->rx_buffer_order, 678 efx->rx_pages_per_batch); 679 else 680 netif_dbg(efx, drv, efx->net_dev, 681 "RX buf len=%u step=%u bpp=%u; page batch=%u\n", 682 efx->rx_dma_len, efx->rx_page_buf_step, 683 efx->rx_bufs_per_page, efx->rx_pages_per_batch); 684 685 /* Restore previously fixed features in hw_features and remove 686 * features which are fixed now 687 */ 688 efx->net_dev->hw_features |= efx->net_dev->features; 689 efx->net_dev->hw_features &= ~efx->fixed_features; 690 efx->net_dev->features |= efx->fixed_features; 691 if (efx->net_dev->features != old_features) 692 netdev_features_change(efx->net_dev); 693 694 /* RX filters may also have scatter-enabled flags */ 695 if (efx->rx_scatter != old_rx_scatter) 696 efx->type->filter_update_rx_scatter(efx); 697 698 /* We must keep at least one descriptor in a TX ring empty. 699 * We could avoid this when the queue size does not exactly 700 * match the hardware ring size, but it's not that important. 701 * Therefore we stop the queue when one more skb might fill 702 * the ring completely. 
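	 * The stop threshold is therefore
	 * txq_entries - efx_tx_max_skb_descs(), as set just below.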
We wake it when half way back to 703 * empty. 704 */ 705 efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); 706 efx->txq_wake_thresh = efx->txq_stop_thresh / 2; 707 708 /* Initialise the channels */ 709 efx_for_each_channel(channel, efx) { 710 efx_for_each_channel_tx_queue(tx_queue, channel) { 711 efx_init_tx_queue(tx_queue); 712 atomic_inc(&efx->active_queues); 713 } 714 715 efx_for_each_channel_rx_queue(rx_queue, channel) { 716 efx_init_rx_queue(rx_queue); 717 atomic_inc(&efx->active_queues); 718 efx_stop_eventq(channel); 719 efx_fast_push_rx_descriptors(rx_queue, false); 720 efx_start_eventq(channel); 721 } 722 723 WARN_ON(channel->rx_pkt_n_frags); 724 } 725 726 efx_ptp_start_datapath(efx); 727 728 if (netif_device_present(efx->net_dev)) 729 netif_tx_wake_all_queues(efx->net_dev); 730 } 731 732 static void efx_stop_datapath(struct efx_nic *efx) 733 { 734 struct efx_channel *channel; 735 struct efx_tx_queue *tx_queue; 736 struct efx_rx_queue *rx_queue; 737 int rc; 738 739 EFX_ASSERT_RESET_SERIALISED(efx); 740 BUG_ON(efx->port_enabled); 741 742 efx_ptp_stop_datapath(efx); 743 744 /* Stop RX refill */ 745 efx_for_each_channel(channel, efx) { 746 efx_for_each_channel_rx_queue(rx_queue, channel) 747 rx_queue->refill_enabled = false; 748 } 749 750 efx_for_each_channel(channel, efx) { 751 /* RX packet processing is pipelined, so wait for the 752 * NAPI handler to complete. At least event queue 0 753 * might be kept active by non-data events, so don't 754 * use napi_synchronize() but actually disable NAPI 755 * temporarily. 756 */ 757 if (efx_channel_has_rx_queue(channel)) { 758 efx_stop_eventq(channel); 759 efx_start_eventq(channel); 760 } 761 } 762 763 rc = efx->type->fini_dmaq(efx); 764 if (rc) { 765 netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); 766 } else { 767 netif_dbg(efx, drv, efx->net_dev, 768 "successfully flushed all queues\n"); 769 } 770 771 efx_for_each_channel(channel, efx) { 772 efx_for_each_channel_rx_queue(rx_queue, channel) 773 efx_fini_rx_queue(rx_queue); 774 efx_for_each_possible_channel_tx_queue(tx_queue, channel) 775 efx_fini_tx_queue(tx_queue); 776 } 777 } 778 779 static void efx_remove_channel(struct efx_channel *channel) 780 { 781 struct efx_tx_queue *tx_queue; 782 struct efx_rx_queue *rx_queue; 783 784 netif_dbg(channel->efx, drv, channel->efx->net_dev, 785 "destroy chan %d\n", channel->channel); 786 787 efx_for_each_channel_rx_queue(rx_queue, channel) 788 efx_remove_rx_queue(rx_queue); 789 efx_for_each_possible_channel_tx_queue(tx_queue, channel) 790 efx_remove_tx_queue(tx_queue); 791 efx_remove_eventq(channel); 792 channel->type->post_remove(channel); 793 } 794 795 static void efx_remove_channels(struct efx_nic *efx) 796 { 797 struct efx_channel *channel; 798 799 efx_for_each_channel(channel, efx) 800 efx_remove_channel(channel); 801 } 802 803 int 804 efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) 805 { 806 struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; 807 u32 old_rxq_entries, old_txq_entries; 808 unsigned i, next_buffer_table = 0; 809 int rc, rc2; 810 811 rc = efx_check_disabled(efx); 812 if (rc) 813 return rc; 814 815 /* Not all channels should be reallocated. We must avoid 816 * reallocating their buffer table entries. 
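	 * Channels whose type has no ->copy() method are left in place,
	 * and the loop below reserves their existing buffer table entries.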
817 */ 818 efx_for_each_channel(channel, efx) { 819 struct efx_rx_queue *rx_queue; 820 struct efx_tx_queue *tx_queue; 821 822 if (channel->type->copy) 823 continue; 824 next_buffer_table = max(next_buffer_table, 825 channel->eventq.index + 826 channel->eventq.entries); 827 efx_for_each_channel_rx_queue(rx_queue, channel) 828 next_buffer_table = max(next_buffer_table, 829 rx_queue->rxd.index + 830 rx_queue->rxd.entries); 831 efx_for_each_channel_tx_queue(tx_queue, channel) 832 next_buffer_table = max(next_buffer_table, 833 tx_queue->txd.index + 834 tx_queue->txd.entries); 835 } 836 837 efx_device_detach_sync(efx); 838 efx_stop_all(efx); 839 efx_soft_disable_interrupts(efx); 840 841 /* Clone channels (where possible) */ 842 memset(other_channel, 0, sizeof(other_channel)); 843 for (i = 0; i < efx->n_channels; i++) { 844 channel = efx->channel[i]; 845 if (channel->type->copy) 846 channel = channel->type->copy(channel); 847 if (!channel) { 848 rc = -ENOMEM; 849 goto out; 850 } 851 other_channel[i] = channel; 852 } 853 854 /* Swap entry counts and channel pointers */ 855 old_rxq_entries = efx->rxq_entries; 856 old_txq_entries = efx->txq_entries; 857 efx->rxq_entries = rxq_entries; 858 efx->txq_entries = txq_entries; 859 for (i = 0; i < efx->n_channels; i++) { 860 channel = efx->channel[i]; 861 efx->channel[i] = other_channel[i]; 862 other_channel[i] = channel; 863 } 864 865 /* Restart buffer table allocation */ 866 efx->next_buffer_table = next_buffer_table; 867 868 for (i = 0; i < efx->n_channels; i++) { 869 channel = efx->channel[i]; 870 if (!channel->type->copy) 871 continue; 872 rc = efx_probe_channel(channel); 873 if (rc) 874 goto rollback; 875 efx_init_napi_channel(efx->channel[i]); 876 } 877 878 out: 879 /* Destroy unused channel structures */ 880 for (i = 0; i < efx->n_channels; i++) { 881 channel = other_channel[i]; 882 if (channel && channel->type->copy) { 883 efx_fini_napi_channel(channel); 884 efx_remove_channel(channel); 885 kfree(channel); 886 } 887 } 888 889 rc2 = efx_soft_enable_interrupts(efx); 890 if (rc2) { 891 rc = rc ? 
rc : rc2; 892 netif_err(efx, drv, efx->net_dev, 893 "unable to restart interrupts on channel reallocation\n"); 894 efx_schedule_reset(efx, RESET_TYPE_DISABLE); 895 } else { 896 efx_start_all(efx); 897 efx_device_attach_if_not_resetting(efx); 898 } 899 return rc; 900 901 rollback: 902 /* Swap back */ 903 efx->rxq_entries = old_rxq_entries; 904 efx->txq_entries = old_txq_entries; 905 for (i = 0; i < efx->n_channels; i++) { 906 channel = efx->channel[i]; 907 efx->channel[i] = other_channel[i]; 908 other_channel[i] = channel; 909 } 910 goto out; 911 } 912 913 void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) 914 { 915 mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10)); 916 } 917 918 static bool efx_default_channel_want_txqs(struct efx_channel *channel) 919 { 920 return channel->channel - channel->efx->tx_channel_offset < 921 channel->efx->n_tx_channels; 922 } 923 924 static const struct efx_channel_type efx_default_channel_type = { 925 .pre_probe = efx_channel_dummy_op_int, 926 .post_remove = efx_channel_dummy_op_void, 927 .get_name = efx_get_channel_name, 928 .copy = efx_copy_channel, 929 .want_txqs = efx_default_channel_want_txqs, 930 .keep_eventq = false, 931 .want_pio = true, 932 }; 933 934 int efx_channel_dummy_op_int(struct efx_channel *channel) 935 { 936 return 0; 937 } 938 939 void efx_channel_dummy_op_void(struct efx_channel *channel) 940 { 941 } 942 943 /************************************************************************** 944 * 945 * Port handling 946 * 947 **************************************************************************/ 948 949 /* This ensures that the kernel is kept informed (via 950 * netif_carrier_on/off) of the link status, and also maintains the 951 * link status's stop on the port's TX queue. 952 */ 953 void efx_link_status_changed(struct efx_nic *efx) 954 { 955 struct efx_link_state *link_state = &efx->link_state; 956 957 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 958 * that no events are triggered between unregister_netdev() and the 959 * driver unloading. A more general condition is that NETDEV_CHANGE 960 * can only be generated between NETDEV_UP and NETDEV_DOWN */ 961 if (!netif_running(efx->net_dev)) 962 return; 963 964 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 965 efx->n_link_state_changes++; 966 967 if (link_state->up) 968 netif_carrier_on(efx->net_dev); 969 else 970 netif_carrier_off(efx->net_dev); 971 } 972 973 /* Status message for kernel log */ 974 if (link_state->up) 975 netif_info(efx, link, efx->net_dev, 976 "link up at %uMbps %s-duplex (MTU %d)\n", 977 link_state->speed, link_state->fd ? "full" : "half", 978 efx->net_dev->mtu); 979 else 980 netif_info(efx, link, efx->net_dev, "link down\n"); 981 } 982 983 void efx_link_set_advertising(struct efx_nic *efx, 984 const unsigned long *advertising) 985 { 986 memcpy(efx->link_advertising, advertising, 987 sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK())); 988 989 efx->link_advertising[0] |= ADVERTISED_Autoneg; 990 if (advertising[0] & ADVERTISED_Pause) 991 efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); 992 else 993 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 994 if (advertising[0] & ADVERTISED_Asym_Pause) 995 efx->wanted_fc ^= EFX_FC_TX; 996 } 997 998 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not 999 * force the Autoneg bit on. 
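 * It also clears the wanted RX and TX flow control flags.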
1000 */ 1001 void efx_link_clear_advertising(struct efx_nic *efx) 1002 { 1003 bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); 1004 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 1005 } 1006 1007 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) 1008 { 1009 efx->wanted_fc = wanted_fc; 1010 if (efx->link_advertising[0]) { 1011 if (wanted_fc & EFX_FC_RX) 1012 efx->link_advertising[0] |= (ADVERTISED_Pause | 1013 ADVERTISED_Asym_Pause); 1014 else 1015 efx->link_advertising[0] &= ~(ADVERTISED_Pause | 1016 ADVERTISED_Asym_Pause); 1017 if (wanted_fc & EFX_FC_TX) 1018 efx->link_advertising[0] ^= ADVERTISED_Asym_Pause; 1019 } 1020 } 1021 1022 static void efx_fini_port(struct efx_nic *efx); 1023 1024 /* We assume that efx->type->reconfigure_mac will always try to sync RX 1025 * filters and therefore needs to read-lock the filter table against freeing 1026 */ 1027 void efx_mac_reconfigure(struct efx_nic *efx) 1028 { 1029 down_read(&efx->filter_sem); 1030 efx->type->reconfigure_mac(efx); 1031 up_read(&efx->filter_sem); 1032 } 1033 1034 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 1035 * the MAC appropriately. All other PHY configuration changes are pushed 1036 * through phy_op->set_settings(), and pushed asynchronously to the MAC 1037 * through efx_monitor(). 1038 * 1039 * Callers must hold the mac_lock 1040 */ 1041 int __efx_reconfigure_port(struct efx_nic *efx) 1042 { 1043 enum efx_phy_mode phy_mode; 1044 int rc; 1045 1046 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 1047 1048 /* Disable PHY transmit in mac level loopbacks */ 1049 phy_mode = efx->phy_mode; 1050 if (LOOPBACK_INTERNAL(efx)) 1051 efx->phy_mode |= PHY_MODE_TX_DISABLED; 1052 else 1053 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 1054 1055 rc = efx->type->reconfigure_port(efx); 1056 1057 if (rc) 1058 efx->phy_mode = phy_mode; 1059 1060 return rc; 1061 } 1062 1063 /* Reinitialise the MAC to pick up new PHY settings, even if the port is 1064 * disabled. */ 1065 int efx_reconfigure_port(struct efx_nic *efx) 1066 { 1067 int rc; 1068 1069 EFX_ASSERT_RESET_SERIALISED(efx); 1070 1071 mutex_lock(&efx->mac_lock); 1072 rc = __efx_reconfigure_port(efx); 1073 mutex_unlock(&efx->mac_lock); 1074 1075 return rc; 1076 } 1077 1078 /* Asynchronous work item for changing MAC promiscuity and multicast 1079 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 1080 * MAC directly. 
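 * The work item only touches the MAC while the port is enabled;
 * efx_stop_port() cancels it after clearing port_enabled.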
*/ 1081 static void efx_mac_work(struct work_struct *data) 1082 { 1083 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 1084 1085 mutex_lock(&efx->mac_lock); 1086 if (efx->port_enabled) 1087 efx_mac_reconfigure(efx); 1088 mutex_unlock(&efx->mac_lock); 1089 } 1090 1091 static int efx_probe_port(struct efx_nic *efx) 1092 { 1093 int rc; 1094 1095 netif_dbg(efx, probe, efx->net_dev, "create port\n"); 1096 1097 if (phy_flash_cfg) 1098 efx->phy_mode = PHY_MODE_SPECIAL; 1099 1100 /* Connect up MAC/PHY operations table */ 1101 rc = efx->type->probe_port(efx); 1102 if (rc) 1103 return rc; 1104 1105 /* Initialise MAC address to permanent address */ 1106 ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr); 1107 1108 return 0; 1109 } 1110 1111 static int efx_init_port(struct efx_nic *efx) 1112 { 1113 int rc; 1114 1115 netif_dbg(efx, drv, efx->net_dev, "init port\n"); 1116 1117 mutex_lock(&efx->mac_lock); 1118 1119 rc = efx->phy_op->init(efx); 1120 if (rc) 1121 goto fail1; 1122 1123 efx->port_initialized = true; 1124 1125 /* Reconfigure the MAC before creating dma queues (required for 1126 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */ 1127 efx_mac_reconfigure(efx); 1128 1129 /* Ensure the PHY advertises the correct flow control settings */ 1130 rc = efx->phy_op->reconfigure(efx); 1131 if (rc && rc != -EPERM) 1132 goto fail2; 1133 1134 mutex_unlock(&efx->mac_lock); 1135 return 0; 1136 1137 fail2: 1138 efx->phy_op->fini(efx); 1139 fail1: 1140 mutex_unlock(&efx->mac_lock); 1141 return rc; 1142 } 1143 1144 static void efx_start_port(struct efx_nic *efx) 1145 { 1146 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 1147 BUG_ON(efx->port_enabled); 1148 1149 mutex_lock(&efx->mac_lock); 1150 efx->port_enabled = true; 1151 1152 /* Ensure MAC ingress/egress is enabled */ 1153 efx_mac_reconfigure(efx); 1154 1155 mutex_unlock(&efx->mac_lock); 1156 } 1157 1158 /* Cancel work for MAC reconfiguration, periodic hardware monitoring 1159 * and the async self-test, wait for them to finish and prevent them 1160 * being scheduled again. This doesn't cover online resets, which 1161 * should only be cancelled when removing the device. 
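 * port_enabled is cleared under the mac_lock before the work items are
 * cancelled, so any item that is already running sees the flag and
 * returns without touching the hardware.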
1162 */ 1163 static void efx_stop_port(struct efx_nic *efx) 1164 { 1165 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 1166 1167 EFX_ASSERT_RESET_SERIALISED(efx); 1168 1169 mutex_lock(&efx->mac_lock); 1170 efx->port_enabled = false; 1171 mutex_unlock(&efx->mac_lock); 1172 1173 /* Serialise against efx_set_multicast_list() */ 1174 netif_addr_lock_bh(efx->net_dev); 1175 netif_addr_unlock_bh(efx->net_dev); 1176 1177 cancel_delayed_work_sync(&efx->monitor_work); 1178 efx_selftest_async_cancel(efx); 1179 cancel_work_sync(&efx->mac_work); 1180 } 1181 1182 static void efx_fini_port(struct efx_nic *efx) 1183 { 1184 netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); 1185 1186 if (!efx->port_initialized) 1187 return; 1188 1189 efx->phy_op->fini(efx); 1190 efx->port_initialized = false; 1191 1192 efx->link_state.up = false; 1193 efx_link_status_changed(efx); 1194 } 1195 1196 static void efx_remove_port(struct efx_nic *efx) 1197 { 1198 netif_dbg(efx, drv, efx->net_dev, "destroying port\n"); 1199 1200 efx->type->remove_port(efx); 1201 } 1202 1203 /************************************************************************** 1204 * 1205 * NIC handling 1206 * 1207 **************************************************************************/ 1208 1209 static LIST_HEAD(efx_primary_list); 1210 static LIST_HEAD(efx_unassociated_list); 1211 1212 static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right) 1213 { 1214 return left->type == right->type && 1215 left->vpd_sn && right->vpd_sn && 1216 !strcmp(left->vpd_sn, right->vpd_sn); 1217 } 1218 1219 static void efx_associate(struct efx_nic *efx) 1220 { 1221 struct efx_nic *other, *next; 1222 1223 if (efx->primary == efx) { 1224 /* Adding primary function; look for secondaries */ 1225 1226 netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n"); 1227 list_add_tail(&efx->node, &efx_primary_list); 1228 1229 list_for_each_entry_safe(other, next, &efx_unassociated_list, 1230 node) { 1231 if (efx_same_controller(efx, other)) { 1232 list_del(&other->node); 1233 netif_dbg(other, probe, other->net_dev, 1234 "moving to secondary list of %s %s\n", 1235 pci_name(efx->pci_dev), 1236 efx->net_dev->name); 1237 list_add_tail(&other->node, 1238 &efx->secondary_list); 1239 other->primary = efx; 1240 } 1241 } 1242 } else { 1243 /* Adding secondary function; look for primary */ 1244 1245 list_for_each_entry(other, &efx_primary_list, node) { 1246 if (efx_same_controller(efx, other)) { 1247 netif_dbg(efx, probe, efx->net_dev, 1248 "adding to secondary list of %s %s\n", 1249 pci_name(other->pci_dev), 1250 other->net_dev->name); 1251 list_add_tail(&efx->node, 1252 &other->secondary_list); 1253 efx->primary = other; 1254 return; 1255 } 1256 } 1257 1258 netif_dbg(efx, probe, efx->net_dev, 1259 "adding to unassociated list\n"); 1260 list_add_tail(&efx->node, &efx_unassociated_list); 1261 } 1262 } 1263 1264 static void efx_dissociate(struct efx_nic *efx) 1265 { 1266 struct efx_nic *other, *next; 1267 1268 list_del(&efx->node); 1269 efx->primary = NULL; 1270 1271 list_for_each_entry_safe(other, next, &efx->secondary_list, node) { 1272 list_del(&other->node); 1273 netif_dbg(other, probe, other->net_dev, 1274 "moving to unassociated list\n"); 1275 list_add_tail(&other->node, &efx_unassociated_list); 1276 other->primary = NULL; 1277 } 1278 } 1279 1280 /* This configures the PCI device to enable I/O and DMA. 
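 * This enables the PCI device, makes it a bus master, negotiates a
 * usable DMA mask and maps the memory BAR.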
 */
static int efx_init_io(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	dma_addr_t dma_mask = efx->type->max_dma_mask;
	unsigned int mem_map_size = efx->type->mem_map_size(efx);
	int rc, bar;

	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");

	bar = efx->type->mem_bar(efx);

	rc = pci_enable_device(pci_dev);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	/* Set the PCI DMA mask.  Try all possibilities from our genuine mask
	 * down to 32 bits, because some architectures will allow 40 bit
	 * masks even though they reject 46 bit masks.
	 */
	while (dma_mask > 0x7fffffffUL) {
		rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
		if (rc == 0)
			break;
		dma_mask >>= 1;
	}
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "could not find a suitable DMA mask\n");
		goto fail2;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "using DMA mask %llx\n", (unsigned long long) dma_mask);

	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
	rc = pci_request_region(pci_dev, bar, "sfc");
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "request for memory BAR failed\n");
		rc = -EIO;
		goto fail3;
	}
	efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
	if (!efx->membase) {
		netif_err(efx, probe, efx->net_dev,
			  "could not map memory BAR at %llx+%x\n",
			  (unsigned long long)efx->membase_phys, mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "memory BAR at %llx+%x (virtual %p)\n",
		  (unsigned long long)efx->membase_phys, mem_map_size,
		  efx->membase);

	return 0;

 fail4:
	pci_release_region(efx->pci_dev, bar);
 fail3:
	efx->membase_phys = 0;
 fail2:
	pci_disable_device(efx->pci_dev);
 fail1:
	return rc;
}

static void efx_fini_io(struct efx_nic *efx)
{
	int bar;

	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		bar = efx->type->mem_bar(efx);
		pci_release_region(efx->pci_dev, bar);
		efx->membase_phys = 0;
	}

	/* Don't disable bus-mastering if VFs are assigned */
	if (!pci_vfs_assigned(efx->pci_dev))
		pci_disable_device(efx->pci_dev);
}

void efx_set_default_rx_indir_table(struct efx_nic *efx,
				    struct efx_rss_context *ctx)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
		ctx->rx_indir_table[i] =
			ethtool_rxfh_indir_default(i, efx->rss_spread);
}

static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
	cpumask_var_t thread_mask;
	unsigned int count;
	int cpu;

	if (rss_cpus) {
		count = rss_cpus;
	} else {
		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
			netif_warn(efx, probe, efx->net_dev,
				   "RSS disabled due to allocation failure\n");
			return 1;
		}

		count = 0;
		for_each_online_cpu(cpu) {
			if (!cpumask_test_cpu(cpu, thread_mask)) {
				++count;
				cpumask_or(thread_mask, thread_mask,
					   topology_sibling_cpumask(cpu));
			}
		}

		free_cpumask_var(thread_mask);
	}

	if (count >
EFX_MAX_RX_QUEUES) { 1412 netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, 1413 "Reducing number of rx queues from %u to %u.\n", 1414 count, EFX_MAX_RX_QUEUES); 1415 count = EFX_MAX_RX_QUEUES; 1416 } 1417 1418 /* If RSS is requested for the PF *and* VFs then we can't write RSS 1419 * table entries that are inaccessible to VFs 1420 */ 1421 #ifdef CONFIG_SFC_SRIOV 1422 if (efx->type->sriov_wanted) { 1423 if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 && 1424 count > efx_vf_size(efx)) { 1425 netif_warn(efx, probe, efx->net_dev, 1426 "Reducing number of RSS channels from %u to %u for " 1427 "VF support. Increase vf-msix-limit to use more " 1428 "channels on the PF.\n", 1429 count, efx_vf_size(efx)); 1430 count = efx_vf_size(efx); 1431 } 1432 } 1433 #endif 1434 1435 return count; 1436 } 1437 1438 /* Probe the number and type of interrupts we are able to obtain, and 1439 * the resulting numbers of channels and RX queues. 1440 */ 1441 static int efx_probe_interrupts(struct efx_nic *efx) 1442 { 1443 unsigned int extra_channels = 0; 1444 unsigned int i, j; 1445 int rc; 1446 1447 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) 1448 if (efx->extra_channel_type[i]) 1449 ++extra_channels; 1450 1451 if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { 1452 struct msix_entry xentries[EFX_MAX_CHANNELS]; 1453 unsigned int n_channels; 1454 1455 n_channels = efx_wanted_parallelism(efx); 1456 if (efx_separate_tx_channels) 1457 n_channels *= 2; 1458 n_channels += extra_channels; 1459 n_channels = min(n_channels, efx->max_channels); 1460 1461 for (i = 0; i < n_channels; i++) 1462 xentries[i].entry = i; 1463 rc = pci_enable_msix_range(efx->pci_dev, 1464 xentries, 1, n_channels); 1465 if (rc < 0) { 1466 /* Fall back to single channel MSI */ 1467 netif_err(efx, drv, efx->net_dev, 1468 "could not enable MSI-X\n"); 1469 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI) 1470 efx->interrupt_mode = EFX_INT_MODE_MSI; 1471 else 1472 return rc; 1473 } else if (rc < n_channels) { 1474 netif_err(efx, drv, efx->net_dev, 1475 "WARNING: Insufficient MSI-X vectors" 1476 " available (%d < %u).\n", rc, n_channels); 1477 netif_err(efx, drv, efx->net_dev, 1478 "WARNING: Performance may be reduced.\n"); 1479 n_channels = rc; 1480 } 1481 1482 if (rc > 0) { 1483 efx->n_channels = n_channels; 1484 if (n_channels > extra_channels) 1485 n_channels -= extra_channels; 1486 if (efx_separate_tx_channels) { 1487 efx->n_tx_channels = min(max(n_channels / 2, 1488 1U), 1489 efx->max_tx_channels); 1490 efx->n_rx_channels = max(n_channels - 1491 efx->n_tx_channels, 1492 1U); 1493 } else { 1494 efx->n_tx_channels = min(n_channels, 1495 efx->max_tx_channels); 1496 efx->n_rx_channels = n_channels; 1497 } 1498 for (i = 0; i < efx->n_channels; i++) 1499 efx_get_channel(efx, i)->irq = 1500 xentries[i].vector; 1501 } 1502 } 1503 1504 /* Try single interrupt MSI */ 1505 if (efx->interrupt_mode == EFX_INT_MODE_MSI) { 1506 efx->n_channels = 1; 1507 efx->n_rx_channels = 1; 1508 efx->n_tx_channels = 1; 1509 rc = pci_enable_msi(efx->pci_dev); 1510 if (rc == 0) { 1511 efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; 1512 } else { 1513 netif_err(efx, drv, efx->net_dev, 1514 "could not enable MSI\n"); 1515 if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY) 1516 efx->interrupt_mode = EFX_INT_MODE_LEGACY; 1517 else 1518 return rc; 1519 } 1520 } 1521 1522 /* Assume legacy interrupts */ 1523 if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { 1524 efx->n_channels = 1 + (efx_separate_tx_channels ? 
1 : 0); 1525 efx->n_rx_channels = 1; 1526 efx->n_tx_channels = 1; 1527 efx->legacy_irq = efx->pci_dev->irq; 1528 } 1529 1530 /* Assign extra channels if possible */ 1531 efx->n_extra_tx_channels = 0; 1532 j = efx->n_channels; 1533 for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) { 1534 if (!efx->extra_channel_type[i]) 1535 continue; 1536 if (efx->interrupt_mode != EFX_INT_MODE_MSIX || 1537 efx->n_channels <= extra_channels) { 1538 efx->extra_channel_type[i]->handle_no_channel(efx); 1539 } else { 1540 --j; 1541 efx_get_channel(efx, j)->type = 1542 efx->extra_channel_type[i]; 1543 if (efx_channel_has_tx_queues(efx_get_channel(efx, j))) 1544 efx->n_extra_tx_channels++; 1545 } 1546 } 1547 1548 /* RSS might be usable on VFs even if it is disabled on the PF */ 1549 #ifdef CONFIG_SFC_SRIOV 1550 if (efx->type->sriov_wanted) { 1551 efx->rss_spread = ((efx->n_rx_channels > 1 || 1552 !efx->type->sriov_wanted(efx)) ? 1553 efx->n_rx_channels : efx_vf_size(efx)); 1554 return 0; 1555 } 1556 #endif 1557 efx->rss_spread = efx->n_rx_channels; 1558 1559 return 0; 1560 } 1561 1562 #if defined(CONFIG_SMP) 1563 static void efx_set_interrupt_affinity(struct efx_nic *efx) 1564 { 1565 struct efx_channel *channel; 1566 unsigned int cpu; 1567 1568 efx_for_each_channel(channel, efx) { 1569 cpu = cpumask_local_spread(channel->channel, 1570 pcibus_to_node(efx->pci_dev->bus)); 1571 irq_set_affinity_hint(channel->irq, cpumask_of(cpu)); 1572 } 1573 } 1574 1575 static void efx_clear_interrupt_affinity(struct efx_nic *efx) 1576 { 1577 struct efx_channel *channel; 1578 1579 efx_for_each_channel(channel, efx) 1580 irq_set_affinity_hint(channel->irq, NULL); 1581 } 1582 #else 1583 static void 1584 efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) 1585 { 1586 } 1587 1588 static void 1589 efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused))) 1590 { 1591 } 1592 #endif /* CONFIG_SMP */ 1593 1594 static int efx_soft_enable_interrupts(struct efx_nic *efx) 1595 { 1596 struct efx_channel *channel, *end_channel; 1597 int rc; 1598 1599 BUG_ON(efx->state == STATE_DISABLED); 1600 1601 efx->irq_soft_enabled = true; 1602 smp_wmb(); 1603 1604 efx_for_each_channel(channel, efx) { 1605 if (!channel->type->keep_eventq) { 1606 rc = efx_init_eventq(channel); 1607 if (rc) 1608 goto fail; 1609 } 1610 efx_start_eventq(channel); 1611 } 1612 1613 efx_mcdi_mode_event(efx); 1614 1615 return 0; 1616 fail: 1617 end_channel = channel; 1618 efx_for_each_channel(channel, efx) { 1619 if (channel == end_channel) 1620 break; 1621 efx_stop_eventq(channel); 1622 if (!channel->type->keep_eventq) 1623 efx_fini_eventq(channel); 1624 } 1625 1626 return rc; 1627 } 1628 1629 static void efx_soft_disable_interrupts(struct efx_nic *efx) 1630 { 1631 struct efx_channel *channel; 1632 1633 if (efx->state == STATE_DISABLED) 1634 return; 1635 1636 efx_mcdi_mode_poll(efx); 1637 1638 efx->irq_soft_enabled = false; 1639 smp_wmb(); 1640 1641 if (efx->legacy_irq) 1642 synchronize_irq(efx->legacy_irq); 1643 1644 efx_for_each_channel(channel, efx) { 1645 if (channel->irq) 1646 synchronize_irq(channel->irq); 1647 1648 efx_stop_eventq(channel); 1649 if (!channel->type->keep_eventq) 1650 efx_fini_eventq(channel); 1651 } 1652 1653 /* Flush the asynchronous MCDI request queue */ 1654 efx_mcdi_flush_async(efx); 1655 } 1656 1657 static int efx_enable_interrupts(struct efx_nic *efx) 1658 { 1659 struct efx_channel *channel, *end_channel; 1660 int rc; 1661 1662 BUG_ON(efx->state == STATE_DISABLED); 1663 1664 if (efx->eeh_disabled_legacy_irq) { 1665 
enable_irq(efx->legacy_irq); 1666 efx->eeh_disabled_legacy_irq = false; 1667 } 1668 1669 efx->type->irq_enable_master(efx); 1670 1671 efx_for_each_channel(channel, efx) { 1672 if (channel->type->keep_eventq) { 1673 rc = efx_init_eventq(channel); 1674 if (rc) 1675 goto fail; 1676 } 1677 } 1678 1679 rc = efx_soft_enable_interrupts(efx); 1680 if (rc) 1681 goto fail; 1682 1683 return 0; 1684 1685 fail: 1686 end_channel = channel; 1687 efx_for_each_channel(channel, efx) { 1688 if (channel == end_channel) 1689 break; 1690 if (channel->type->keep_eventq) 1691 efx_fini_eventq(channel); 1692 } 1693 1694 efx->type->irq_disable_non_ev(efx); 1695 1696 return rc; 1697 } 1698 1699 static void efx_disable_interrupts(struct efx_nic *efx) 1700 { 1701 struct efx_channel *channel; 1702 1703 efx_soft_disable_interrupts(efx); 1704 1705 efx_for_each_channel(channel, efx) { 1706 if (channel->type->keep_eventq) 1707 efx_fini_eventq(channel); 1708 } 1709 1710 efx->type->irq_disable_non_ev(efx); 1711 } 1712 1713 static void efx_remove_interrupts(struct efx_nic *efx) 1714 { 1715 struct efx_channel *channel; 1716 1717 /* Remove MSI/MSI-X interrupts */ 1718 efx_for_each_channel(channel, efx) 1719 channel->irq = 0; 1720 pci_disable_msi(efx->pci_dev); 1721 pci_disable_msix(efx->pci_dev); 1722 1723 /* Remove legacy interrupt */ 1724 efx->legacy_irq = 0; 1725 } 1726 1727 static void efx_set_channels(struct efx_nic *efx) 1728 { 1729 struct efx_channel *channel; 1730 struct efx_tx_queue *tx_queue; 1731 1732 efx->tx_channel_offset = 1733 efx_separate_tx_channels ? 1734 efx->n_channels - efx->n_tx_channels : 0; 1735 1736 /* We need to mark which channels really have RX and TX 1737 * queues, and adjust the TX queue numbers if we have separate 1738 * RX-only and TX-only channels. 1739 */ 1740 efx_for_each_channel(channel, efx) { 1741 if (channel->channel < efx->n_rx_channels) 1742 channel->rx_queue.core_index = channel->channel; 1743 else 1744 channel->rx_queue.core_index = -1; 1745 1746 efx_for_each_channel_tx_queue(tx_queue, channel) 1747 tx_queue->queue -= (efx->tx_channel_offset * 1748 EFX_TXQ_TYPES); 1749 } 1750 } 1751 1752 static int efx_probe_nic(struct efx_nic *efx) 1753 { 1754 int rc; 1755 1756 netif_dbg(efx, probe, efx->net_dev, "creating NIC\n"); 1757 1758 /* Carry out hardware-type specific initialisation */ 1759 rc = efx->type->probe(efx); 1760 if (rc) 1761 return rc; 1762 1763 do { 1764 if (!efx->max_channels || !efx->max_tx_channels) { 1765 netif_err(efx, drv, efx->net_dev, 1766 "Insufficient resources to allocate" 1767 " any channels\n"); 1768 rc = -ENOSPC; 1769 goto fail1; 1770 } 1771 1772 /* Determine the number of channels and queues by trying 1773 * to hook in MSI-X interrupts. 
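		 * efx->type->dimension_resources() may lower max_channels
		 * and return -EAGAIN, in which case the interrupts are
		 * removed and the probe is retried with the new limits.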
1774 */ 1775 rc = efx_probe_interrupts(efx); 1776 if (rc) 1777 goto fail1; 1778 1779 efx_set_channels(efx); 1780 1781 /* dimension_resources can fail with EAGAIN */ 1782 rc = efx->type->dimension_resources(efx); 1783 if (rc != 0 && rc != -EAGAIN) 1784 goto fail2; 1785 1786 if (rc == -EAGAIN) 1787 /* try again with new max_channels */ 1788 efx_remove_interrupts(efx); 1789 1790 } while (rc == -EAGAIN); 1791 1792 if (efx->n_channels > 1) 1793 netdev_rss_key_fill(efx->rss_context.rx_hash_key, 1794 sizeof(efx->rss_context.rx_hash_key)); 1795 efx_set_default_rx_indir_table(efx, &efx->rss_context); 1796 1797 netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels); 1798 netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); 1799 1800 /* Initialise the interrupt moderation settings */ 1801 efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); 1802 efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true, 1803 true); 1804 1805 return 0; 1806 1807 fail2: 1808 efx_remove_interrupts(efx); 1809 fail1: 1810 efx->type->remove(efx); 1811 return rc; 1812 } 1813 1814 static void efx_remove_nic(struct efx_nic *efx) 1815 { 1816 netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n"); 1817 1818 efx_remove_interrupts(efx); 1819 efx->type->remove(efx); 1820 } 1821 1822 static int efx_probe_filters(struct efx_nic *efx) 1823 { 1824 int rc; 1825 1826 init_rwsem(&efx->filter_sem); 1827 mutex_lock(&efx->mac_lock); 1828 down_write(&efx->filter_sem); 1829 rc = efx->type->filter_table_probe(efx); 1830 if (rc) 1831 goto out_unlock; 1832 1833 #ifdef CONFIG_RFS_ACCEL 1834 if (efx->type->offload_features & NETIF_F_NTUPLE) { 1835 struct efx_channel *channel; 1836 int i, success = 1; 1837 1838 efx_for_each_channel(channel, efx) { 1839 channel->rps_flow_id = 1840 kcalloc(efx->type->max_rx_ip_filters, 1841 sizeof(*channel->rps_flow_id), 1842 GFP_KERNEL); 1843 if (!channel->rps_flow_id) 1844 success = 0; 1845 else 1846 for (i = 0; 1847 i < efx->type->max_rx_ip_filters; 1848 ++i) 1849 channel->rps_flow_id[i] = 1850 RPS_FLOW_ID_INVALID; 1851 } 1852 1853 if (!success) { 1854 efx_for_each_channel(channel, efx) 1855 kfree(channel->rps_flow_id); 1856 efx->type->filter_table_remove(efx); 1857 rc = -ENOMEM; 1858 goto out_unlock; 1859 } 1860 1861 efx->rps_expire_index = efx->rps_expire_channel = 0; 1862 } 1863 #endif 1864 out_unlock: 1865 up_write(&efx->filter_sem); 1866 mutex_unlock(&efx->mac_lock); 1867 return rc; 1868 } 1869 1870 static void efx_remove_filters(struct efx_nic *efx) 1871 { 1872 #ifdef CONFIG_RFS_ACCEL 1873 struct efx_channel *channel; 1874 1875 efx_for_each_channel(channel, efx) 1876 kfree(channel->rps_flow_id); 1877 #endif 1878 down_write(&efx->filter_sem); 1879 efx->type->filter_table_remove(efx); 1880 up_write(&efx->filter_sem); 1881 } 1882 1883 1884 /************************************************************************** 1885 * 1886 * NIC startup/shutdown 1887 * 1888 *************************************************************************/ 1889 1890 static int efx_probe_all(struct efx_nic *efx) 1891 { 1892 int rc; 1893 1894 rc = efx_probe_nic(efx); 1895 if (rc) { 1896 netif_err(efx, probe, efx->net_dev, "failed to create NIC\n"); 1897 goto fail1; 1898 } 1899 1900 rc = efx_probe_port(efx); 1901 if (rc) { 1902 netif_err(efx, probe, efx->net_dev, "failed to create port\n"); 1903 goto fail2; 1904 } 1905 1906 BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT); 1907 if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) { 1908 rc = -EINVAL; 1909 goto fail3; 1910 } 1911 
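	/* Use the default descriptor ring size for both RX and TX; this
	 * can be changed later, e.g. with ethtool -G.
	 */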
efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1912 1913 #ifdef CONFIG_SFC_SRIOV 1914 rc = efx->type->vswitching_probe(efx); 1915 if (rc) /* not fatal; the PF will still work fine */ 1916 netif_warn(efx, probe, efx->net_dev, 1917 "failed to setup vswitching rc=%d;" 1918 " VFs may not function\n", rc); 1919 #endif 1920 1921 rc = efx_probe_filters(efx); 1922 if (rc) { 1923 netif_err(efx, probe, efx->net_dev, 1924 "failed to create filter tables\n"); 1925 goto fail4; 1926 } 1927 1928 rc = efx_probe_channels(efx); 1929 if (rc) 1930 goto fail5; 1931 1932 return 0; 1933 1934 fail5: 1935 efx_remove_filters(efx); 1936 fail4: 1937 #ifdef CONFIG_SFC_SRIOV 1938 efx->type->vswitching_remove(efx); 1939 #endif 1940 fail3: 1941 efx_remove_port(efx); 1942 fail2: 1943 efx_remove_nic(efx); 1944 fail1: 1945 return rc; 1946 } 1947 1948 /* If the interface is supposed to be running but is not, start 1949 * the hardware and software data path, regular activity for the port 1950 * (MAC statistics, link polling, etc.) and schedule the port to be 1951 * reconfigured. Interrupts must already be enabled. This function 1952 * is safe to call multiple times, so long as the NIC is not disabled. 1953 * Requires the RTNL lock. 1954 */ 1955 static void efx_start_all(struct efx_nic *efx) 1956 { 1957 EFX_ASSERT_RESET_SERIALISED(efx); 1958 BUG_ON(efx->state == STATE_DISABLED); 1959 1960 /* Check that it is appropriate to restart the interface. All 1961 * of these flags are safe to read under just the rtnl lock */ 1962 if (efx->port_enabled || !netif_running(efx->net_dev) || 1963 efx->reset_pending) 1964 return; 1965 1966 efx_start_port(efx); 1967 efx_start_datapath(efx); 1968 1969 /* Start the hardware monitor if there is one */ 1970 if (efx->type->monitor != NULL) 1971 queue_delayed_work(efx->workqueue, &efx->monitor_work, 1972 efx_monitor_interval); 1973 1974 /* Link state detection is normally event-driven; we have 1975 * to poll now because we could have missed a change 1976 */ 1977 mutex_lock(&efx->mac_lock); 1978 if (efx->phy_op->poll(efx)) 1979 efx_link_status_changed(efx); 1980 mutex_unlock(&efx->mac_lock); 1981 1982 efx->type->start_stats(efx); 1983 efx->type->pull_stats(efx); 1984 spin_lock_bh(&efx->stats_lock); 1985 efx->type->update_stats(efx, NULL, NULL); 1986 spin_unlock_bh(&efx->stats_lock); 1987 } 1988 1989 /* Quiesce the hardware and software data path, and regular activity 1990 * for the port without bringing the link down. Safe to call multiple 1991 * times with the NIC in almost any state, but interrupts should be 1992 * enabled. Requires the RTNL lock. 1993 */ 1994 static void efx_stop_all(struct efx_nic *efx) 1995 { 1996 EFX_ASSERT_RESET_SERIALISED(efx); 1997 1998 /* port_enabled can be read safely under the rtnl lock */ 1999 if (!efx->port_enabled) 2000 return; 2001 2002 /* update stats before we go down so we can accurately count 2003 * rx_nodesc_drops 2004 */ 2005 efx->type->pull_stats(efx); 2006 spin_lock_bh(&efx->stats_lock); 2007 efx->type->update_stats(efx, NULL, NULL); 2008 spin_unlock_bh(&efx->stats_lock); 2009 efx->type->stop_stats(efx); 2010 efx_stop_port(efx); 2011 2012 /* Stop the kernel transmit interface. This is only valid if 2013 * the device is stopped or detached; otherwise the watchdog 2014 * may fire immediately. 
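	 * (The stack's TX watchdog only checks queues of devices that are
	 * running and present, hence the WARN_ON below.)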
2015 */ 2016 WARN_ON(netif_running(efx->net_dev) && 2017 netif_device_present(efx->net_dev)); 2018 netif_tx_disable(efx->net_dev); 2019 2020 efx_stop_datapath(efx); 2021 } 2022 2023 static void efx_remove_all(struct efx_nic *efx) 2024 { 2025 efx_remove_channels(efx); 2026 efx_remove_filters(efx); 2027 #ifdef CONFIG_SFC_SRIOV 2028 efx->type->vswitching_remove(efx); 2029 #endif 2030 efx_remove_port(efx); 2031 efx_remove_nic(efx); 2032 } 2033 2034 /************************************************************************** 2035 * 2036 * Interrupt moderation 2037 * 2038 **************************************************************************/ 2039 unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs) 2040 { 2041 if (usecs == 0) 2042 return 0; 2043 if (usecs * 1000 < efx->timer_quantum_ns) 2044 return 1; /* never round down to 0 */ 2045 return usecs * 1000 / efx->timer_quantum_ns; 2046 } 2047 2048 unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks) 2049 { 2050 /* We must round up when converting ticks to microseconds 2051 * because we round down when converting the other way. 2052 */ 2053 return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000); 2054 } 2055 2056 /* Set interrupt moderation parameters */ 2057 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, 2058 unsigned int rx_usecs, bool rx_adaptive, 2059 bool rx_may_override_tx) 2060 { 2061 struct efx_channel *channel; 2062 unsigned int timer_max_us; 2063 2064 EFX_ASSERT_RESET_SERIALISED(efx); 2065 2066 timer_max_us = efx->timer_max_ns / 1000; 2067 2068 if (tx_usecs > timer_max_us || rx_usecs > timer_max_us) 2069 return -EINVAL; 2070 2071 if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && 2072 !rx_may_override_tx) { 2073 netif_err(efx, drv, efx->net_dev, "Channels are shared. " 2074 "RX and TX IRQ moderation must be equal\n"); 2075 return -EINVAL; 2076 } 2077 2078 efx->irq_rx_adaptive = rx_adaptive; 2079 efx->irq_rx_moderation_us = rx_usecs; 2080 efx_for_each_channel(channel, efx) { 2081 if (efx_channel_has_rx_queue(channel)) 2082 channel->irq_moderation_us = rx_usecs; 2083 else if (efx_channel_has_tx_queues(channel)) 2084 channel->irq_moderation_us = tx_usecs; 2085 } 2086 2087 return 0; 2088 } 2089 2090 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, 2091 unsigned int *rx_usecs, bool *rx_adaptive) 2092 { 2093 *rx_adaptive = efx->irq_rx_adaptive; 2094 *rx_usecs = efx->irq_rx_moderation_us; 2095 2096 /* If channels are shared between RX and TX, so is IRQ 2097 * moderation. Otherwise, IRQ moderation is the same for all 2098 * TX channels and is not adaptive. 
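 * (A tx_channel_offset of zero means TX queues live on the same
 * channels as RX, which is the shared case tested below.)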
2099 */ 2100 if (efx->tx_channel_offset == 0) { 2101 *tx_usecs = *rx_usecs; 2102 } else { 2103 struct efx_channel *tx_channel; 2104 2105 tx_channel = efx->channel[efx->tx_channel_offset]; 2106 *tx_usecs = tx_channel->irq_moderation_us; 2107 } 2108 } 2109 2110 /************************************************************************** 2111 * 2112 * Hardware monitor 2113 * 2114 **************************************************************************/ 2115 2116 /* Run periodically off the general workqueue */ 2117 static void efx_monitor(struct work_struct *data) 2118 { 2119 struct efx_nic *efx = container_of(data, struct efx_nic, 2120 monitor_work.work); 2121 2122 netif_vdbg(efx, timer, efx->net_dev, 2123 "hardware monitor executing on CPU %d\n", 2124 raw_smp_processor_id()); 2125 BUG_ON(efx->type->monitor == NULL); 2126 2127 /* If the mac_lock is already held then it is likely a port 2128 * reconfiguration is already in place, which will likely do 2129 * most of the work of monitor() anyway. */ 2130 if (mutex_trylock(&efx->mac_lock)) { 2131 if (efx->port_enabled) 2132 efx->type->monitor(efx); 2133 mutex_unlock(&efx->mac_lock); 2134 } 2135 2136 queue_delayed_work(efx->workqueue, &efx->monitor_work, 2137 efx_monitor_interval); 2138 } 2139 2140 /************************************************************************** 2141 * 2142 * ioctls 2143 * 2144 *************************************************************************/ 2145 2146 /* Net device ioctl 2147 * Context: process, rtnl_lock() held. 2148 */ 2149 static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) 2150 { 2151 struct efx_nic *efx = netdev_priv(net_dev); 2152 struct mii_ioctl_data *data = if_mii(ifr); 2153 2154 if (cmd == SIOCSHWTSTAMP) 2155 return efx_ptp_set_ts_config(efx, ifr); 2156 if (cmd == SIOCGHWTSTAMP) 2157 return efx_ptp_get_ts_config(efx, ifr); 2158 2159 /* Convert phy_id from older PRTAD/DEVAD format */ 2160 if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && 2161 (data->phy_id & 0xfc00) == 0x0400) 2162 data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400; 2163 2164 return mdio_mii_ioctl(&efx->mdio, data, cmd); 2165 } 2166 2167 /************************************************************************** 2168 * 2169 * NAPI interface 2170 * 2171 **************************************************************************/ 2172 2173 static void efx_init_napi_channel(struct efx_channel *channel) 2174 { 2175 struct efx_nic *efx = channel->efx; 2176 2177 channel->napi_dev = efx->net_dev; 2178 netif_napi_add(channel->napi_dev, &channel->napi_str, 2179 efx_poll, napi_weight); 2180 } 2181 2182 static void efx_init_napi(struct efx_nic *efx) 2183 { 2184 struct efx_channel *channel; 2185 2186 efx_for_each_channel(channel, efx) 2187 efx_init_napi_channel(channel); 2188 } 2189 2190 static void efx_fini_napi_channel(struct efx_channel *channel) 2191 { 2192 if (channel->napi_dev) 2193 netif_napi_del(&channel->napi_str); 2194 2195 channel->napi_dev = NULL; 2196 } 2197 2198 static void efx_fini_napi(struct efx_nic *efx) 2199 { 2200 struct efx_channel *channel; 2201 2202 efx_for_each_channel(channel, efx) 2203 efx_fini_napi_channel(channel); 2204 } 2205 2206 /************************************************************************** 2207 * 2208 * Kernel net device interface 2209 * 2210 *************************************************************************/ 2211 2212 /* Context: process, rtnl_lock() held. 
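 * Called via ndo_open when the interface is brought up; starts the
 * data path and schedules an asynchronous self-test.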
*/ 2213 int efx_net_open(struct net_device *net_dev) 2214 { 2215 struct efx_nic *efx = netdev_priv(net_dev); 2216 int rc; 2217 2218 netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", 2219 raw_smp_processor_id()); 2220 2221 rc = efx_check_disabled(efx); 2222 if (rc) 2223 return rc; 2224 if (efx->phy_mode & PHY_MODE_SPECIAL) 2225 return -EBUSY; 2226 if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) 2227 return -EIO; 2228 2229 /* Notify the kernel of the link state polled during driver load, 2230 * before the monitor starts running */ 2231 efx_link_status_changed(efx); 2232 2233 efx_start_all(efx); 2234 if (efx->state == STATE_DISABLED || efx->reset_pending) 2235 netif_device_detach(efx->net_dev); 2236 efx_selftest_async_start(efx); 2237 return 0; 2238 } 2239 2240 /* Context: process, rtnl_lock() held. 2241 * Note that the kernel will ignore our return code; this method 2242 * should really be a void. 2243 */ 2244 int efx_net_stop(struct net_device *net_dev) 2245 { 2246 struct efx_nic *efx = netdev_priv(net_dev); 2247 2248 netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", 2249 raw_smp_processor_id()); 2250 2251 /* Stop the device and flush all the channels */ 2252 efx_stop_all(efx); 2253 2254 return 0; 2255 } 2256 2257 /* Context: process, dev_base_lock or RTNL held, non-blocking. */ 2258 static void efx_net_stats(struct net_device *net_dev, 2259 struct rtnl_link_stats64 *stats) 2260 { 2261 struct efx_nic *efx = netdev_priv(net_dev); 2262 2263 spin_lock_bh(&efx->stats_lock); 2264 efx->type->update_stats(efx, NULL, stats); 2265 spin_unlock_bh(&efx->stats_lock); 2266 } 2267 2268 /* Context: netif_tx_lock held, BHs disabled. */ 2269 static void efx_watchdog(struct net_device *net_dev) 2270 { 2271 struct efx_nic *efx = netdev_priv(net_dev); 2272 2273 netif_err(efx, tx_err, efx->net_dev, 2274 "TX stuck with port_enabled=%d: resetting channels\n", 2275 efx->port_enabled); 2276 2277 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 2278 } 2279 2280 2281 /* Context: process, rtnl_lock() held. 
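 * Called via ndo_change_mtu; the data path is quiesced while the MAC
 * is reconfigured for the new MTU, then restarted.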
*/
2282 static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
2283 {
2284 struct efx_nic *efx = netdev_priv(net_dev);
2285 int rc;
2286
2287 rc = efx_check_disabled(efx);
2288 if (rc)
2289 return rc;
2290
2291 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
2292
2293 efx_device_detach_sync(efx);
2294 efx_stop_all(efx);
2295
2296 mutex_lock(&efx->mac_lock);
2297 net_dev->mtu = new_mtu;
2298 efx_mac_reconfigure(efx);
2299 mutex_unlock(&efx->mac_lock);
2300
2301 efx_start_all(efx);
2302 efx_device_attach_if_not_resetting(efx);
2303 return 0;
2304 }
2305
2306 static int efx_set_mac_address(struct net_device *net_dev, void *data)
2307 {
2308 struct efx_nic *efx = netdev_priv(net_dev);
2309 struct sockaddr *addr = data;
2310 u8 *new_addr = addr->sa_data;
2311 u8 old_addr[6];
2312 int rc;
2313
2314 if (!is_valid_ether_addr(new_addr)) {
2315 netif_err(efx, drv, efx->net_dev,
2316 "invalid ethernet MAC address requested: %pM\n",
2317 new_addr);
2318 return -EADDRNOTAVAIL;
2319 }
2320
2321 /* save old address */
2322 ether_addr_copy(old_addr, net_dev->dev_addr);
2323 ether_addr_copy(net_dev->dev_addr, new_addr);
2324 if (efx->type->set_mac_address) {
2325 rc = efx->type->set_mac_address(efx);
2326 if (rc) {
2327 ether_addr_copy(net_dev->dev_addr, old_addr);
2328 return rc;
2329 }
2330 }
2331
2332 /* Reconfigure the MAC */
2333 mutex_lock(&efx->mac_lock);
2334 efx_mac_reconfigure(efx);
2335 mutex_unlock(&efx->mac_lock);
2336
2337 return 0;
2338 }
2339
2340 /* Context: netif_addr_lock held, BHs disabled. */
2341 static void efx_set_rx_mode(struct net_device *net_dev)
2342 {
2343 struct efx_nic *efx = netdev_priv(net_dev);
2344
2345 if (efx->port_enabled)
2346 queue_work(efx->workqueue, &efx->mac_work);
2347 /* Otherwise efx_start_port() will do this */
2348 }
2349
2350 static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
2351 {
2352 struct efx_nic *efx = netdev_priv(net_dev);
2353 int rc;
2354
2355 /* If disabling RX n-tuple filtering, clear existing filters */
2356 if (net_dev->features & ~data & NETIF_F_NTUPLE) {
2357 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
2358 if (rc)
2359 return rc;
2360 }
2361
2362 /* If Rx VLAN filter is changed, update filters via mac_reconfigure.
2363 * If rx-fcs is changed, mac_reconfigure updates that too.
2364 */
2365 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
2366 NETIF_F_RXFCS)) {
2367 /* efx_set_rx_mode() will schedule MAC work to update filters
2368 * when the new features are finally set in net_dev.
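 * (As noted above, the MAC work ends up in efx_mac_reconfigure(),
 * which applies the new VLAN-filter and RX-FCS state.)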
2369 */ 2370 efx_set_rx_mode(net_dev); 2371 } 2372 2373 return 0; 2374 } 2375 2376 static int efx_get_phys_port_id(struct net_device *net_dev, 2377 struct netdev_phys_item_id *ppid) 2378 { 2379 struct efx_nic *efx = netdev_priv(net_dev); 2380 2381 if (efx->type->get_phys_port_id) 2382 return efx->type->get_phys_port_id(efx, ppid); 2383 else 2384 return -EOPNOTSUPP; 2385 } 2386 2387 static int efx_get_phys_port_name(struct net_device *net_dev, 2388 char *name, size_t len) 2389 { 2390 struct efx_nic *efx = netdev_priv(net_dev); 2391 2392 if (snprintf(name, len, "p%u", efx->port_num) >= len) 2393 return -EINVAL; 2394 return 0; 2395 } 2396 2397 static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2398 { 2399 struct efx_nic *efx = netdev_priv(net_dev); 2400 2401 if (efx->type->vlan_rx_add_vid) 2402 return efx->type->vlan_rx_add_vid(efx, proto, vid); 2403 else 2404 return -EOPNOTSUPP; 2405 } 2406 2407 static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2408 { 2409 struct efx_nic *efx = netdev_priv(net_dev); 2410 2411 if (efx->type->vlan_rx_kill_vid) 2412 return efx->type->vlan_rx_kill_vid(efx, proto, vid); 2413 else 2414 return -EOPNOTSUPP; 2415 } 2416 2417 static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in) 2418 { 2419 switch (in) { 2420 case UDP_TUNNEL_TYPE_VXLAN: 2421 return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN; 2422 case UDP_TUNNEL_TYPE_GENEVE: 2423 return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE; 2424 default: 2425 return -1; 2426 } 2427 } 2428 2429 static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) 2430 { 2431 struct efx_nic *efx = netdev_priv(dev); 2432 struct efx_udp_tunnel tnl; 2433 int efx_tunnel_type; 2434 2435 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2436 if (efx_tunnel_type < 0) 2437 return; 2438 2439 tnl.type = (u16)efx_tunnel_type; 2440 tnl.port = ti->port; 2441 2442 if (efx->type->udp_tnl_add_port) 2443 (void)efx->type->udp_tnl_add_port(efx, tnl); 2444 } 2445 2446 static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) 2447 { 2448 struct efx_nic *efx = netdev_priv(dev); 2449 struct efx_udp_tunnel tnl; 2450 int efx_tunnel_type; 2451 2452 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2453 if (efx_tunnel_type < 0) 2454 return; 2455 2456 tnl.type = (u16)efx_tunnel_type; 2457 tnl.port = ti->port; 2458 2459 if (efx->type->udp_tnl_del_port) 2460 (void)efx->type->udp_tnl_del_port(efx, tnl); 2461 } 2462 2463 static const struct net_device_ops efx_netdev_ops = { 2464 .ndo_open = efx_net_open, 2465 .ndo_stop = efx_net_stop, 2466 .ndo_get_stats64 = efx_net_stats, 2467 .ndo_tx_timeout = efx_watchdog, 2468 .ndo_start_xmit = efx_hard_start_xmit, 2469 .ndo_validate_addr = eth_validate_addr, 2470 .ndo_do_ioctl = efx_ioctl, 2471 .ndo_change_mtu = efx_change_mtu, 2472 .ndo_set_mac_address = efx_set_mac_address, 2473 .ndo_set_rx_mode = efx_set_rx_mode, 2474 .ndo_set_features = efx_set_features, 2475 .ndo_vlan_rx_add_vid = efx_vlan_rx_add_vid, 2476 .ndo_vlan_rx_kill_vid = efx_vlan_rx_kill_vid, 2477 #ifdef CONFIG_SFC_SRIOV 2478 .ndo_set_vf_mac = efx_sriov_set_vf_mac, 2479 .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, 2480 .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, 2481 .ndo_get_vf_config = efx_sriov_get_vf_config, 2482 .ndo_set_vf_link_state = efx_sriov_set_vf_link_state, 2483 #endif 2484 .ndo_get_phys_port_id = efx_get_phys_port_id, 2485 .ndo_get_phys_port_name = efx_get_phys_port_name, 2486 .ndo_setup_tc = efx_setup_tc, 2487 #ifdef CONFIG_RFS_ACCEL 2488 
.ndo_rx_flow_steer = efx_filter_rfs, 2489 #endif 2490 .ndo_udp_tunnel_add = efx_udp_tunnel_add, 2491 .ndo_udp_tunnel_del = efx_udp_tunnel_del, 2492 }; 2493 2494 static void efx_update_name(struct efx_nic *efx) 2495 { 2496 strcpy(efx->name, efx->net_dev->name); 2497 efx_mtd_rename(efx); 2498 efx_set_channel_names(efx); 2499 } 2500 2501 static int efx_netdev_event(struct notifier_block *this, 2502 unsigned long event, void *ptr) 2503 { 2504 struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); 2505 2506 if ((net_dev->netdev_ops == &efx_netdev_ops) && 2507 event == NETDEV_CHANGENAME) 2508 efx_update_name(netdev_priv(net_dev)); 2509 2510 return NOTIFY_DONE; 2511 } 2512 2513 static struct notifier_block efx_netdev_notifier = { 2514 .notifier_call = efx_netdev_event, 2515 }; 2516 2517 static ssize_t 2518 show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) 2519 { 2520 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2521 return sprintf(buf, "%d\n", efx->phy_type); 2522 } 2523 static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); 2524 2525 #ifdef CONFIG_SFC_MCDI_LOGGING 2526 static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, 2527 char *buf) 2528 { 2529 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2530 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2531 2532 return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); 2533 } 2534 static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, 2535 const char *buf, size_t count) 2536 { 2537 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2538 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2539 bool enable = count > 0 && *buf != '0'; 2540 2541 mcdi->logging_enabled = enable; 2542 return count; 2543 } 2544 static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); 2545 #endif 2546 2547 static int efx_register_netdev(struct efx_nic *efx) 2548 { 2549 struct net_device *net_dev = efx->net_dev; 2550 struct efx_channel *channel; 2551 int rc; 2552 2553 net_dev->watchdog_timeo = 5 * HZ; 2554 net_dev->irq = efx->pci_dev->irq; 2555 net_dev->netdev_ops = &efx_netdev_ops; 2556 if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) 2557 net_dev->priv_flags |= IFF_UNICAST_FLT; 2558 net_dev->ethtool_ops = &efx_ethtool_ops; 2559 net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; 2560 net_dev->min_mtu = EFX_MIN_MTU; 2561 net_dev->max_mtu = EFX_MAX_MTU; 2562 2563 rtnl_lock(); 2564 2565 /* Enable resets to be scheduled and check whether any were 2566 * already requested. If so, the NIC is probably hosed so we 2567 * abort. 
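 * The write of efx->state must be visible before we read
 * reset_pending; the smp_mb() below enforces that ordering and pairs
 * with the barrier in efx_schedule_reset().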
2568 */ 2569 efx->state = STATE_READY; 2570 smp_mb(); /* ensure we change state before checking reset_pending */ 2571 if (efx->reset_pending) { 2572 netif_err(efx, probe, efx->net_dev, 2573 "aborting probe due to scheduled reset\n"); 2574 rc = -EIO; 2575 goto fail_locked; 2576 } 2577 2578 rc = dev_alloc_name(net_dev, net_dev->name); 2579 if (rc < 0) 2580 goto fail_locked; 2581 efx_update_name(efx); 2582 2583 /* Always start with carrier off; PHY events will detect the link */ 2584 netif_carrier_off(net_dev); 2585 2586 rc = register_netdevice(net_dev); 2587 if (rc) 2588 goto fail_locked; 2589 2590 efx_for_each_channel(channel, efx) { 2591 struct efx_tx_queue *tx_queue; 2592 efx_for_each_channel_tx_queue(tx_queue, channel) 2593 efx_init_tx_queue_core_txq(tx_queue); 2594 } 2595 2596 efx_associate(efx); 2597 2598 rtnl_unlock(); 2599 2600 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2601 if (rc) { 2602 netif_err(efx, drv, efx->net_dev, 2603 "failed to init net dev attributes\n"); 2604 goto fail_registered; 2605 } 2606 #ifdef CONFIG_SFC_MCDI_LOGGING 2607 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2608 if (rc) { 2609 netif_err(efx, drv, efx->net_dev, 2610 "failed to init net dev attributes\n"); 2611 goto fail_attr_mcdi_logging; 2612 } 2613 #endif 2614 2615 return 0; 2616 2617 #ifdef CONFIG_SFC_MCDI_LOGGING 2618 fail_attr_mcdi_logging: 2619 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2620 #endif 2621 fail_registered: 2622 rtnl_lock(); 2623 efx_dissociate(efx); 2624 unregister_netdevice(net_dev); 2625 fail_locked: 2626 efx->state = STATE_UNINIT; 2627 rtnl_unlock(); 2628 netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); 2629 return rc; 2630 } 2631 2632 static void efx_unregister_netdev(struct efx_nic *efx) 2633 { 2634 if (!efx->net_dev) 2635 return; 2636 2637 BUG_ON(netdev_priv(efx->net_dev) != efx); 2638 2639 if (efx_dev_registered(efx)) { 2640 strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); 2641 #ifdef CONFIG_SFC_MCDI_LOGGING 2642 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2643 #endif 2644 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2645 unregister_netdev(efx->net_dev); 2646 } 2647 } 2648 2649 /************************************************************************** 2650 * 2651 * Device reset and suspend 2652 * 2653 **************************************************************************/ 2654 2655 /* Tears down the entire software state and most of the hardware state 2656 * before reset. */ 2657 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 2658 { 2659 EFX_ASSERT_RESET_SERIALISED(efx); 2660 2661 if (method == RESET_TYPE_MCDI_TIMEOUT) 2662 efx->type->prepare_flr(efx); 2663 2664 efx_stop_all(efx); 2665 efx_disable_interrupts(efx); 2666 2667 mutex_lock(&efx->mac_lock); 2668 down_write(&efx->filter_sem); 2669 mutex_lock(&efx->rss_lock); 2670 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2671 method != RESET_TYPE_DATAPATH) 2672 efx->phy_op->fini(efx); 2673 efx->type->fini(efx); 2674 } 2675 2676 /* This function will always ensure that the locks acquired in 2677 * efx_reset_down() are released. A failure return code indicates 2678 * that we were unable to reinitialise the hardware, and the 2679 * driver should be disabled. If ok is false, then the rx and tx 2680 * engines are not restarted, pending a RESET_DISABLE. 
*/ 2681 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 2682 { 2683 int rc; 2684 2685 EFX_ASSERT_RESET_SERIALISED(efx); 2686 2687 if (method == RESET_TYPE_MCDI_TIMEOUT) 2688 efx->type->finish_flr(efx); 2689 2690 /* Ensure that SRAM is initialised even if we're disabling the device */ 2691 rc = efx->type->init(efx); 2692 if (rc) { 2693 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 2694 goto fail; 2695 } 2696 2697 if (!ok) 2698 goto fail; 2699 2700 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2701 method != RESET_TYPE_DATAPATH) { 2702 rc = efx->phy_op->init(efx); 2703 if (rc) 2704 goto fail; 2705 rc = efx->phy_op->reconfigure(efx); 2706 if (rc && rc != -EPERM) 2707 netif_err(efx, drv, efx->net_dev, 2708 "could not restore PHY settings\n"); 2709 } 2710 2711 rc = efx_enable_interrupts(efx); 2712 if (rc) 2713 goto fail; 2714 2715 #ifdef CONFIG_SFC_SRIOV 2716 rc = efx->type->vswitching_restore(efx); 2717 if (rc) /* not fatal; the PF will still work fine */ 2718 netif_warn(efx, probe, efx->net_dev, 2719 "failed to restore vswitching rc=%d;" 2720 " VFs may not function\n", rc); 2721 #endif 2722 2723 if (efx->type->rx_restore_rss_contexts) 2724 efx->type->rx_restore_rss_contexts(efx); 2725 mutex_unlock(&efx->rss_lock); 2726 efx->type->filter_table_restore(efx); 2727 up_write(&efx->filter_sem); 2728 if (efx->type->sriov_reset) 2729 efx->type->sriov_reset(efx); 2730 2731 mutex_unlock(&efx->mac_lock); 2732 2733 efx_start_all(efx); 2734 2735 if (efx->type->udp_tnl_push_ports) 2736 efx->type->udp_tnl_push_ports(efx); 2737 2738 return 0; 2739 2740 fail: 2741 efx->port_initialized = false; 2742 2743 mutex_unlock(&efx->rss_lock); 2744 up_write(&efx->filter_sem); 2745 mutex_unlock(&efx->mac_lock); 2746 2747 return rc; 2748 } 2749 2750 /* Reset the NIC using the specified method. Note that the reset may 2751 * fail, in which case the card will be left in an unusable state. 2752 * 2753 * Caller must hold the rtnl_lock. 2754 */ 2755 int efx_reset(struct efx_nic *efx, enum reset_type method) 2756 { 2757 int rc, rc2; 2758 bool disabled; 2759 2760 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 2761 RESET_TYPE(method)); 2762 2763 efx_device_detach_sync(efx); 2764 efx_reset_down(efx, method); 2765 2766 rc = efx->type->reset(efx, method); 2767 if (rc) { 2768 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 2769 goto out; 2770 } 2771 2772 /* Clear flags for the scopes we covered. We assume the NIC and 2773 * driver are now quiescent so that there is no race here. 2774 */ 2775 if (method < RESET_TYPE_MAX_METHOD) 2776 efx->reset_pending &= -(1 << (method + 1)); 2777 else /* it doesn't fit into the well-ordered scope hierarchy */ 2778 __clear_bit(method, &efx->reset_pending); 2779 2780 /* Reinitialise bus-mastering, which may have been turned off before 2781 * the reset was scheduled. This is still appropriate, even in the 2782 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 2783 * can respond to requests. 
*/ 2784 pci_set_master(efx->pci_dev); 2785 2786 out: 2787 /* Leave device stopped if necessary */ 2788 disabled = rc || 2789 method == RESET_TYPE_DISABLE || 2790 method == RESET_TYPE_RECOVER_OR_DISABLE; 2791 rc2 = efx_reset_up(efx, method, !disabled); 2792 if (rc2) { 2793 disabled = true; 2794 if (!rc) 2795 rc = rc2; 2796 } 2797 2798 if (disabled) { 2799 dev_close(efx->net_dev); 2800 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 2801 efx->state = STATE_DISABLED; 2802 } else { 2803 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 2804 efx_device_attach_if_not_resetting(efx); 2805 } 2806 return rc; 2807 } 2808 2809 /* Try recovery mechanisms. 2810 * For now only EEH is supported. 2811 * Returns 0 if the recovery mechanisms are unsuccessful. 2812 * Returns a non-zero value otherwise. 2813 */ 2814 int efx_try_recovery(struct efx_nic *efx) 2815 { 2816 #ifdef CONFIG_EEH 2817 /* A PCI error can occur and not be seen by EEH because nothing 2818 * happens on the PCI bus. In this case the driver may fail and 2819 * schedule a 'recover or reset', leading to this recovery handler. 2820 * Manually call the eeh failure check function. 2821 */ 2822 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 2823 if (eeh_dev_check_failure(eehdev)) { 2824 /* The EEH mechanisms will handle the error and reset the 2825 * device if necessary. 2826 */ 2827 return 1; 2828 } 2829 #endif 2830 return 0; 2831 } 2832 2833 static void efx_wait_for_bist_end(struct efx_nic *efx) 2834 { 2835 int i; 2836 2837 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 2838 if (efx_mcdi_poll_reboot(efx)) 2839 goto out; 2840 msleep(BIST_WAIT_DELAY_MS); 2841 } 2842 2843 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 2844 out: 2845 /* Either way unset the BIST flag. If we found no reboot we probably 2846 * won't recover, but we should try. 2847 */ 2848 efx->mc_bist_for_other_fn = false; 2849 } 2850 2851 /* The worker thread exists so that code that cannot sleep can 2852 * schedule a reset for later. 2853 */ 2854 static void efx_reset_work(struct work_struct *data) 2855 { 2856 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 2857 unsigned long pending; 2858 enum reset_type method; 2859 2860 pending = READ_ONCE(efx->reset_pending); 2861 method = fls(pending) - 1; 2862 2863 if (method == RESET_TYPE_MC_BIST) 2864 efx_wait_for_bist_end(efx); 2865 2866 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 2867 method == RESET_TYPE_RECOVER_OR_ALL) && 2868 efx_try_recovery(efx)) 2869 return; 2870 2871 if (!pending) 2872 return; 2873 2874 rtnl_lock(); 2875 2876 /* We checked the state in efx_schedule_reset() but it may 2877 * have changed by now. Now that we have the RTNL lock, 2878 * it cannot change again. 
2879 */ 2880 if (efx->state == STATE_READY) 2881 (void)efx_reset(efx, method); 2882 2883 rtnl_unlock(); 2884 } 2885 2886 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 2887 { 2888 enum reset_type method; 2889 2890 if (efx->state == STATE_RECOVERY) { 2891 netif_dbg(efx, drv, efx->net_dev, 2892 "recovering: skip scheduling %s reset\n", 2893 RESET_TYPE(type)); 2894 return; 2895 } 2896 2897 switch (type) { 2898 case RESET_TYPE_INVISIBLE: 2899 case RESET_TYPE_ALL: 2900 case RESET_TYPE_RECOVER_OR_ALL: 2901 case RESET_TYPE_WORLD: 2902 case RESET_TYPE_DISABLE: 2903 case RESET_TYPE_RECOVER_OR_DISABLE: 2904 case RESET_TYPE_DATAPATH: 2905 case RESET_TYPE_MC_BIST: 2906 case RESET_TYPE_MCDI_TIMEOUT: 2907 method = type; 2908 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 2909 RESET_TYPE(method)); 2910 break; 2911 default: 2912 method = efx->type->map_reset_reason(type); 2913 netif_dbg(efx, drv, efx->net_dev, 2914 "scheduling %s reset for %s\n", 2915 RESET_TYPE(method), RESET_TYPE(type)); 2916 break; 2917 } 2918 2919 set_bit(method, &efx->reset_pending); 2920 smp_mb(); /* ensure we change reset_pending before checking state */ 2921 2922 /* If we're not READY then just leave the flags set as the cue 2923 * to abort probing or reschedule the reset later. 2924 */ 2925 if (READ_ONCE(efx->state) != STATE_READY) 2926 return; 2927 2928 /* efx_process_channel() will no longer read events once a 2929 * reset is scheduled. So switch back to poll'd MCDI completions. */ 2930 efx_mcdi_mode_poll(efx); 2931 2932 queue_work(reset_workqueue, &efx->reset_work); 2933 } 2934 2935 /************************************************************************** 2936 * 2937 * List of NICs we support 2938 * 2939 **************************************************************************/ 2940 2941 /* PCI device ID table */ 2942 static const struct pci_device_id efx_pci_table[] = { 2943 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803), /* SFC9020 */ 2944 .driver_data = (unsigned long) &siena_a0_nic_type}, 2945 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */ 2946 .driver_data = (unsigned long) &siena_a0_nic_type}, 2947 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ 2948 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2949 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */ 2950 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2951 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ 2952 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2953 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */ 2954 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2955 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */ 2956 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2957 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */ 2958 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2959 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03), /* SFC9250 PF */ 2960 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 2961 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03), /* SFC9250 VF */ 2962 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 2963 {0} /* end of list */ 2964 }; 2965 2966 /************************************************************************** 2967 * 2968 * Dummy PHY/MAC operations 2969 * 2970 * Can be used for some unimplemented operations 2971 * Needed so all function pointers are valid and do not have to be tested 2972 * before use 2973 * 2974 
**************************************************************************/ 2975 int efx_port_dummy_op_int(struct efx_nic *efx) 2976 { 2977 return 0; 2978 } 2979 void efx_port_dummy_op_void(struct efx_nic *efx) {} 2980 2981 static bool efx_port_dummy_op_poll(struct efx_nic *efx) 2982 { 2983 return false; 2984 } 2985 2986 static const struct efx_phy_operations efx_dummy_phy_operations = { 2987 .init = efx_port_dummy_op_int, 2988 .reconfigure = efx_port_dummy_op_int, 2989 .poll = efx_port_dummy_op_poll, 2990 .fini = efx_port_dummy_op_void, 2991 }; 2992 2993 /************************************************************************** 2994 * 2995 * Data housekeeping 2996 * 2997 **************************************************************************/ 2998 2999 /* This zeroes out and then fills in the invariants in a struct 3000 * efx_nic (including all sub-structures). 3001 */ 3002 static int efx_init_struct(struct efx_nic *efx, 3003 struct pci_dev *pci_dev, struct net_device *net_dev) 3004 { 3005 int rc = -ENOMEM, i; 3006 3007 /* Initialise common structures */ 3008 INIT_LIST_HEAD(&efx->node); 3009 INIT_LIST_HEAD(&efx->secondary_list); 3010 spin_lock_init(&efx->biu_lock); 3011 #ifdef CONFIG_SFC_MTD 3012 INIT_LIST_HEAD(&efx->mtd_list); 3013 #endif 3014 INIT_WORK(&efx->reset_work, efx_reset_work); 3015 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 3016 INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); 3017 efx->pci_dev = pci_dev; 3018 efx->msg_enable = debug; 3019 efx->state = STATE_UNINIT; 3020 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 3021 3022 efx->net_dev = net_dev; 3023 efx->rx_prefix_size = efx->type->rx_prefix_size; 3024 efx->rx_ip_align = 3025 NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 3026 efx->rx_packet_hash_offset = 3027 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 3028 efx->rx_packet_ts_offset = 3029 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 3030 INIT_LIST_HEAD(&efx->rss_context.list); 3031 mutex_init(&efx->rss_lock); 3032 spin_lock_init(&efx->stats_lock); 3033 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 3034 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 3035 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 3036 mutex_init(&efx->mac_lock); 3037 #ifdef CONFIG_RFS_ACCEL 3038 mutex_init(&efx->rps_mutex); 3039 spin_lock_init(&efx->rps_hash_lock); 3040 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 3041 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 3042 sizeof(*efx->rps_hash_table), GFP_KERNEL); 3043 #endif 3044 efx->phy_op = &efx_dummy_phy_operations; 3045 efx->mdio.dev = net_dev; 3046 INIT_WORK(&efx->mac_work, efx_mac_work); 3047 init_waitqueue_head(&efx->flush_wq); 3048 3049 for (i = 0; i < EFX_MAX_CHANNELS; i++) { 3050 efx->channel[i] = efx_alloc_channel(efx, i, NULL); 3051 if (!efx->channel[i]) 3052 goto fail; 3053 efx->msi_context[i].efx = efx; 3054 efx->msi_context[i].index = i; 3055 } 3056 3057 /* Higher numbered interrupt modes are less capable! 
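 * (0 = MSI-X, 1 = MSI, 2 = legacy.)  The WARN_ON_ONCE() and the
 * min()/max() assignments below clamp the requested interrupt_mode
 * module parameter to the range supported by this NIC type.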
*/ 3058 if (WARN_ON_ONCE(efx->type->max_interrupt_mode > 3059 efx->type->min_interrupt_mode)) { 3060 rc = -EIO; 3061 goto fail; 3062 } 3063 efx->interrupt_mode = max(efx->type->max_interrupt_mode, 3064 interrupt_mode); 3065 efx->interrupt_mode = min(efx->type->min_interrupt_mode, 3066 interrupt_mode); 3067 3068 /* Would be good to use the net_dev name, but we're too early */ 3069 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 3070 pci_name(pci_dev)); 3071 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 3072 if (!efx->workqueue) 3073 goto fail; 3074 3075 return 0; 3076 3077 fail: 3078 efx_fini_struct(efx); 3079 return rc; 3080 } 3081 3082 static void efx_fini_struct(struct efx_nic *efx) 3083 { 3084 int i; 3085 3086 #ifdef CONFIG_RFS_ACCEL 3087 kfree(efx->rps_hash_table); 3088 #endif 3089 3090 for (i = 0; i < EFX_MAX_CHANNELS; i++) 3091 kfree(efx->channel[i]); 3092 3093 kfree(efx->vpd_sn); 3094 3095 if (efx->workqueue) { 3096 destroy_workqueue(efx->workqueue); 3097 efx->workqueue = NULL; 3098 } 3099 } 3100 3101 void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) 3102 { 3103 u64 n_rx_nodesc_trunc = 0; 3104 struct efx_channel *channel; 3105 3106 efx_for_each_channel(channel, efx) 3107 n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc; 3108 stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc; 3109 stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); 3110 } 3111 3112 bool efx_filter_spec_equal(const struct efx_filter_spec *left, 3113 const struct efx_filter_spec *right) 3114 { 3115 if ((left->match_flags ^ right->match_flags) | 3116 ((left->flags ^ right->flags) & 3117 (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) 3118 return false; 3119 3120 return memcmp(&left->outer_vid, &right->outer_vid, 3121 sizeof(struct efx_filter_spec) - 3122 offsetof(struct efx_filter_spec, outer_vid)) == 0; 3123 } 3124 3125 u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) 3126 { 3127 BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); 3128 return jhash2((const u32 *)&spec->outer_vid, 3129 (sizeof(struct efx_filter_spec) - 3130 offsetof(struct efx_filter_spec, outer_vid)) / 4, 3131 0); 3132 } 3133 3134 #ifdef CONFIG_RFS_ACCEL 3135 bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, 3136 bool *force) 3137 { 3138 if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { 3139 /* ARFS is currently updating this entry, leave it */ 3140 return false; 3141 } 3142 if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { 3143 /* ARFS tried and failed to update this, so it's probably out 3144 * of date. Remove the filter and the ARFS rule entry. 3145 */ 3146 rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; 3147 *force = true; 3148 return true; 3149 } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ 3150 /* ARFS has moved on, so old filter is not needed. Since we did 3151 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will 3152 * not be removed by efx_rps_hash_del() subsequently. 3153 */ 3154 *force = true; 3155 return true; 3156 } 3157 /* Remove it iff ARFS wants to. 
*/ 3158 return true; 3159 } 3160 3161 static 3162 struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, 3163 const struct efx_filter_spec *spec) 3164 { 3165 u32 hash = efx_filter_spec_hash(spec); 3166 3167 lockdep_assert_held(&efx->rps_hash_lock); 3168 if (!efx->rps_hash_table) 3169 return NULL; 3170 return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; 3171 } 3172 3173 struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, 3174 const struct efx_filter_spec *spec) 3175 { 3176 struct efx_arfs_rule *rule; 3177 struct hlist_head *head; 3178 struct hlist_node *node; 3179 3180 head = efx_rps_hash_bucket(efx, spec); 3181 if (!head) 3182 return NULL; 3183 hlist_for_each(node, head) { 3184 rule = container_of(node, struct efx_arfs_rule, node); 3185 if (efx_filter_spec_equal(spec, &rule->spec)) 3186 return rule; 3187 } 3188 return NULL; 3189 } 3190 3191 struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, 3192 const struct efx_filter_spec *spec, 3193 bool *new) 3194 { 3195 struct efx_arfs_rule *rule; 3196 struct hlist_head *head; 3197 struct hlist_node *node; 3198 3199 head = efx_rps_hash_bucket(efx, spec); 3200 if (!head) 3201 return NULL; 3202 hlist_for_each(node, head) { 3203 rule = container_of(node, struct efx_arfs_rule, node); 3204 if (efx_filter_spec_equal(spec, &rule->spec)) { 3205 *new = false; 3206 return rule; 3207 } 3208 } 3209 rule = kmalloc(sizeof(*rule), GFP_ATOMIC); 3210 *new = true; 3211 if (rule) { 3212 memcpy(&rule->spec, spec, sizeof(rule->spec)); 3213 hlist_add_head(&rule->node, head); 3214 } 3215 return rule; 3216 } 3217 3218 void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) 3219 { 3220 struct efx_arfs_rule *rule; 3221 struct hlist_head *head; 3222 struct hlist_node *node; 3223 3224 head = efx_rps_hash_bucket(efx, spec); 3225 if (WARN_ON(!head)) 3226 return; 3227 hlist_for_each(node, head) { 3228 rule = container_of(node, struct efx_arfs_rule, node); 3229 if (efx_filter_spec_equal(spec, &rule->spec)) { 3230 /* Someone already reused the entry. We know that if 3231 * this check doesn't fire (i.e. filter_id == REMOVING) 3232 * then the REMOVING mark was put there by our caller, 3233 * because caller is holding a lock on filter table and 3234 * only holders of that lock set REMOVING. 3235 */ 3236 if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) 3237 return; 3238 hlist_del(node); 3239 kfree(rule); 3240 return; 3241 } 3242 } 3243 /* We didn't find it. */ 3244 WARN_ON(1); 3245 } 3246 #endif 3247 3248 /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because 3249 * (a) this is an infrequent control-plane operation and (b) n is small (max 64) 3250 */ 3251 struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) 3252 { 3253 struct list_head *head = &efx->rss_context.list; 3254 struct efx_rss_context *ctx, *new; 3255 u32 id = 1; /* Don't use zero, that refers to the master RSS context */ 3256 3257 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3258 3259 /* Search for first gap in the numbering */ 3260 list_for_each_entry(ctx, head, list) { 3261 if (ctx->user_id != id) 3262 break; 3263 id++; 3264 /* Check for wrap. If this happens, we have nearly 2^32 3265 * allocated RSS contexts, which seems unlikely. 
3266 */ 3267 if (WARN_ON_ONCE(!id)) 3268 return NULL; 3269 } 3270 3271 /* Create the new entry */ 3272 new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); 3273 if (!new) 3274 return NULL; 3275 new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; 3276 new->rx_hash_udp_4tuple = false; 3277 3278 /* Insert the new entry into the gap */ 3279 new->user_id = id; 3280 list_add_tail(&new->list, &ctx->list); 3281 return new; 3282 } 3283 3284 struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) 3285 { 3286 struct list_head *head = &efx->rss_context.list; 3287 struct efx_rss_context *ctx; 3288 3289 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3290 3291 list_for_each_entry(ctx, head, list) 3292 if (ctx->user_id == id) 3293 return ctx; 3294 return NULL; 3295 } 3296 3297 void efx_free_rss_context_entry(struct efx_rss_context *ctx) 3298 { 3299 list_del(&ctx->list); 3300 kfree(ctx); 3301 } 3302 3303 /************************************************************************** 3304 * 3305 * PCI interface 3306 * 3307 **************************************************************************/ 3308 3309 /* Main body of final NIC shutdown code 3310 * This is called only at module unload (or hotplug removal). 3311 */ 3312 static void efx_pci_remove_main(struct efx_nic *efx) 3313 { 3314 /* Flush reset_work. It can no longer be scheduled since we 3315 * are not READY. 3316 */ 3317 BUG_ON(efx->state == STATE_READY); 3318 cancel_work_sync(&efx->reset_work); 3319 3320 efx_disable_interrupts(efx); 3321 efx_clear_interrupt_affinity(efx); 3322 efx_nic_fini_interrupt(efx); 3323 efx_fini_port(efx); 3324 efx->type->fini(efx); 3325 efx_fini_napi(efx); 3326 efx_remove_all(efx); 3327 } 3328 3329 /* Final NIC shutdown 3330 * This is called only at module unload (or hotplug removal). A PF can call 3331 * this on its VFs to ensure they are unbound first. 3332 */ 3333 static void efx_pci_remove(struct pci_dev *pci_dev) 3334 { 3335 struct efx_nic *efx; 3336 3337 efx = pci_get_drvdata(pci_dev); 3338 if (!efx) 3339 return; 3340 3341 /* Mark the NIC as fini, then stop the interface */ 3342 rtnl_lock(); 3343 efx_dissociate(efx); 3344 dev_close(efx->net_dev); 3345 efx_disable_interrupts(efx); 3346 efx->state = STATE_UNINIT; 3347 rtnl_unlock(); 3348 3349 if (efx->type->sriov_fini) 3350 efx->type->sriov_fini(efx); 3351 3352 efx_unregister_netdev(efx); 3353 3354 efx_mtd_remove(efx); 3355 3356 efx_pci_remove_main(efx); 3357 3358 efx_fini_io(efx); 3359 netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); 3360 3361 efx_fini_struct(efx); 3362 free_netdev(efx->net_dev); 3363 3364 pci_disable_pcie_error_reporting(pci_dev); 3365 }; 3366 3367 /* NIC VPD information 3368 * Called during probe to display the part number of the 3369 * installed NIC. VPD is potentially very large but this should 3370 * always appear within the first 512 bytes. 
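 * Both the part number ("PN") and serial number ("SN") keywords are
 * read from the read-only VPD section; the serial number is kept in
 * efx->vpd_sn.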
3371 */ 3372 #define SFC_VPD_LEN 512 3373 static void efx_probe_vpd_strings(struct efx_nic *efx) 3374 { 3375 struct pci_dev *dev = efx->pci_dev; 3376 char vpd_data[SFC_VPD_LEN]; 3377 ssize_t vpd_size; 3378 int ro_start, ro_size, i, j; 3379 3380 /* Get the vpd data from the device */ 3381 vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data); 3382 if (vpd_size <= 0) { 3383 netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n"); 3384 return; 3385 } 3386 3387 /* Get the Read only section */ 3388 ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); 3389 if (ro_start < 0) { 3390 netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n"); 3391 return; 3392 } 3393 3394 ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]); 3395 j = ro_size; 3396 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3397 if (i + j > vpd_size) 3398 j = vpd_size - i; 3399 3400 /* Get the Part number */ 3401 i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN"); 3402 if (i < 0) { 3403 netif_err(efx, drv, efx->net_dev, "Part number not found\n"); 3404 return; 3405 } 3406 3407 j = pci_vpd_info_field_size(&vpd_data[i]); 3408 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3409 if (i + j > vpd_size) { 3410 netif_err(efx, drv, efx->net_dev, "Incomplete part number\n"); 3411 return; 3412 } 3413 3414 netif_info(efx, drv, efx->net_dev, 3415 "Part Number : %.*s\n", j, &vpd_data[i]); 3416 3417 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3418 j = ro_size; 3419 i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN"); 3420 if (i < 0) { 3421 netif_err(efx, drv, efx->net_dev, "Serial number not found\n"); 3422 return; 3423 } 3424 3425 j = pci_vpd_info_field_size(&vpd_data[i]); 3426 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3427 if (i + j > vpd_size) { 3428 netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n"); 3429 return; 3430 } 3431 3432 efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL); 3433 if (!efx->vpd_sn) 3434 return; 3435 3436 snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]); 3437 } 3438 3439 3440 /* Main body of NIC initialisation 3441 * This is called at module load (or hotplug insertion, theoretically). 
3442 */ 3443 static int efx_pci_probe_main(struct efx_nic *efx) 3444 { 3445 int rc; 3446 3447 /* Do start-of-day initialisation */ 3448 rc = efx_probe_all(efx); 3449 if (rc) 3450 goto fail1; 3451 3452 efx_init_napi(efx); 3453 3454 down_write(&efx->filter_sem); 3455 rc = efx->type->init(efx); 3456 up_write(&efx->filter_sem); 3457 if (rc) { 3458 netif_err(efx, probe, efx->net_dev, 3459 "failed to initialise NIC\n"); 3460 goto fail3; 3461 } 3462 3463 rc = efx_init_port(efx); 3464 if (rc) { 3465 netif_err(efx, probe, efx->net_dev, 3466 "failed to initialise port\n"); 3467 goto fail4; 3468 } 3469 3470 rc = efx_nic_init_interrupt(efx); 3471 if (rc) 3472 goto fail5; 3473 3474 efx_set_interrupt_affinity(efx); 3475 rc = efx_enable_interrupts(efx); 3476 if (rc) 3477 goto fail6; 3478 3479 return 0; 3480 3481 fail6: 3482 efx_clear_interrupt_affinity(efx); 3483 efx_nic_fini_interrupt(efx); 3484 fail5: 3485 efx_fini_port(efx); 3486 fail4: 3487 efx->type->fini(efx); 3488 fail3: 3489 efx_fini_napi(efx); 3490 efx_remove_all(efx); 3491 fail1: 3492 return rc; 3493 } 3494 3495 static int efx_pci_probe_post_io(struct efx_nic *efx) 3496 { 3497 struct net_device *net_dev = efx->net_dev; 3498 int rc = efx_pci_probe_main(efx); 3499 3500 if (rc) 3501 return rc; 3502 3503 if (efx->type->sriov_init) { 3504 rc = efx->type->sriov_init(efx); 3505 if (rc) 3506 netif_err(efx, probe, efx->net_dev, 3507 "SR-IOV can't be enabled rc %d\n", rc); 3508 } 3509 3510 /* Determine netdevice features */ 3511 net_dev->features |= (efx->type->offload_features | NETIF_F_SG | 3512 NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); 3513 if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 3514 net_dev->features |= NETIF_F_TSO6; 3515 /* Check whether device supports TSO */ 3516 if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) 3517 net_dev->features &= ~NETIF_F_ALL_TSO; 3518 /* Mask for features that also apply to VLAN devices */ 3519 net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | 3520 NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | 3521 NETIF_F_RXCSUM); 3522 3523 net_dev->hw_features |= net_dev->features & ~efx->fixed_features; 3524 3525 /* Disable receiving frames with bad FCS, by default. */ 3526 net_dev->features &= ~NETIF_F_RXALL; 3527 3528 /* Disable VLAN filtering by default. It may be enforced if 3529 * the feature is fixed (i.e. VLAN filters are required to 3530 * receive VLAN tagged packets due to vPort restrictions). 3531 */ 3532 net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; 3533 net_dev->features |= efx->fixed_features; 3534 3535 rc = efx_register_netdev(efx); 3536 if (!rc) 3537 return 0; 3538 3539 efx_pci_remove_main(efx); 3540 return rc; 3541 } 3542 3543 /* NIC initialisation 3544 * 3545 * This is called at module load (or hotplug insertion, 3546 * theoretically). It sets up PCI mappings, resets the NIC, 3547 * sets up and registers the network devices with the kernel and hooks 3548 * the interrupt service routine. It does not prepare the device for 3549 * transmission; this is left to the first time one of the network 3550 * interfaces is brought up (i.e. efx_net_open). 
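 * If the main probe body fails it is retried: once immediately, and
 * once more after a short randomised delay, before giving up.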
3551 */ 3552 static int efx_pci_probe(struct pci_dev *pci_dev, 3553 const struct pci_device_id *entry) 3554 { 3555 struct net_device *net_dev; 3556 struct efx_nic *efx; 3557 int rc; 3558 3559 /* Allocate and initialise a struct net_device and struct efx_nic */ 3560 net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES, 3561 EFX_MAX_RX_QUEUES); 3562 if (!net_dev) 3563 return -ENOMEM; 3564 efx = netdev_priv(net_dev); 3565 efx->type = (const struct efx_nic_type *) entry->driver_data; 3566 efx->fixed_features |= NETIF_F_HIGHDMA; 3567 3568 pci_set_drvdata(pci_dev, efx); 3569 SET_NETDEV_DEV(net_dev, &pci_dev->dev); 3570 rc = efx_init_struct(efx, pci_dev, net_dev); 3571 if (rc) 3572 goto fail1; 3573 3574 netif_info(efx, probe, efx->net_dev, 3575 "Solarflare NIC detected\n"); 3576 3577 if (!efx->type->is_vf) 3578 efx_probe_vpd_strings(efx); 3579 3580 /* Set up basic I/O (BAR mappings etc) */ 3581 rc = efx_init_io(efx); 3582 if (rc) 3583 goto fail2; 3584 3585 rc = efx_pci_probe_post_io(efx); 3586 if (rc) { 3587 /* On failure, retry once immediately. 3588 * If we aborted probe due to a scheduled reset, dismiss it. 3589 */ 3590 efx->reset_pending = 0; 3591 rc = efx_pci_probe_post_io(efx); 3592 if (rc) { 3593 /* On another failure, retry once more 3594 * after a 50-305ms delay. 3595 */ 3596 unsigned char r; 3597 3598 get_random_bytes(&r, 1); 3599 msleep((unsigned int)r + 50); 3600 efx->reset_pending = 0; 3601 rc = efx_pci_probe_post_io(efx); 3602 } 3603 } 3604 if (rc) 3605 goto fail3; 3606 3607 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 3608 3609 /* Try to create MTDs, but allow this to fail */ 3610 rtnl_lock(); 3611 rc = efx_mtd_probe(efx); 3612 rtnl_unlock(); 3613 if (rc && rc != -EPERM) 3614 netif_warn(efx, probe, efx->net_dev, 3615 "failed to create MTDs (%d)\n", rc); 3616 3617 (void)pci_enable_pcie_error_reporting(pci_dev); 3618 3619 if (efx->type->udp_tnl_push_ports) 3620 efx->type->udp_tnl_push_ports(efx); 3621 3622 return 0; 3623 3624 fail3: 3625 efx_fini_io(efx); 3626 fail2: 3627 efx_fini_struct(efx); 3628 fail1: 3629 WARN_ON(rc > 0); 3630 netif_dbg(efx, drv, efx->net_dev, "initialisation failed. 
rc=%d\n", rc); 3631 free_netdev(net_dev); 3632 return rc; 3633 } 3634 3635 /* efx_pci_sriov_configure returns the actual number of Virtual Functions 3636 * enabled on success 3637 */ 3638 #ifdef CONFIG_SFC_SRIOV 3639 static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs) 3640 { 3641 int rc; 3642 struct efx_nic *efx = pci_get_drvdata(dev); 3643 3644 if (efx->type->sriov_configure) { 3645 rc = efx->type->sriov_configure(efx, num_vfs); 3646 if (rc) 3647 return rc; 3648 else 3649 return num_vfs; 3650 } else 3651 return -EOPNOTSUPP; 3652 } 3653 #endif 3654 3655 static int efx_pm_freeze(struct device *dev) 3656 { 3657 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 3658 3659 rtnl_lock(); 3660 3661 if (efx->state != STATE_DISABLED) { 3662 efx->state = STATE_UNINIT; 3663 3664 efx_device_detach_sync(efx); 3665 3666 efx_stop_all(efx); 3667 efx_disable_interrupts(efx); 3668 } 3669 3670 rtnl_unlock(); 3671 3672 return 0; 3673 } 3674 3675 static int efx_pm_thaw(struct device *dev) 3676 { 3677 int rc; 3678 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 3679 3680 rtnl_lock(); 3681 3682 if (efx->state != STATE_DISABLED) { 3683 rc = efx_enable_interrupts(efx); 3684 if (rc) 3685 goto fail; 3686 3687 mutex_lock(&efx->mac_lock); 3688 efx->phy_op->reconfigure(efx); 3689 mutex_unlock(&efx->mac_lock); 3690 3691 efx_start_all(efx); 3692 3693 efx_device_attach_if_not_resetting(efx); 3694 3695 efx->state = STATE_READY; 3696 3697 efx->type->resume_wol(efx); 3698 } 3699 3700 rtnl_unlock(); 3701 3702 /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ 3703 queue_work(reset_workqueue, &efx->reset_work); 3704 3705 return 0; 3706 3707 fail: 3708 rtnl_unlock(); 3709 3710 return rc; 3711 } 3712 3713 static int efx_pm_poweroff(struct device *dev) 3714 { 3715 struct pci_dev *pci_dev = to_pci_dev(dev); 3716 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3717 3718 efx->type->fini(efx); 3719 3720 efx->reset_pending = 0; 3721 3722 pci_save_state(pci_dev); 3723 return pci_set_power_state(pci_dev, PCI_D3hot); 3724 } 3725 3726 /* Used for both resume and restore */ 3727 static int efx_pm_resume(struct device *dev) 3728 { 3729 struct pci_dev *pci_dev = to_pci_dev(dev); 3730 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3731 int rc; 3732 3733 rc = pci_set_power_state(pci_dev, PCI_D0); 3734 if (rc) 3735 return rc; 3736 pci_restore_state(pci_dev); 3737 rc = pci_enable_device(pci_dev); 3738 if (rc) 3739 return rc; 3740 pci_set_master(efx->pci_dev); 3741 rc = efx->type->reset(efx, RESET_TYPE_ALL); 3742 if (rc) 3743 return rc; 3744 down_write(&efx->filter_sem); 3745 rc = efx->type->init(efx); 3746 up_write(&efx->filter_sem); 3747 if (rc) 3748 return rc; 3749 rc = efx_pm_thaw(dev); 3750 return rc; 3751 } 3752 3753 static int efx_pm_suspend(struct device *dev) 3754 { 3755 int rc; 3756 3757 efx_pm_freeze(dev); 3758 rc = efx_pm_poweroff(dev); 3759 if (rc) 3760 efx_pm_resume(dev); 3761 return rc; 3762 } 3763 3764 static const struct dev_pm_ops efx_pm_ops = { 3765 .suspend = efx_pm_suspend, 3766 .resume = efx_pm_resume, 3767 .freeze = efx_pm_freeze, 3768 .thaw = efx_pm_thaw, 3769 .poweroff = efx_pm_poweroff, 3770 .restore = efx_pm_resume, 3771 }; 3772 3773 /* A PCI error affecting this device was detected. 3774 * At this point MMIO and DMA may be disabled. 3775 * Stop the software path and request a slot reset. 
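 * After the slot reset, the AER/EEH core invokes efx_io_slot_reset()
 * and then efx_io_resume() below, which performs the actual NIC reset.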
3776 */ 3777 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, 3778 enum pci_channel_state state) 3779 { 3780 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 3781 struct efx_nic *efx = pci_get_drvdata(pdev); 3782 3783 if (state == pci_channel_io_perm_failure) 3784 return PCI_ERS_RESULT_DISCONNECT; 3785 3786 rtnl_lock(); 3787 3788 if (efx->state != STATE_DISABLED) { 3789 efx->state = STATE_RECOVERY; 3790 efx->reset_pending = 0; 3791 3792 efx_device_detach_sync(efx); 3793 3794 efx_stop_all(efx); 3795 efx_disable_interrupts(efx); 3796 3797 status = PCI_ERS_RESULT_NEED_RESET; 3798 } else { 3799 /* If the interface is disabled we don't want to do anything 3800 * with it. 3801 */ 3802 status = PCI_ERS_RESULT_RECOVERED; 3803 } 3804 3805 rtnl_unlock(); 3806 3807 pci_disable_device(pdev); 3808 3809 return status; 3810 } 3811 3812 /* Fake a successful reset, which will be performed later in efx_io_resume. */ 3813 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) 3814 { 3815 struct efx_nic *efx = pci_get_drvdata(pdev); 3816 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 3817 3818 if (pci_enable_device(pdev)) { 3819 netif_err(efx, hw, efx->net_dev, 3820 "Cannot re-enable PCI device after reset.\n"); 3821 status = PCI_ERS_RESULT_DISCONNECT; 3822 } 3823 3824 return status; 3825 } 3826 3827 /* Perform the actual reset and resume I/O operations. */ 3828 static void efx_io_resume(struct pci_dev *pdev) 3829 { 3830 struct efx_nic *efx = pci_get_drvdata(pdev); 3831 int rc; 3832 3833 rtnl_lock(); 3834 3835 if (efx->state == STATE_DISABLED) 3836 goto out; 3837 3838 rc = efx_reset(efx, RESET_TYPE_ALL); 3839 if (rc) { 3840 netif_err(efx, hw, efx->net_dev, 3841 "efx_reset failed after PCI error (%d)\n", rc); 3842 } else { 3843 efx->state = STATE_READY; 3844 netif_dbg(efx, hw, efx->net_dev, 3845 "Done resetting and resuming IO after PCI error.\n"); 3846 } 3847 3848 out: 3849 rtnl_unlock(); 3850 } 3851 3852 /* For simplicity and reliability, we always require a slot reset and try to 3853 * reset the hardware when a pci error affecting the device is detected. 3854 * We leave both the link_reset and mmio_enabled callback unimplemented: 3855 * with our request for slot reset the mmio_enabled callback will never be 3856 * called, and the link_reset callback is not used by AER or EEH mechanisms. 
3857 */ 3858 static const struct pci_error_handlers efx_err_handlers = { 3859 .error_detected = efx_io_error_detected, 3860 .slot_reset = efx_io_slot_reset, 3861 .resume = efx_io_resume, 3862 }; 3863 3864 static struct pci_driver efx_pci_driver = { 3865 .name = KBUILD_MODNAME, 3866 .id_table = efx_pci_table, 3867 .probe = efx_pci_probe, 3868 .remove = efx_pci_remove, 3869 .driver.pm = &efx_pm_ops, 3870 .err_handler = &efx_err_handlers, 3871 #ifdef CONFIG_SFC_SRIOV 3872 .sriov_configure = efx_pci_sriov_configure, 3873 #endif 3874 }; 3875 3876 /************************************************************************** 3877 * 3878 * Kernel module interface 3879 * 3880 *************************************************************************/ 3881 3882 module_param(interrupt_mode, uint, 0444); 3883 MODULE_PARM_DESC(interrupt_mode, 3884 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); 3885 3886 static int __init efx_init_module(void) 3887 { 3888 int rc; 3889 3890 printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n"); 3891 3892 rc = register_netdevice_notifier(&efx_netdev_notifier); 3893 if (rc) 3894 goto err_notifier; 3895 3896 #ifdef CONFIG_SFC_SRIOV 3897 rc = efx_init_sriov(); 3898 if (rc) 3899 goto err_sriov; 3900 #endif 3901 3902 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 3903 if (!reset_workqueue) { 3904 rc = -ENOMEM; 3905 goto err_reset; 3906 } 3907 3908 rc = pci_register_driver(&efx_pci_driver); 3909 if (rc < 0) 3910 goto err_pci; 3911 3912 return 0; 3913 3914 err_pci: 3915 destroy_workqueue(reset_workqueue); 3916 err_reset: 3917 #ifdef CONFIG_SFC_SRIOV 3918 efx_fini_sriov(); 3919 err_sriov: 3920 #endif 3921 unregister_netdevice_notifier(&efx_netdev_notifier); 3922 err_notifier: 3923 return rc; 3924 } 3925 3926 static void __exit efx_exit_module(void) 3927 { 3928 printk(KERN_INFO "Solarflare NET driver unloading\n"); 3929 3930 pci_unregister_driver(&efx_pci_driver); 3931 destroy_workqueue(reset_workqueue); 3932 #ifdef CONFIG_SFC_SRIOV 3933 efx_fini_sriov(); 3934 #endif 3935 unregister_netdevice_notifier(&efx_netdev_notifier); 3936 3937 } 3938 3939 module_init(efx_init_module); 3940 module_exit(efx_exit_module); 3941 3942 MODULE_AUTHOR("Solarflare Communications and " 3943 "Michael Brown <mbrown@fensystems.co.uk>"); 3944 MODULE_DESCRIPTION("Solarflare network driver"); 3945 MODULE_LICENSE("GPL"); 3946 MODULE_DEVICE_TABLE(pci, efx_pci_table); 3947 MODULE_VERSION(EFX_DRIVER_VERSION); 3948