// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
#include "efx_common.h"
#include "efx_channels.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
#include "io.h"
#include "selftest.h"
#include "sriov.h"

#include "mcdi.h"
#include "mcdi_pcol.h"
#include "workarounds.h"

/**************************************************************************
 *
 * Type name strings
 *
 **************************************************************************
 */

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
	[LOOPBACK_NONE] = "NONE",
	[LOOPBACK_DATA] = "DATAPATH",
	[LOOPBACK_GMAC] = "GMAC",
	[LOOPBACK_XGMII] = "XGMII",
	[LOOPBACK_XGXS] = "XGXS",
	[LOOPBACK_XAUI] = "XAUI",
	[LOOPBACK_GMII] = "GMII",
	[LOOPBACK_SGMII] = "SGMII",
	[LOOPBACK_XGBR] = "XGBR",
	[LOOPBACK_XFI] = "XFI",
	[LOOPBACK_XAUI_FAR] = "XAUI_FAR",
	[LOOPBACK_GMII_FAR] = "GMII_FAR",
	[LOOPBACK_SGMII_FAR] = "SGMII_FAR",
	[LOOPBACK_XFI_FAR] = "XFI_FAR",
	[LOOPBACK_GPHY] = "GPHY",
	[LOOPBACK_PHYXS] = "PHYXS",
	[LOOPBACK_PCS] = "PCS",
	[LOOPBACK_PMAPMD] = "PMA/PMD",
	[LOOPBACK_XPORT] = "XPORT",
	[LOOPBACK_XGMII_WS] = "XGMII_WS",
	[LOOPBACK_XAUI_WS] = "XAUI_WS",
	[LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
	[LOOPBACK_GMII_WS] = "GMII_WS",
	[LOOPBACK_XFI_WS] = "XFI_WS",
	[LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR",
	[LOOPBACK_PHYXS_WS] = "PHYXS_WS",
};

const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
const char *const efx_reset_type_names[] = {
	[RESET_TYPE_INVISIBLE] = "INVISIBLE",
	[RESET_TYPE_ALL] = "ALL",
	[RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
	[RESET_TYPE_WORLD] = "WORLD",
	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
	[RESET_TYPE_DATAPATH] = "DATAPATH",
	[RESET_TYPE_MC_BIST] = "MC_BIST",
	[RESET_TYPE_DISABLE] = "DISABLE",
	[RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR] = "INT_ERROR",
	[RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
	[RESET_TYPE_TX_SKIP] = "TX_SKIP",
	[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
	[RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
};

/* UDP tunnel type names */
static const char *const efx_udp_tunnel_type_names[] = {
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
};

void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
{
	if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
	    efx_udp_tunnel_type_names[type] != NULL)
		snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
	else
		snprintf(buf, buflen, "type %d", type);
}

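/* Illustrative usage sketch (added commentary, not part of the original
 * driver): formatting a tunnel type for a log message. The buffer name is
 * hypothetical; only efx_get_udp_tunnel_type_name() and the
 * TUNNEL_ENCAP_UDP_PORT_ENTRY_* values above are taken from this file.
 *
 *	char name[16];
 *
 *	efx_get_udp_tunnel_type_name(TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE,
 *				     name, sizeof(name));
 *
 * name now holds "geneve"; an unrecognised value yields "type <n>".
 */
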
/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100

/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
 * Set this to 1 to use separate channels for TX and RX. It allows us
 * to control interrupt affinity separately for TX and RX.
 *
 * This is only used in MSI-X interrupt mode
 */
bool efx_separate_tx_channels;
module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
		 "Use separate channels for TX and RX");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.
 * On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* Initial interrupt moderation settings. They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings. They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full. A queue is
 * restarted when it drops below half full. The time this takes (assuming
 * worst case 3 descriptors per packet and 1024 descriptors) is
 * 512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;

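/* Added note (not in the original source): the module parameters below that
 * are registered with mode 0444 are read-only through sysfs, so they would
 * normally be chosen at module load time, for example (assuming the usual
 * module name, sfc):
 *
 *	modprobe sfc rss_cpus=4 efx_separate_tx_channels=1
 *
 * whereas the 0644 parameters (e.g. irq_adapt_low_thresh) can also be
 * changed afterwards via /sys/module/sfc/parameters/.
 */
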
/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");

static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

static unsigned irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

static unsigned irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/

static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
			u32 flags);

#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_READY) ||	\
		    (efx->state == STATE_RECOVERY) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel. The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
	struct efx_tx_queue *tx_queue;
	struct list_head rx_list;
	int spent;

	if (unlikely(!channel->enabled))
		return 0;

	/* Prepare the batch receive list */
	EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
	INIT_LIST_HEAD(&rx_list);
	channel->rx_list = &rx_list;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		tx_queue->pkts_compl = 0;
		tx_queue->bytes_compl = 0;
	}

	spent = efx_nic_process_eventq(channel, budget);
	if (spent && efx_channel_has_rx_queue(channel)) {
		struct efx_rx_queue *rx_queue =
			efx_channel_get_rx_queue(channel);

		efx_rx_flush_packet(channel);
		efx_fast_push_rx_descriptors(rx_queue, true);
	}

	/* Update BQL */
	efx_for_each_channel_tx_queue(tx_queue, channel) {
		if (tx_queue->bytes_compl) {
			netdev_tx_completed_queue(tx_queue->core_txq,
				tx_queue->pkts_compl, tx_queue->bytes_compl);
		}
	}

	/* Receive any packets we queued up */
	netif_receive_skb_list(channel->rx_list);
	channel->rx_list = NULL;

	return spent;
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
	int step = efx->irq_mod_step_us;

	if (channel->irq_mod_score < irq_adapt_low_thresh) {
		if (channel->irq_moderation_us > step) {
			channel->irq_moderation_us -= step;
			efx->type->push_irq_moderation(channel);
		}
	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
		if (channel->irq_moderation_us <
		    efx->irq_rx_moderation_us) {
			channel->irq_moderation_us += step;
			efx->type->push_irq_moderation(channel);
		}
	}

	channel->irq_count = 0;
	channel->irq_mod_score = 0;
}

static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	struct efx_nic *efx = channel->efx;
	int spent;

	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());

	spent = efx_process_channel(channel, budget);

	xdp_do_flush_map();

	if (spent < budget) {
		if (efx_channel_has_rx_queue(channel) &&
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			efx_update_irq_mod(efx, channel);
		}

#ifdef CONFIG_RFS_ACCEL
		/* Perhaps expire some ARFS filters */
		mod_delayed_work(system_wq, &channel->filter_work, 0);
#endif

		/* There is no race here; although napi_disable() will
		 * only wait for napi_complete(), this isn't a problem
		 * since efx_nic_eventq_read_ack() will have no effect if
		 * interrupts have already been disabled.
		 */
		if (napi_complete_done(napi, spent))
			efx_nic_eventq_read_ack(channel);
	}

	return spent;
}

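/* Added summary (not in the original source): efx_poll() follows the usual
 * NAPI contract as used above. If fewer than 'budget' events were processed,
 * it calls napi_complete_done() and, when that succeeds, re-arms the event
 * queue interrupt via efx_nic_eventq_read_ack(); returning spent == budget
 * leaves the channel scheduled for another poll. Adaptive IRQ moderation is
 * only re-evaluated every 1000 interrupts (see irq_count above).
 */
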
/* Create event queue
 * Event queue memory allocations are done only once. If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
int efx_probe_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	unsigned long entries;

	netif_dbg(efx, probe, efx->net_dev,
		  "chan %d create event queue\n", channel->channel);

	/* Build an event queue with room for one event per tx and rx buffer,
	 * plus some extra for link state events and MCDI completions. */
	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

	return efx_nic_probe_eventq(channel);
}

/* Prepare channel's event queue */
int efx_init_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	int rc;

	EFX_WARN_ON_PARANOID(channel->eventq_init);

	netif_dbg(efx, drv, efx->net_dev,
		  "chan %d init event queue\n", channel->channel);

	rc = efx_nic_init_eventq(channel);
	if (rc == 0) {
		efx->type->push_irq_moderation(channel);
		channel->eventq_read_ptr = 0;
		channel->eventq_init = true;
	}
	return rc;
}

/* Enable event queue processing and NAPI */
void efx_start_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
		  "chan %d start event queue\n", channel->channel);

	/* Make sure the NAPI handler sees the enabled flag set */
	channel->enabled = true;
	smp_wmb();

	napi_enable(&channel->napi_str);
	efx_nic_eventq_read_ack(channel);
}

/* Disable event queue processing and NAPI */
void efx_stop_eventq(struct efx_channel *channel)
{
	if (!channel->enabled)
		return;

	napi_disable(&channel->napi_str);
	channel->enabled = false;
}

void efx_fini_eventq(struct efx_channel *channel)
{
	if (!channel->eventq_init)
		return;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d fini event queue\n", channel->channel);

	efx_nic_fini_eventq(channel);
	channel->eventq_init = false;
}

void efx_remove_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d remove event queue\n", channel->channel);

	efx_nic_remove_eventq(channel);
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

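/* Added overview (not in the original source): a channel structure moves
 * through roughly these stages, using helpers in this section:
 *   efx_alloc_channel()  - allocate and fill in static fields
 *   efx_probe_channel()  - allocate event/TX/RX queue resources
 *   efx_start_eventq()   - enable NAPI and event processing
 *   efx_stop_eventq() / efx_remove_channel() - tear down in reverse order
 * Runtime resizing goes through efx_copy_channel()/efx_realloc_channels().
 */
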
/* Allocate and initialise a channel structure. */
struct efx_channel *
efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	channel->efx = efx;
	channel->channel = i;
	channel->type = &efx_default_channel_type;

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		tx_queue->efx = efx;
		tx_queue->queue = i * EFX_TXQ_TYPES + j;
		tx_queue->channel = channel;
	}

#ifdef CONFIG_RFS_ACCEL
	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

	rx_queue = &channel->rx_queue;
	rx_queue->efx = efx;
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);

	return channel;
}

/* Allocate and initialise a channel structure, copying parameters
 * (but not resources) from an old channel structure.
 */
struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	*channel = *old_channel;

	channel->napi_dev = NULL;
	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
	channel->napi_str.napi_id = 0;
	channel->napi_str.state = 0;
	memset(&channel->eventq, 0, sizeof(channel->eventq));

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		if (tx_queue->channel)
			tx_queue->channel = channel;
		tx_queue->buffer = NULL;
		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
	}

	rx_queue = &channel->rx_queue;
	rx_queue->buffer = NULL;
	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
#ifdef CONFIG_RFS_ACCEL
	INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
#endif

	return channel;
}

static int efx_probe_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	netif_dbg(channel->efx, probe, channel->efx->net_dev,
		  "creating channel %d\n", channel->channel);

	rc = channel->type->pre_probe(channel);
	if (rc)
		goto fail;

	rc = efx_probe_eventq(channel);
	if (rc)
		goto fail;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		rc = efx_probe_tx_queue(tx_queue);
		if (rc)
			goto fail;
	}

	efx_for_each_channel_rx_queue(rx_queue, channel) {
		rc = efx_probe_rx_queue(rx_queue);
		if (rc)
			goto fail;
	}

	channel->rx_list = NULL;

	return 0;

fail:
	efx_remove_channel(channel);
	return rc;
}

void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
	struct efx_nic *efx = channel->efx;
	const char *type;
	int number;

	number = channel->channel;

	if (number >= efx->xdp_channel_offset &&
	    !WARN_ON_ONCE(!efx->n_xdp_channels)) {
		type = "-xdp";
		number -= efx->xdp_channel_offset;
	} else if (efx->tx_channel_offset == 0) {
		type = "";
	} else if (number < efx->tx_channel_offset) {
		type = "-rx";
	} else {
		type = "-tx";
		number -= efx->tx_channel_offset;
	}
	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
}

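/* Added example (not in the original source): with the format string above,
 * an interface named "eth0" would get per-channel names such as "eth0-0"
 * when channels carry both RX and TX, "eth0-rx-0"/"eth0-tx-0" when
 * efx_separate_tx_channels is set, and "eth0-xdp-0" for XDP TX channels.
 * The interface name is hypothetical.
 */
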
void efx_set_channel_names(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		channel->type->get_name(channel,
					efx->msi_context[channel->channel].name,
					sizeof(efx->msi_context[0].name));
}

int efx_probe_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	int rc;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

	/* Probe channels in reverse, so that any 'extra' channels
	 * use the start of the buffer table. This allows the traffic
	 * channels to be resized without moving them or wasting the
	 * entries before them.
	 */
	efx_for_each_channel_rev(channel, efx) {
		rc = efx_probe_channel(channel);
		if (rc) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	efx_set_channel_names(efx);

	return 0;

fail:
	efx_remove_channels(efx);
	return rc;
}

/* Channels are shutdown and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions
 */
static void efx_start_datapath(struct efx_nic *efx)
{
	netdev_features_t old_features = efx->net_dev->features;
	bool old_rx_scatter = efx->rx_scatter;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;
	size_t rx_buf_len;

	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
	efx->rx_dma_len = (efx->rx_prefix_size +
			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			   efx->type->rx_buffer_padding);
	rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
		      efx->rx_ip_align + efx->rx_dma_len);
	if (rx_buf_len <= PAGE_SIZE) {
		efx->rx_scatter = efx->type->always_rx_scatter;
		efx->rx_buffer_order = 0;
	} else if (efx->type->can_rx_scatter) {
		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
				       EFX_RX_BUF_ALIGNMENT) >
			     PAGE_SIZE);
		efx->rx_scatter = true;
		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
		efx->rx_buffer_order = 0;
	} else {
		efx->rx_scatter = false;
		efx->rx_buffer_order = get_order(rx_buf_len);
	}

	efx_rx_config_page_split(efx);
	if (efx->rx_buffer_order)
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u; page order=%u batch=%u\n",
			  efx->rx_dma_len, efx->rx_buffer_order,
			  efx->rx_pages_per_batch);
	else
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
			  efx->rx_dma_len, efx->rx_page_buf_step,
			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);

	/* Restore previously fixed features in hw_features and remove
	 * features which are fixed now
	 */
	efx->net_dev->hw_features |= efx->net_dev->features;
	efx->net_dev->hw_features &= ~efx->fixed_features;
	efx->net_dev->features |= efx->fixed_features;
	if (efx->net_dev->features != old_features)
		netdev_features_change(efx->net_dev);

	/* RX filters may also have scatter-enabled flags */
	if (efx->rx_scatter != old_rx_scatter)
		efx->type->filter_update_rx_scatter(efx);

	/* We must keep at least one descriptor in a TX ring empty.
	 * We could avoid this when the queue size does not exactly
	 * match the hardware ring size, but it's not that important.
	 * Therefore we stop the queue when one more skb might fill
	 * the ring completely. We wake it when half way back to
	 * empty.
	 */
	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;

	/* Initialise the channels */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel) {
			efx_init_tx_queue(tx_queue);
			atomic_inc(&efx->active_queues);
		}

		efx_for_each_channel_rx_queue(rx_queue, channel) {
			efx_init_rx_queue(rx_queue);
			atomic_inc(&efx->active_queues);
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
			efx_start_eventq(channel);
		}

		WARN_ON(channel->rx_pkt_n_frags);
	}

	efx_ptp_start_datapath(efx);

	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);
}

static void efx_stop_datapath(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

	efx_ptp_stop_datapath(efx);

	/* Stop RX refill */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			rx_queue->refill_enabled = false;
	}

	efx_for_each_channel(channel, efx) {
		/* RX packet processing is pipelined, so wait for the
		 * NAPI handler to complete. At least event queue 0
		 * might be kept active by non-data events, so don't
		 * use napi_synchronize() but actually disable NAPI
		 * temporarily.
		 */
		if (efx_channel_has_rx_queue(channel)) {
			efx_stop_eventq(channel);
			efx_start_eventq(channel);
		}
	}

	rc = efx->type->fini_dmaq(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "successfully flushed all queues\n");
	}

	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_fini_rx_queue(rx_queue);
		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
			efx_fini_tx_queue(tx_queue);
	}
	efx->xdp_rxq_info_failed = false;
}

void efx_remove_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "destroy chan %d\n", channel->channel);

	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
		efx_remove_tx_queue(tx_queue);
	efx_remove_eventq(channel);
	channel->type->post_remove(channel);
}

void efx_remove_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_remove_channel(channel);

	kfree(efx->xdp_tx_queues);
}

int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
	u32 old_rxq_entries, old_txq_entries;
	unsigned i, next_buffer_table = 0;
	int rc, rc2;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	/* Not all channels should be reallocated. We must avoid
	 * reallocating their buffer table entries.
	 */
	efx_for_each_channel(channel, efx) {
		struct efx_rx_queue *rx_queue;
		struct efx_tx_queue *tx_queue;

		if (channel->type->copy)
			continue;
		next_buffer_table = max(next_buffer_table,
					channel->eventq.index +
					channel->eventq.entries);
		efx_for_each_channel_rx_queue(rx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						rx_queue->rxd.index +
						rx_queue->rxd.entries);
		efx_for_each_channel_tx_queue(tx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						tx_queue->txd.index +
						tx_queue->txd.entries);
	}

	efx_device_detach_sync(efx);
	efx_stop_all(efx);
	efx_soft_disable_interrupts(efx);

	/* Clone channels (where possible) */
	memset(other_channel, 0, sizeof(other_channel));
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (channel->type->copy)
			channel = channel->type->copy(channel);
		if (!channel) {
			rc = -ENOMEM;
			goto out;
		}
		other_channel[i] = channel;
	}

	/* Swap entry counts and channel pointers */
	old_rxq_entries = efx->rxq_entries;
	old_txq_entries = efx->txq_entries;
	efx->rxq_entries = rxq_entries;
	efx->txq_entries = txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}

	/* Restart buffer table allocation */
	efx->next_buffer_table = next_buffer_table;

	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (!channel->type->copy)
			continue;
		rc = efx_probe_channel(channel);
		if (rc)
			goto rollback;
		efx_init_napi_channel(efx->channel[i]);
	}

out:
	/* Destroy unused channel structures */
	for (i = 0; i < efx->n_channels; i++) {
		channel = other_channel[i];
		if (channel && channel->type->copy) {
			efx_fini_napi_channel(channel);
			efx_remove_channel(channel);
			kfree(channel);
		}
	}

	rc2 = efx_soft_enable_interrupts(efx);
	if (rc2) {
		rc = rc ? rc : rc2;
		netif_err(efx, drv, efx->net_dev,
			  "unable to restart interrupts on channel reallocation\n");
		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
	} else {
		efx_start_all(efx);
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;

rollback:
	/* Swap back */
	efx->rxq_entries = old_rxq_entries;
	efx->txq_entries = old_txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}
	goto out;
}

void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
{
	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
}

bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
	return channel->channel - channel->efx->tx_channel_offset <
		channel->efx->n_tx_channels;
}

static const struct efx_channel_type efx_default_channel_type = {
	.pre_probe	= efx_channel_dummy_op_int,
	.post_remove	= efx_channel_dummy_op_void,
	.get_name	= efx_get_channel_name,
	.copy		= efx_copy_channel,
	.want_txqs	= efx_default_channel_want_txqs,
	.keep_eventq	= false,
	.want_pio	= true,
};

int efx_channel_dummy_op_int(struct efx_channel *channel)
{
	return 0;
}

void efx_channel_dummy_op_void(struct efx_channel *channel)
{
}

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also maintains the
 * link status's stop on the port's TX queue.
 */
void efx_link_status_changed(struct efx_nic *efx)
{
	struct efx_link_state *link_state = &efx->link_state;

	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading. A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
	if (!netif_running(efx->net_dev))
		return;

	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
		efx->n_link_state_changes++;

		if (link_state->up)
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
	if (link_state->up)
		netif_info(efx, link, efx->net_dev,
			   "link up at %uMbps %s-duplex (MTU %d)\n",
			   link_state->speed, link_state->fd ? "full" : "half",
			   efx->net_dev->mtu);
	else
		netif_info(efx, link, efx->net_dev, "link down\n");
}

void efx_link_set_advertising(struct efx_nic *efx,
			      const unsigned long *advertising)
{
	memcpy(efx->link_advertising, advertising,
	       sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));

	efx->link_advertising[0] |= ADVERTISED_Autoneg;
	if (advertising[0] & ADVERTISED_Pause)
		efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
	else
		efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
	if (advertising[0] & ADVERTISED_Asym_Pause)
		efx->wanted_fc ^= EFX_FC_TX;
}

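/* Added summary (not in the original source): the two tests above implement
 * the conventional pause advertising mapping, i.e.
 *	Pause only		-> EFX_FC_TX | EFX_FC_RX
 *	Pause | Asym_Pause	-> EFX_FC_RX
 *	Asym_Pause only		-> EFX_FC_TX
 *	neither			-> no flow control
 */
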
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 * force the Autoneg bit on.
 */
void efx_link_clear_advertising(struct efx_nic *efx)
{
	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}

void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}

static void efx_fini_port(struct efx_nic *efx);

/* We assume that efx->type->reconfigure_mac will always try to sync RX
 * filters and therefore needs to read-lock the filter table against freeing
 */
void efx_mac_reconfigure(struct efx_nic *efx)
{
	down_read(&efx->filter_sem);
	efx->type->reconfigure_mac(efx);
	up_read(&efx->filter_sem);
}

/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately. All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock
 */
int __efx_reconfigure_port(struct efx_nic *efx)
{
	enum efx_phy_mode phy_mode;
	int rc;

	WARN_ON(!mutex_is_locked(&efx->mac_lock));

	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

	rc = efx->type->reconfigure_port(efx);

	if (rc)
		efx->phy_mode = phy_mode;

	return rc;
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled. */
int efx_reconfigure_port(struct efx_nic *efx)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	rc = __efx_reconfigure_port(efx);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly. */
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	if (efx->port_enabled)
		efx_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);
}

static int efx_probe_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "create port\n");

	if (phy_flash_cfg)
		efx->phy_mode = PHY_MODE_SPECIAL;

	/* Connect up MAC/PHY operations table */
	rc = efx->type->probe_port(efx);
	if (rc)
		return rc;

	/* Initialise MAC address to permanent address */
	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);

	return 0;
}

static int efx_init_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, drv, efx->net_dev, "init port\n");

	mutex_lock(&efx->mac_lock);

	rc = efx->phy_op->init(efx);
	if (rc)
		goto fail1;

	efx->port_initialized = true;

	/* Reconfigure the MAC before creating dma queues (required for
	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
	efx_mac_reconfigure(efx);

	/* Ensure the PHY advertises the correct flow control settings */
	rc = efx->phy_op->reconfigure(efx);
	if (rc && rc != -EPERM)
		goto fail2;

	mutex_unlock(&efx->mac_lock);
	return 0;

fail2:
	efx->phy_op->fini(efx);
fail1:
	mutex_unlock(&efx->mac_lock);
	return rc;
}

static void efx_start_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = true;

	/* Ensure MAC ingress/egress is enabled */
	efx_mac_reconfigure(efx);

	mutex_unlock(&efx->mac_lock);
}

/* Cancel work for MAC reconfiguration, periodic hardware monitoring
 * and the async self-test, wait for them to finish and prevent them
 * being scheduled again. This doesn't cover online resets, which
 * should only be cancelled when removing the device.
 */
static void efx_stop_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = false;
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);

	cancel_delayed_work_sync(&efx->monitor_work);
	efx_selftest_async_cancel(efx);
	cancel_work_sync(&efx->mac_work);
}

static void efx_fini_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");

	if (!efx->port_initialized)
		return;

	efx->phy_op->fini(efx);
	efx->port_initialized = false;

	efx->link_state.up = false;
	efx_link_status_changed(efx);
}

static void efx_remove_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");

	efx->type->remove_port(efx);
}

/**************************************************************************
 *
 * NIC handling
 *
 **************************************************************************/

static LIST_HEAD(efx_primary_list);
static LIST_HEAD(efx_unassociated_list);

static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
{
	return left->type == right->type &&
		left->vpd_sn && right->vpd_sn &&
		!strcmp(left->vpd_sn, right->vpd_sn);
}

static void efx_associate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	if (efx->primary == efx) {
		/* Adding primary function; look for secondaries */

		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
		list_add_tail(&efx->node, &efx_primary_list);

		list_for_each_entry_safe(other, next, &efx_unassociated_list,
					 node) {
			if (efx_same_controller(efx, other)) {
				list_del(&other->node);
				netif_dbg(other, probe, other->net_dev,
					  "moving to secondary list of %s %s\n",
					  pci_name(efx->pci_dev),
					  efx->net_dev->name);
				list_add_tail(&other->node,
					      &efx->secondary_list);
				other->primary = efx;
			}
		}
	} else {
		/* Adding secondary function; look for primary */

		list_for_each_entry(other, &efx_primary_list, node) {
			if (efx_same_controller(efx, other)) {
				netif_dbg(efx, probe, efx->net_dev,
					  "adding to secondary list of %s %s\n",
					  pci_name(other->pci_dev),
					  other->net_dev->name);
				list_add_tail(&efx->node,
					      &other->secondary_list);
				efx->primary = other;
				return;
			}
		}

		netif_dbg(efx, probe, efx->net_dev,
			  "adding to unassociated list\n");
		list_add_tail(&efx->node, &efx_unassociated_list);
	}
}

static void efx_dissociate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	list_del(&efx->node);
	efx->primary = NULL;

	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
		list_del(&other->node);
		netif_dbg(other, probe, other->net_dev,
			  "moving to unassociated list\n");
		list_add_tail(&other->node, &efx_unassociated_list);
		other->primary = NULL;
	}
}

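/* Added note (not in the original source): efx_associate()/efx_dissociate()
 * above group PCI functions that belong to the same physical controller;
 * efx_same_controller() matches them by NIC type and VPD serial number, and
 * secondaries are linked onto the primary's secondary_list.
 */
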
/* This configures the PCI device to enable I/O and DMA. */
int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
		unsigned int mem_map_size)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");

	rc = pci_enable_device(pci_dev);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	/* Set the PCI DMA mask. Try all possibilities from our genuine mask
	 * down to 32 bits, because some architectures will allow 40 bit
	 * masks even though they reject 46 bit masks.
	 */
	while (dma_mask > 0x7fffffffUL) {
		rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
		if (rc == 0)
			break;
		dma_mask >>= 1;
	}
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "could not find a suitable DMA mask\n");
		goto fail2;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "using DMA mask %llx\n", (unsigned long long) dma_mask);

	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
	rc = pci_request_region(pci_dev, bar, "sfc");
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "request for memory BAR failed\n");
		rc = -EIO;
		goto fail3;
	}
	efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
	if (!efx->membase) {
		netif_err(efx, probe, efx->net_dev,
			  "could not map memory BAR at %llx+%x\n",
			  (unsigned long long)efx->membase_phys, mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "memory BAR at %llx+%x (virtual %p)\n",
		  (unsigned long long)efx->membase_phys, mem_map_size,
		  efx->membase);

	return 0;

fail4:
	pci_release_region(efx->pci_dev, bar);
fail3:
	efx->membase_phys = 0;
fail2:
	pci_disable_device(efx->pci_dev);
fail1:
	return rc;
}

void efx_fini_io(struct efx_nic *efx, int bar)
{
	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		pci_release_region(efx->pci_dev, bar);
		efx->membase_phys = 0;
	}

	/* Don't disable bus-mastering if VFs are assigned */
	if (!pci_vfs_assigned(efx->pci_dev))
		pci_disable_device(efx->pci_dev);
}

void efx_set_default_rx_indir_table(struct efx_nic *efx,
				    struct efx_rss_context *ctx)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
		ctx->rx_indir_table[i] =
			ethtool_rxfh_indir_default(i, efx->rss_spread);
}

static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
	cpumask_var_t thread_mask;
	unsigned int count;
	int cpu;

	if (rss_cpus) {
		count = rss_cpus;
	} else {
		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
			netif_warn(efx, probe, efx->net_dev,
				   "RSS disabled due to allocation failure\n");
			return 1;
		}

		count = 0;
		for_each_online_cpu(cpu) {
			if (!cpumask_test_cpu(cpu, thread_mask)) {
				++count;
				cpumask_or(thread_mask, thread_mask,
					   topology_sibling_cpumask(cpu));
			}
		}

		free_cpumask_var(thread_mask);
	}

	if (count > EFX_MAX_RX_QUEUES) {
		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
			       "Reducing number of rx queues from %u to %u.\n",
			       count, EFX_MAX_RX_QUEUES);
		count = EFX_MAX_RX_QUEUES;
	}

	/* If RSS is requested for the PF *and* VFs then we can't write RSS
	 * table entries that are inaccessible to VFs
	 */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
		    count > efx_vf_size(efx)) {
			netif_warn(efx, probe, efx->net_dev,
				   "Reducing number of RSS channels from %u to %u for "
				   "VF support. Increase vf-msix-limit to use more "
				   "channels on the PF.\n",
				   count, efx_vf_size(efx));
			count = efx_vf_size(efx);
		}
	}
#endif

	return count;
}

static int efx_allocate_msix_channels(struct efx_nic *efx,
				      unsigned int max_channels,
				      unsigned int extra_channels,
				      unsigned int parallelism)
{
	unsigned int n_channels = parallelism;
	int vec_count;
	int n_xdp_tx;
	int n_xdp_ev;

	if (efx_separate_tx_channels)
		n_channels *= 2;
	n_channels += extra_channels;

	/* To allow XDP transmit to happen from arbitrary NAPI contexts
	 * we allocate a TX queue per CPU. We share event queues across
	 * multiple tx queues, assuming tx and ev queues are both
	 * maximum size.
	 */

	n_xdp_tx = num_possible_cpus();
	n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);

	vec_count = pci_msix_vec_count(efx->pci_dev);
	if (vec_count < 0)
		return vec_count;

	max_channels = min_t(unsigned int, vec_count, max_channels);

	/* Check resources.
	 * We need a channel per event queue, plus a VI per tx queue.
	 * This may be more pessimistic than it needs to be.
	 */
	if (n_channels + n_xdp_ev > max_channels) {
		netif_err(efx, drv, efx->net_dev,
			  "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
			  n_xdp_ev, n_channels, max_channels);
		efx->n_xdp_channels = 0;
		efx->xdp_tx_per_channel = 0;
		efx->xdp_tx_queue_count = 0;
	} else {
		efx->n_xdp_channels = n_xdp_ev;
		efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
		efx->xdp_tx_queue_count = n_xdp_tx;
		n_channels += n_xdp_ev;
		netif_dbg(efx, drv, efx->net_dev,
			  "Allocating %d TX and %d event queues for XDP\n",
			  n_xdp_tx, n_xdp_ev);
	}

	if (vec_count < n_channels) {
		netif_err(efx, drv, efx->net_dev,
			  "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
			  vec_count, n_channels);
		netif_err(efx, drv, efx->net_dev,
			  "WARNING: Performance may be reduced.\n");
		n_channels = vec_count;
	}

	n_channels = min(n_channels, max_channels);

	efx->n_channels = n_channels;

	/* Ignore XDP tx channels when creating rx channels. */
	n_channels -= efx->n_xdp_channels;

	if (efx_separate_tx_channels) {
		efx->n_tx_channels =
			min(max(n_channels / 2, 1U),
			    efx->max_tx_channels);
		efx->tx_channel_offset =
			n_channels - efx->n_tx_channels;
		efx->n_rx_channels =
			max(n_channels -
			    efx->n_tx_channels, 1U);
	} else {
		efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
		efx->tx_channel_offset = 0;
		efx->n_rx_channels = n_channels;
	}

	efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
	efx->n_tx_channels = min(efx->n_tx_channels, parallelism);

	efx->xdp_channel_offset = n_channels;

	netif_dbg(efx, drv, efx->net_dev,
		  "Allocating %u RX channels\n",
		  efx->n_rx_channels);

	return efx->n_channels;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
int efx_probe_interrupts(struct efx_nic *efx)
{
	unsigned int extra_channels = 0;
	unsigned int rss_spread;
	unsigned int i, j;
	int rc;

	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
		if (efx->extra_channel_type[i])
			++extra_channels;

	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
		unsigned int parallelism = efx_wanted_parallelism(efx);
		struct msix_entry xentries[EFX_MAX_CHANNELS];
		unsigned int n_channels;

		rc = efx_allocate_msix_channels(efx, efx->max_channels,
						extra_channels, parallelism);
		if (rc >= 0) {
			n_channels = rc;
			for (i = 0; i < n_channels; i++)
				xentries[i].entry = i;
			rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
						   n_channels);
		}
		if (rc < 0) {
			/* Fall back to single channel MSI */
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
				efx->interrupt_mode = EFX_INT_MODE_MSI;
			else
				return rc;
		} else if (rc < n_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
				  " available (%d < %u).\n", rc, n_channels);
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
			n_channels = rc;
		}

		if (rc > 0) {
			for (i = 0; i < efx->n_channels; i++)
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
		efx->n_channels = 1;
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
		} else {
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
				efx->interrupt_mode = EFX_INT_MODE_LEGACY;
			else
				return rc;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->n_xdp_channels = 0;
		efx->xdp_channel_offset = efx->n_channels;
		efx->legacy_irq = efx->pci_dev->irq;
	}

	/* Assign extra channels if possible, before XDP channels */
	efx->n_extra_tx_channels = 0;
	j = efx->xdp_channel_offset;
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
		if (!efx->extra_channel_type[i])
			continue;
		if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
			efx->extra_channel_type[i]->handle_no_channel(efx);
		} else {
			--j;
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
				efx->n_extra_tx_channels++;
		}
	}

	rss_spread = efx->n_rx_channels;
	/* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		efx->rss_spread = ((rss_spread > 1 ||
				    !efx->type->sriov_wanted(efx)) ?
				   rss_spread : efx_vf_size(efx));
		return 0;
	}
#endif
	efx->rss_spread = rss_spread;

	return 0;
}

#if defined(CONFIG_SMP)
void efx_set_interrupt_affinity(struct efx_nic *efx)
{
	struct efx_channel *channel;
	unsigned int cpu;

	efx_for_each_channel(channel, efx) {
		cpu = cpumask_local_spread(channel->channel,
					   pcibus_to_node(efx->pci_dev->bus));
		irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
	}
}

void efx_clear_interrupt_affinity(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		irq_set_affinity_hint(channel->irq, NULL);
}
#else
void efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}

void efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
{
}
#endif /* CONFIG_SMP */

int efx_soft_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	BUG_ON(efx->state == STATE_DISABLED);

	efx->irq_soft_enabled = true;
	smp_wmb();

	efx_for_each_channel(channel, efx) {
		if (!channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
		efx_start_eventq(channel);
	}

	efx_mcdi_mode_event(efx);

	return 0;
fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	return rc;
}

void efx_soft_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	if (efx->state == STATE_DISABLED)
		return;

	efx_mcdi_mode_poll(efx);

	efx->irq_soft_enabled = false;
	smp_wmb();

	if (efx->legacy_irq)
		synchronize_irq(efx->legacy_irq);

	efx_for_each_channel(channel, efx) {
		if (channel->irq)
			synchronize_irq(channel->irq);

		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	/* Flush the asynchronous MCDI request queue */
	efx_mcdi_flush_async(efx);
}

int efx_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	BUG_ON(efx->state == STATE_DISABLED);

	if (efx->eeh_disabled_legacy_irq) {
		enable_irq(efx->legacy_irq);
		efx->eeh_disabled_legacy_irq = false;
	}

	efx->type->irq_enable_master(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
	}

	rc = efx_soft_enable_interrupts(efx);
	if (rc)
		goto fail;

	return 0;

fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);

	return rc;
}

void efx_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_soft_disable_interrupts(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);
}

void efx_remove_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	/* Remove MSI/MSI-X interrupts */
	efx_for_each_channel(channel, efx)
		channel->irq = 0;
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);

	/* Remove legacy interrupt */
	efx->legacy_irq = 0;
}

int efx_set_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	int xdp_queue_number;

	efx->tx_channel_offset =
		efx_separate_tx_channels ?
		efx->n_channels - efx->n_tx_channels : 0;

	if (efx->xdp_tx_queue_count) {
		EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);

		/* Allocate array for XDP TX queue lookup. */
		efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
					     sizeof(*efx->xdp_tx_queues),
					     GFP_KERNEL);
		if (!efx->xdp_tx_queues)
			return -ENOMEM;
	}

	/* We need to mark which channels really have RX and TX
	 * queues, and adjust the TX queue numbers if we have separate
	 * RX-only and TX-only channels.
	 */
	xdp_queue_number = 0;
	efx_for_each_channel(channel, efx) {
		if (channel->channel < efx->n_rx_channels)
			channel->rx_queue.core_index = channel->channel;
		else
			channel->rx_queue.core_index = -1;

		efx_for_each_channel_tx_queue(tx_queue, channel) {
			tx_queue->queue -= (efx->tx_channel_offset *
					    EFX_TXQ_TYPES);

			if (efx_channel_is_xdp_tx(channel) &&
			    xdp_queue_number < efx->xdp_tx_queue_count) {
				efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
				xdp_queue_number++;
			}
		}
	}
	return 0;
}

static int efx_probe_nic(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");

	/* Carry out hardware-type specific initialisation */
	rc = efx->type->probe(efx);
	if (rc)
		return rc;

	do {
		if (!efx->max_channels || !efx->max_tx_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "Insufficient resources to allocate"
				  " any channels\n");
			rc = -ENOSPC;
			goto fail1;
		}

		/* Determine the number of channels and queues by trying
		 * to hook in MSI-X interrupts.
		 */
		rc = efx_probe_interrupts(efx);
		if (rc)
			goto fail1;

		rc = efx_set_channels(efx);
		if (rc)
			goto fail1;

		/* dimension_resources can fail with EAGAIN */
		rc = efx->type->dimension_resources(efx);
		if (rc != 0 && rc != -EAGAIN)
			goto fail2;

		if (rc == -EAGAIN)
			/* try again with new max_channels */
			efx_remove_interrupts(efx);

	} while (rc == -EAGAIN);

	if (efx->n_channels > 1)
		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
				    sizeof(efx->rss_context.rx_hash_key));
	efx_set_default_rx_indir_table(efx, &efx->rss_context);

	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);

	/* Initialise the interrupt moderation settings */
	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);

	return 0;

fail2:
	efx_remove_interrupts(efx);
fail1:
	efx->type->remove(efx);
	return rc;
}

static void efx_remove_nic(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");

	efx_remove_interrupts(efx);
	efx->type->remove(efx);
}

static int efx_probe_filters(struct efx_nic *efx)
{
	int rc;

	init_rwsem(&efx->filter_sem);
	mutex_lock(&efx->mac_lock);
	down_write(&efx->filter_sem);
	rc = efx->type->filter_table_probe(efx);
	if (rc)
		goto out_unlock;

#ifdef CONFIG_RFS_ACCEL
	if (efx->type->offload_features & NETIF_F_NTUPLE) {
		struct efx_channel *channel;
		int i, success = 1;

		efx_for_each_channel(channel, efx) {
			channel->rps_flow_id =
				kcalloc(efx->type->max_rx_ip_filters,
					sizeof(*channel->rps_flow_id),
					GFP_KERNEL);
			if (!channel->rps_flow_id)
				success = 0;
			else
				for (i = 0;
				     i < efx->type->max_rx_ip_filters;
				     ++i)
					channel->rps_flow_id[i] =
						RPS_FLOW_ID_INVALID;
			channel->rfs_expire_index = 0;
			channel->rfs_filter_count = 0;
		}

		if (!success) {
			efx_for_each_channel(channel, efx)
				kfree(channel->rps_flow_id);
			efx->type->filter_table_remove(efx);
			rc = -ENOMEM;
			goto out_unlock;
		}
	}
#endif
out_unlock:
	up_write(&efx->filter_sem);
	mutex_unlock(&efx->mac_lock);
	return rc;
}

static void efx_remove_filters(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx) {
		cancel_delayed_work_sync(&channel->filter_work);
		kfree(channel->rps_flow_id);
	}
#endif
	down_write(&efx->filter_sem);
	efx->type->filter_table_remove(efx);
	up_write(&efx->filter_sem);
}


/**************************************************************************
 *
 * NIC startup/shutdown
 *
 *************************************************************************/

static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
		goto fail2;
	}

	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
(WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) { 1997 rc = -EINVAL; 1998 goto fail3; 1999 } 2000 efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 2001 2002 #ifdef CONFIG_SFC_SRIOV 2003 rc = efx->type->vswitching_probe(efx); 2004 if (rc) /* not fatal; the PF will still work fine */ 2005 netif_warn(efx, probe, efx->net_dev, 2006 "failed to setup vswitching rc=%d;" 2007 " VFs may not function\n", rc); 2008 #endif 2009 2010 rc = efx_probe_filters(efx); 2011 if (rc) { 2012 netif_err(efx, probe, efx->net_dev, 2013 "failed to create filter tables\n"); 2014 goto fail4; 2015 } 2016 2017 rc = efx_probe_channels(efx); 2018 if (rc) 2019 goto fail5; 2020 2021 return 0; 2022 2023 fail5: 2024 efx_remove_filters(efx); 2025 fail4: 2026 #ifdef CONFIG_SFC_SRIOV 2027 efx->type->vswitching_remove(efx); 2028 #endif 2029 fail3: 2030 efx_remove_port(efx); 2031 fail2: 2032 efx_remove_nic(efx); 2033 fail1: 2034 return rc; 2035 } 2036 2037 /* If the interface is supposed to be running but is not, start 2038 * the hardware and software data path, regular activity for the port 2039 * (MAC statistics, link polling, etc.) and schedule the port to be 2040 * reconfigured. Interrupts must already be enabled. This function 2041 * is safe to call multiple times, so long as the NIC is not disabled. 2042 * Requires the RTNL lock. 2043 */ 2044 void efx_start_all(struct efx_nic *efx) 2045 { 2046 EFX_ASSERT_RESET_SERIALISED(efx); 2047 BUG_ON(efx->state == STATE_DISABLED); 2048 2049 /* Check that it is appropriate to restart the interface. All 2050 * of these flags are safe to read under just the rtnl lock */ 2051 if (efx->port_enabled || !netif_running(efx->net_dev) || 2052 efx->reset_pending) 2053 return; 2054 2055 efx_start_port(efx); 2056 efx_start_datapath(efx); 2057 2058 /* Start the hardware monitor if there is one */ 2059 if (efx->type->monitor != NULL) 2060 queue_delayed_work(efx->workqueue, &efx->monitor_work, 2061 efx_monitor_interval); 2062 2063 /* Link state detection is normally event-driven; we have 2064 * to poll now because we could have missed a change 2065 */ 2066 mutex_lock(&efx->mac_lock); 2067 if (efx->phy_op->poll(efx)) 2068 efx_link_status_changed(efx); 2069 mutex_unlock(&efx->mac_lock); 2070 2071 efx->type->start_stats(efx); 2072 efx->type->pull_stats(efx); 2073 spin_lock_bh(&efx->stats_lock); 2074 efx->type->update_stats(efx, NULL, NULL); 2075 spin_unlock_bh(&efx->stats_lock); 2076 } 2077 2078 /* Quiesce the hardware and software data path, and regular activity 2079 * for the port without bringing the link down. Safe to call multiple 2080 * times with the NIC in almost any state, but interrupts should be 2081 * enabled. Requires the RTNL lock. 2082 */ 2083 void efx_stop_all(struct efx_nic *efx) 2084 { 2085 EFX_ASSERT_RESET_SERIALISED(efx); 2086 2087 /* port_enabled can be read safely under the rtnl lock */ 2088 if (!efx->port_enabled) 2089 return; 2090 2091 /* update stats before we go down so we can accurately count 2092 * rx_nodesc_drops 2093 */ 2094 efx->type->pull_stats(efx); 2095 spin_lock_bh(&efx->stats_lock); 2096 efx->type->update_stats(efx, NULL, NULL); 2097 spin_unlock_bh(&efx->stats_lock); 2098 efx->type->stop_stats(efx); 2099 efx_stop_port(efx); 2100 2101 /* Stop the kernel transmit interface. This is only valid if 2102 * the device is stopped or detached; otherwise the watchdog 2103 * may fire immediately. 
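 * (Callers therefore detach first: efx_change_mtu() and efx_reset(), for
 * example, call efx_device_detach_sync() before the datapath is stopped,
 * and reattach the device once efx_start_all() has run again.)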
2104 */ 2105 WARN_ON(netif_running(efx->net_dev) && 2106 netif_device_present(efx->net_dev)); 2107 netif_tx_disable(efx->net_dev); 2108 2109 efx_stop_datapath(efx); 2110 } 2111 2112 static void efx_remove_all(struct efx_nic *efx) 2113 { 2114 rtnl_lock(); 2115 efx_xdp_setup_prog(efx, NULL); 2116 rtnl_unlock(); 2117 2118 efx_remove_channels(efx); 2119 efx_remove_filters(efx); 2120 #ifdef CONFIG_SFC_SRIOV 2121 efx->type->vswitching_remove(efx); 2122 #endif 2123 efx_remove_port(efx); 2124 efx_remove_nic(efx); 2125 } 2126 2127 /************************************************************************** 2128 * 2129 * Interrupt moderation 2130 * 2131 **************************************************************************/ 2132 unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs) 2133 { 2134 if (usecs == 0) 2135 return 0; 2136 if (usecs * 1000 < efx->timer_quantum_ns) 2137 return 1; /* never round down to 0 */ 2138 return usecs * 1000 / efx->timer_quantum_ns; 2139 } 2140 2141 unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks) 2142 { 2143 /* We must round up when converting ticks to microseconds 2144 * because we round down when converting the other way. 2145 */ 2146 return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000); 2147 } 2148 2149 /* Set interrupt moderation parameters */ 2150 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, 2151 unsigned int rx_usecs, bool rx_adaptive, 2152 bool rx_may_override_tx) 2153 { 2154 struct efx_channel *channel; 2155 unsigned int timer_max_us; 2156 2157 EFX_ASSERT_RESET_SERIALISED(efx); 2158 2159 timer_max_us = efx->timer_max_ns / 1000; 2160 2161 if (tx_usecs > timer_max_us || rx_usecs > timer_max_us) 2162 return -EINVAL; 2163 2164 if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && 2165 !rx_may_override_tx) { 2166 netif_err(efx, drv, efx->net_dev, "Channels are shared. " 2167 "RX and TX IRQ moderation must be equal\n"); 2168 return -EINVAL; 2169 } 2170 2171 efx->irq_rx_adaptive = rx_adaptive; 2172 efx->irq_rx_moderation_us = rx_usecs; 2173 efx_for_each_channel(channel, efx) { 2174 if (efx_channel_has_rx_queue(channel)) 2175 channel->irq_moderation_us = rx_usecs; 2176 else if (efx_channel_has_tx_queues(channel)) 2177 channel->irq_moderation_us = tx_usecs; 2178 else if (efx_channel_is_xdp_tx(channel)) 2179 channel->irq_moderation_us = tx_usecs; 2180 } 2181 2182 return 0; 2183 } 2184 2185 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, 2186 unsigned int *rx_usecs, bool *rx_adaptive) 2187 { 2188 *rx_adaptive = efx->irq_rx_adaptive; 2189 *rx_usecs = efx->irq_rx_moderation_us; 2190 2191 /* If channels are shared between RX and TX, so is IRQ 2192 * moderation. Otherwise, IRQ moderation is the same for all 2193 * TX channels and is not adaptive. 
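 * (Example: with shared channels (tx_channel_offset == 0),
 * efx_init_irq_moderation() above rejects tx_usecs != rx_usecs unless
 * rx_may_override_tx is set, in which case the RX value is what the
 * shared channels end up using.)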
2194 */ 2195 if (efx->tx_channel_offset == 0) { 2196 *tx_usecs = *rx_usecs; 2197 } else { 2198 struct efx_channel *tx_channel; 2199 2200 tx_channel = efx->channel[efx->tx_channel_offset]; 2201 *tx_usecs = tx_channel->irq_moderation_us; 2202 } 2203 } 2204 2205 /************************************************************************** 2206 * 2207 * Hardware monitor 2208 * 2209 **************************************************************************/ 2210 2211 /* Run periodically off the general workqueue */ 2212 static void efx_monitor(struct work_struct *data) 2213 { 2214 struct efx_nic *efx = container_of(data, struct efx_nic, 2215 monitor_work.work); 2216 2217 netif_vdbg(efx, timer, efx->net_dev, 2218 "hardware monitor executing on CPU %d\n", 2219 raw_smp_processor_id()); 2220 BUG_ON(efx->type->monitor == NULL); 2221 2222 /* If the mac_lock is already held then it is likely a port 2223 * reconfiguration is already in place, which will likely do 2224 * most of the work of monitor() anyway. */ 2225 if (mutex_trylock(&efx->mac_lock)) { 2226 if (efx->port_enabled) 2227 efx->type->monitor(efx); 2228 mutex_unlock(&efx->mac_lock); 2229 } 2230 2231 queue_delayed_work(efx->workqueue, &efx->monitor_work, 2232 efx_monitor_interval); 2233 } 2234 2235 /************************************************************************** 2236 * 2237 * ioctls 2238 * 2239 *************************************************************************/ 2240 2241 /* Net device ioctl 2242 * Context: process, rtnl_lock() held. 2243 */ 2244 static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) 2245 { 2246 struct efx_nic *efx = netdev_priv(net_dev); 2247 struct mii_ioctl_data *data = if_mii(ifr); 2248 2249 if (cmd == SIOCSHWTSTAMP) 2250 return efx_ptp_set_ts_config(efx, ifr); 2251 if (cmd == SIOCGHWTSTAMP) 2252 return efx_ptp_get_ts_config(efx, ifr); 2253 2254 /* Convert phy_id from older PRTAD/DEVAD format */ 2255 if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && 2256 (data->phy_id & 0xfc00) == 0x0400) 2257 data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400; 2258 2259 return mdio_mii_ioctl(&efx->mdio, data, cmd); 2260 } 2261 2262 /************************************************************************** 2263 * 2264 * NAPI interface 2265 * 2266 **************************************************************************/ 2267 2268 void efx_init_napi_channel(struct efx_channel *channel) 2269 { 2270 struct efx_nic *efx = channel->efx; 2271 2272 channel->napi_dev = efx->net_dev; 2273 netif_napi_add(channel->napi_dev, &channel->napi_str, 2274 efx_poll, napi_weight); 2275 } 2276 2277 void efx_init_napi(struct efx_nic *efx) 2278 { 2279 struct efx_channel *channel; 2280 2281 efx_for_each_channel(channel, efx) 2282 efx_init_napi_channel(channel); 2283 } 2284 2285 void efx_fini_napi_channel(struct efx_channel *channel) 2286 { 2287 if (channel->napi_dev) 2288 netif_napi_del(&channel->napi_str); 2289 2290 channel->napi_dev = NULL; 2291 } 2292 2293 void efx_fini_napi(struct efx_nic *efx) 2294 { 2295 struct efx_channel *channel; 2296 2297 efx_for_each_channel(channel, efx) 2298 efx_fini_napi_channel(channel); 2299 } 2300 2301 /************************************************************************** 2302 * 2303 * Kernel net device interface 2304 * 2305 *************************************************************************/ 2306 2307 /* Context: process, rtnl_lock() held. 
*/ 2308 int efx_net_open(struct net_device *net_dev) 2309 { 2310 struct efx_nic *efx = netdev_priv(net_dev); 2311 int rc; 2312 2313 netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", 2314 raw_smp_processor_id()); 2315 2316 rc = efx_check_disabled(efx); 2317 if (rc) 2318 return rc; 2319 if (efx->phy_mode & PHY_MODE_SPECIAL) 2320 return -EBUSY; 2321 if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) 2322 return -EIO; 2323 2324 /* Notify the kernel of the link state polled during driver load, 2325 * before the monitor starts running */ 2326 efx_link_status_changed(efx); 2327 2328 efx_start_all(efx); 2329 if (efx->state == STATE_DISABLED || efx->reset_pending) 2330 netif_device_detach(efx->net_dev); 2331 efx_selftest_async_start(efx); 2332 return 0; 2333 } 2334 2335 /* Context: process, rtnl_lock() held. 2336 * Note that the kernel will ignore our return code; this method 2337 * should really be a void. 2338 */ 2339 int efx_net_stop(struct net_device *net_dev) 2340 { 2341 struct efx_nic *efx = netdev_priv(net_dev); 2342 2343 netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", 2344 raw_smp_processor_id()); 2345 2346 /* Stop the device and flush all the channels */ 2347 efx_stop_all(efx); 2348 2349 return 0; 2350 } 2351 2352 /* Context: process, dev_base_lock or RTNL held, non-blocking. */ 2353 static void efx_net_stats(struct net_device *net_dev, 2354 struct rtnl_link_stats64 *stats) 2355 { 2356 struct efx_nic *efx = netdev_priv(net_dev); 2357 2358 spin_lock_bh(&efx->stats_lock); 2359 efx->type->update_stats(efx, NULL, stats); 2360 spin_unlock_bh(&efx->stats_lock); 2361 } 2362 2363 /* Context: netif_tx_lock held, BHs disabled. */ 2364 static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) 2365 { 2366 struct efx_nic *efx = netdev_priv(net_dev); 2367 2368 netif_err(efx, tx_err, efx->net_dev, 2369 "TX stuck with port_enabled=%d: resetting channels\n", 2370 efx->port_enabled); 2371 2372 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 2373 } 2374 2375 static unsigned int efx_xdp_max_mtu(struct efx_nic *efx) 2376 { 2377 /* The maximum MTU that we can fit in a single page, allowing for 2378 * framing, overhead and XDP headroom. 2379 */ 2380 int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + 2381 efx->rx_prefix_size + efx->type->rx_buffer_padding + 2382 efx->rx_ip_align + XDP_PACKET_HEADROOM; 2383 2384 return PAGE_SIZE - overhead; 2385 } 2386 2387 /* Context: process, rtnl_lock() held. 
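 * When an XDP program is attached, the requested MTU is additionally
 * checked against efx_xdp_max_mtu() above, i.e. roughly PAGE_SIZE minus
 * the frame, prefix, padding and XDP headroom overhead.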
*/ 2388 static int efx_change_mtu(struct net_device *net_dev, int new_mtu) 2389 { 2390 struct efx_nic *efx = netdev_priv(net_dev); 2391 int rc; 2392 2393 rc = efx_check_disabled(efx); 2394 if (rc) 2395 return rc; 2396 2397 if (rtnl_dereference(efx->xdp_prog) && 2398 new_mtu > efx_xdp_max_mtu(efx)) { 2399 netif_err(efx, drv, efx->net_dev, 2400 "Requested MTU of %d too big for XDP (max: %d)\n", 2401 new_mtu, efx_xdp_max_mtu(efx)); 2402 return -EINVAL; 2403 } 2404 2405 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 2406 2407 efx_device_detach_sync(efx); 2408 efx_stop_all(efx); 2409 2410 mutex_lock(&efx->mac_lock); 2411 net_dev->mtu = new_mtu; 2412 efx_mac_reconfigure(efx); 2413 mutex_unlock(&efx->mac_lock); 2414 2415 efx_start_all(efx); 2416 efx_device_attach_if_not_resetting(efx); 2417 return 0; 2418 } 2419 2420 static int efx_set_mac_address(struct net_device *net_dev, void *data) 2421 { 2422 struct efx_nic *efx = netdev_priv(net_dev); 2423 struct sockaddr *addr = data; 2424 u8 *new_addr = addr->sa_data; 2425 u8 old_addr[6]; 2426 int rc; 2427 2428 if (!is_valid_ether_addr(new_addr)) { 2429 netif_err(efx, drv, efx->net_dev, 2430 "invalid ethernet MAC address requested: %pM\n", 2431 new_addr); 2432 return -EADDRNOTAVAIL; 2433 } 2434 2435 /* save old address */ 2436 ether_addr_copy(old_addr, net_dev->dev_addr); 2437 ether_addr_copy(net_dev->dev_addr, new_addr); 2438 if (efx->type->set_mac_address) { 2439 rc = efx->type->set_mac_address(efx); 2440 if (rc) { 2441 ether_addr_copy(net_dev->dev_addr, old_addr); 2442 return rc; 2443 } 2444 } 2445 2446 /* Reconfigure the MAC */ 2447 mutex_lock(&efx->mac_lock); 2448 efx_mac_reconfigure(efx); 2449 mutex_unlock(&efx->mac_lock); 2450 2451 return 0; 2452 } 2453 2454 /* Context: netif_addr_lock held, BHs disabled. */ 2455 static void efx_set_rx_mode(struct net_device *net_dev) 2456 { 2457 struct efx_nic *efx = netdev_priv(net_dev); 2458 2459 if (efx->port_enabled) 2460 queue_work(efx->workqueue, &efx->mac_work); 2461 /* Otherwise efx_start_port() will do this */ 2462 } 2463 2464 static int efx_set_features(struct net_device *net_dev, netdev_features_t data) 2465 { 2466 struct efx_nic *efx = netdev_priv(net_dev); 2467 int rc; 2468 2469 /* If disabling RX n-tuple filtering, clear existing filters */ 2470 if (net_dev->features & ~data & NETIF_F_NTUPLE) { 2471 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); 2472 if (rc) 2473 return rc; 2474 } 2475 2476 /* If Rx VLAN filter is changed, update filters via mac_reconfigure. 2477 * If rx-fcs is changed, mac_reconfigure updates that too. 2478 */ 2479 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | 2480 NETIF_F_RXFCS)) { 2481 /* efx_set_rx_mode() will schedule MAC work to update filters 2482 * when the new features are finally set in net_dev.
2483 */ 2484 efx_set_rx_mode(net_dev); 2485 } 2486 2487 return 0; 2488 } 2489 2490 static int efx_get_phys_port_id(struct net_device *net_dev, 2491 struct netdev_phys_item_id *ppid) 2492 { 2493 struct efx_nic *efx = netdev_priv(net_dev); 2494 2495 if (efx->type->get_phys_port_id) 2496 return efx->type->get_phys_port_id(efx, ppid); 2497 else 2498 return -EOPNOTSUPP; 2499 } 2500 2501 static int efx_get_phys_port_name(struct net_device *net_dev, 2502 char *name, size_t len) 2503 { 2504 struct efx_nic *efx = netdev_priv(net_dev); 2505 2506 if (snprintf(name, len, "p%u", efx->port_num) >= len) 2507 return -EINVAL; 2508 return 0; 2509 } 2510 2511 static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2512 { 2513 struct efx_nic *efx = netdev_priv(net_dev); 2514 2515 if (efx->type->vlan_rx_add_vid) 2516 return efx->type->vlan_rx_add_vid(efx, proto, vid); 2517 else 2518 return -EOPNOTSUPP; 2519 } 2520 2521 static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid) 2522 { 2523 struct efx_nic *efx = netdev_priv(net_dev); 2524 2525 if (efx->type->vlan_rx_kill_vid) 2526 return efx->type->vlan_rx_kill_vid(efx, proto, vid); 2527 else 2528 return -EOPNOTSUPP; 2529 } 2530 2531 static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in) 2532 { 2533 switch (in) { 2534 case UDP_TUNNEL_TYPE_VXLAN: 2535 return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN; 2536 case UDP_TUNNEL_TYPE_GENEVE: 2537 return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE; 2538 default: 2539 return -1; 2540 } 2541 } 2542 2543 static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) 2544 { 2545 struct efx_nic *efx = netdev_priv(dev); 2546 struct efx_udp_tunnel tnl; 2547 int efx_tunnel_type; 2548 2549 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2550 if (efx_tunnel_type < 0) 2551 return; 2552 2553 tnl.type = (u16)efx_tunnel_type; 2554 tnl.port = ti->port; 2555 2556 if (efx->type->udp_tnl_add_port) 2557 (void)efx->type->udp_tnl_add_port(efx, tnl); 2558 } 2559 2560 static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) 2561 { 2562 struct efx_nic *efx = netdev_priv(dev); 2563 struct efx_udp_tunnel tnl; 2564 int efx_tunnel_type; 2565 2566 efx_tunnel_type = efx_udp_tunnel_type_map(ti->type); 2567 if (efx_tunnel_type < 0) 2568 return; 2569 2570 tnl.type = (u16)efx_tunnel_type; 2571 tnl.port = ti->port; 2572 2573 if (efx->type->udp_tnl_del_port) 2574 (void)efx->type->udp_tnl_del_port(efx, tnl); 2575 } 2576 2577 static const struct net_device_ops efx_netdev_ops = { 2578 .ndo_open = efx_net_open, 2579 .ndo_stop = efx_net_stop, 2580 .ndo_get_stats64 = efx_net_stats, 2581 .ndo_tx_timeout = efx_watchdog, 2582 .ndo_start_xmit = efx_hard_start_xmit, 2583 .ndo_validate_addr = eth_validate_addr, 2584 .ndo_do_ioctl = efx_ioctl, 2585 .ndo_change_mtu = efx_change_mtu, 2586 .ndo_set_mac_address = efx_set_mac_address, 2587 .ndo_set_rx_mode = efx_set_rx_mode, 2588 .ndo_set_features = efx_set_features, 2589 .ndo_vlan_rx_add_vid = efx_vlan_rx_add_vid, 2590 .ndo_vlan_rx_kill_vid = efx_vlan_rx_kill_vid, 2591 #ifdef CONFIG_SFC_SRIOV 2592 .ndo_set_vf_mac = efx_sriov_set_vf_mac, 2593 .ndo_set_vf_vlan = efx_sriov_set_vf_vlan, 2594 .ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk, 2595 .ndo_get_vf_config = efx_sriov_get_vf_config, 2596 .ndo_set_vf_link_state = efx_sriov_set_vf_link_state, 2597 #endif 2598 .ndo_get_phys_port_id = efx_get_phys_port_id, 2599 .ndo_get_phys_port_name = efx_get_phys_port_name, 2600 .ndo_setup_tc = efx_setup_tc, 2601 #ifdef CONFIG_RFS_ACCEL 2602 
.ndo_rx_flow_steer = efx_filter_rfs, 2603 #endif 2604 .ndo_udp_tunnel_add = efx_udp_tunnel_add, 2605 .ndo_udp_tunnel_del = efx_udp_tunnel_del, 2606 .ndo_xdp_xmit = efx_xdp_xmit, 2607 .ndo_bpf = efx_xdp 2608 }; 2609 2610 static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog) 2611 { 2612 struct bpf_prog *old_prog; 2613 2614 if (efx->xdp_rxq_info_failed) { 2615 netif_err(efx, drv, efx->net_dev, 2616 "Unable to bind XDP program due to previous failure of rxq_info\n"); 2617 return -EINVAL; 2618 } 2619 2620 if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) { 2621 netif_err(efx, drv, efx->net_dev, 2622 "Unable to configure XDP with MTU of %d (max: %d)\n", 2623 efx->net_dev->mtu, efx_xdp_max_mtu(efx)); 2624 return -EINVAL; 2625 } 2626 2627 old_prog = rtnl_dereference(efx->xdp_prog); 2628 rcu_assign_pointer(efx->xdp_prog, prog); 2629 /* Release the reference that was originally passed by the caller. */ 2630 if (old_prog) 2631 bpf_prog_put(old_prog); 2632 2633 return 0; 2634 } 2635 2636 /* Context: process, rtnl_lock() held. */ 2637 static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp) 2638 { 2639 struct efx_nic *efx = netdev_priv(dev); 2640 struct bpf_prog *xdp_prog; 2641 2642 switch (xdp->command) { 2643 case XDP_SETUP_PROG: 2644 return efx_xdp_setup_prog(efx, xdp->prog); 2645 case XDP_QUERY_PROG: 2646 xdp_prog = rtnl_dereference(efx->xdp_prog); 2647 xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0; 2648 return 0; 2649 default: 2650 return -EINVAL; 2651 } 2652 } 2653 2654 static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs, 2655 u32 flags) 2656 { 2657 struct efx_nic *efx = netdev_priv(dev); 2658 2659 if (!netif_running(dev)) 2660 return -EINVAL; 2661 2662 return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH); 2663 } 2664 2665 static void efx_update_name(struct efx_nic *efx) 2666 { 2667 strcpy(efx->name, efx->net_dev->name); 2668 efx_mtd_rename(efx); 2669 efx_set_channel_names(efx); 2670 } 2671 2672 static int efx_netdev_event(struct notifier_block *this, 2673 unsigned long event, void *ptr) 2674 { 2675 struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); 2676 2677 if ((net_dev->netdev_ops == &efx_netdev_ops) && 2678 event == NETDEV_CHANGENAME) 2679 efx_update_name(netdev_priv(net_dev)); 2680 2681 return NOTIFY_DONE; 2682 } 2683 2684 static struct notifier_block efx_netdev_notifier = { 2685 .notifier_call = efx_netdev_event, 2686 }; 2687 2688 static ssize_t 2689 show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) 2690 { 2691 struct efx_nic *efx = dev_get_drvdata(dev); 2692 return sprintf(buf, "%d\n", efx->phy_type); 2693 } 2694 static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL); 2695 2696 #ifdef CONFIG_SFC_MCDI_LOGGING 2697 static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr, 2698 char *buf) 2699 { 2700 struct efx_nic *efx = dev_get_drvdata(dev); 2701 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2702 2703 return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); 2704 } 2705 static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr, 2706 const char *buf, size_t count) 2707 { 2708 struct efx_nic *efx = dev_get_drvdata(dev); 2709 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 2710 bool enable = count > 0 && *buf != '0'; 2711 2712 mcdi->logging_enabled = enable; 2713 return count; 2714 } 2715 static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log); 2716 #endif 2717 2718 static int efx_register_netdev(struct efx_nic *efx) 2719 { 
2720 struct net_device *net_dev = efx->net_dev; 2721 struct efx_channel *channel; 2722 int rc; 2723 2724 net_dev->watchdog_timeo = 5 * HZ; 2725 net_dev->irq = efx->pci_dev->irq; 2726 net_dev->netdev_ops = &efx_netdev_ops; 2727 if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) 2728 net_dev->priv_flags |= IFF_UNICAST_FLT; 2729 net_dev->ethtool_ops = &efx_ethtool_ops; 2730 net_dev->gso_max_segs = EFX_TSO_MAX_SEGS; 2731 net_dev->min_mtu = EFX_MIN_MTU; 2732 net_dev->max_mtu = EFX_MAX_MTU; 2733 2734 rtnl_lock(); 2735 2736 /* Enable resets to be scheduled and check whether any were 2737 * already requested. If so, the NIC is probably hosed so we 2738 * abort. 2739 */ 2740 efx->state = STATE_READY; 2741 smp_mb(); /* ensure we change state before checking reset_pending */ 2742 if (efx->reset_pending) { 2743 netif_err(efx, probe, efx->net_dev, 2744 "aborting probe due to scheduled reset\n"); 2745 rc = -EIO; 2746 goto fail_locked; 2747 } 2748 2749 rc = dev_alloc_name(net_dev, net_dev->name); 2750 if (rc < 0) 2751 goto fail_locked; 2752 efx_update_name(efx); 2753 2754 /* Always start with carrier off; PHY events will detect the link */ 2755 netif_carrier_off(net_dev); 2756 2757 rc = register_netdevice(net_dev); 2758 if (rc) 2759 goto fail_locked; 2760 2761 efx_for_each_channel(channel, efx) { 2762 struct efx_tx_queue *tx_queue; 2763 efx_for_each_channel_tx_queue(tx_queue, channel) 2764 efx_init_tx_queue_core_txq(tx_queue); 2765 } 2766 2767 efx_associate(efx); 2768 2769 rtnl_unlock(); 2770 2771 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2772 if (rc) { 2773 netif_err(efx, drv, efx->net_dev, 2774 "failed to init net dev attributes\n"); 2775 goto fail_registered; 2776 } 2777 #ifdef CONFIG_SFC_MCDI_LOGGING 2778 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2779 if (rc) { 2780 netif_err(efx, drv, efx->net_dev, 2781 "failed to init net dev attributes\n"); 2782 goto fail_attr_mcdi_logging; 2783 } 2784 #endif 2785 2786 return 0; 2787 2788 #ifdef CONFIG_SFC_MCDI_LOGGING 2789 fail_attr_mcdi_logging: 2790 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2791 #endif 2792 fail_registered: 2793 rtnl_lock(); 2794 efx_dissociate(efx); 2795 unregister_netdevice(net_dev); 2796 fail_locked: 2797 efx->state = STATE_UNINIT; 2798 rtnl_unlock(); 2799 netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); 2800 return rc; 2801 } 2802 2803 static void efx_unregister_netdev(struct efx_nic *efx) 2804 { 2805 if (!efx->net_dev) 2806 return; 2807 2808 BUG_ON(netdev_priv(efx->net_dev) != efx); 2809 2810 if (efx_dev_registered(efx)) { 2811 strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); 2812 #ifdef CONFIG_SFC_MCDI_LOGGING 2813 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 2814 #endif 2815 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 2816 unregister_netdev(efx->net_dev); 2817 } 2818 } 2819 2820 /************************************************************************** 2821 * 2822 * Device reset and suspend 2823 * 2824 **************************************************************************/ 2825 2826 /* Tears down the entire software state and most of the hardware state 2827 * before reset. 
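 * Paired with efx_reset_up(); efx_reset() below roughly does
 *     efx_reset_down(efx, method);
 *     rc = efx->type->reset(efx, method);
 *     rc2 = efx_reset_up(efx, method, !disabled);
 * so every lock taken here must be released there.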
*/ 2828 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 2829 { 2830 EFX_ASSERT_RESET_SERIALISED(efx); 2831 2832 if (method == RESET_TYPE_MCDI_TIMEOUT) 2833 efx->type->prepare_flr(efx); 2834 2835 efx_stop_all(efx); 2836 efx_disable_interrupts(efx); 2837 2838 mutex_lock(&efx->mac_lock); 2839 down_write(&efx->filter_sem); 2840 mutex_lock(&efx->rss_lock); 2841 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2842 method != RESET_TYPE_DATAPATH) 2843 efx->phy_op->fini(efx); 2844 efx->type->fini(efx); 2845 } 2846 2847 /* This function will always ensure that the locks acquired in 2848 * efx_reset_down() are released. A failure return code indicates 2849 * that we were unable to reinitialise the hardware, and the 2850 * driver should be disabled. If ok is false, then the rx and tx 2851 * engines are not restarted, pending a RESET_DISABLE. */ 2852 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 2853 { 2854 int rc; 2855 2856 EFX_ASSERT_RESET_SERIALISED(efx); 2857 2858 if (method == RESET_TYPE_MCDI_TIMEOUT) 2859 efx->type->finish_flr(efx); 2860 2861 /* Ensure that SRAM is initialised even if we're disabling the device */ 2862 rc = efx->type->init(efx); 2863 if (rc) { 2864 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 2865 goto fail; 2866 } 2867 2868 if (!ok) 2869 goto fail; 2870 2871 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 2872 method != RESET_TYPE_DATAPATH) { 2873 rc = efx->phy_op->init(efx); 2874 if (rc) 2875 goto fail; 2876 rc = efx->phy_op->reconfigure(efx); 2877 if (rc && rc != -EPERM) 2878 netif_err(efx, drv, efx->net_dev, 2879 "could not restore PHY settings\n"); 2880 } 2881 2882 rc = efx_enable_interrupts(efx); 2883 if (rc) 2884 goto fail; 2885 2886 #ifdef CONFIG_SFC_SRIOV 2887 rc = efx->type->vswitching_restore(efx); 2888 if (rc) /* not fatal; the PF will still work fine */ 2889 netif_warn(efx, probe, efx->net_dev, 2890 "failed to restore vswitching rc=%d;" 2891 " VFs may not function\n", rc); 2892 #endif 2893 2894 if (efx->type->rx_restore_rss_contexts) 2895 efx->type->rx_restore_rss_contexts(efx); 2896 mutex_unlock(&efx->rss_lock); 2897 efx->type->filter_table_restore(efx); 2898 up_write(&efx->filter_sem); 2899 if (efx->type->sriov_reset) 2900 efx->type->sriov_reset(efx); 2901 2902 mutex_unlock(&efx->mac_lock); 2903 2904 efx_start_all(efx); 2905 2906 if (efx->type->udp_tnl_push_ports) 2907 efx->type->udp_tnl_push_ports(efx); 2908 2909 return 0; 2910 2911 fail: 2912 efx->port_initialized = false; 2913 2914 mutex_unlock(&efx->rss_lock); 2915 up_write(&efx->filter_sem); 2916 mutex_unlock(&efx->mac_lock); 2917 2918 return rc; 2919 } 2920 2921 /* Reset the NIC using the specified method. Note that the reset may 2922 * fail, in which case the card will be left in an unusable state. 2923 * 2924 * Caller must hold the rtnl_lock. 2925 */ 2926 int efx_reset(struct efx_nic *efx, enum reset_type method) 2927 { 2928 int rc, rc2; 2929 bool disabled; 2930 2931 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 2932 RESET_TYPE(method)); 2933 2934 efx_device_detach_sync(efx); 2935 efx_reset_down(efx, method); 2936 2937 rc = efx->type->reset(efx, method); 2938 if (rc) { 2939 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 2940 goto out; 2941 } 2942 2943 /* Clear flags for the scopes we covered. We assume the NIC and 2944 * driver are now quiescent so that there is no race here. 
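 * (Worked example: -(1 << (method + 1)) equals ~((1 << (method + 1)) - 1),
 * so for method == 3 the mask is ...fffffff0 and pending bits 0-3, the
 * scopes subsumed by this reset, are cleared while any larger-scope
 * requests stay pending.)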
2945 */ 2946 if (method < RESET_TYPE_MAX_METHOD) 2947 efx->reset_pending &= -(1 << (method + 1)); 2948 else /* it doesn't fit into the well-ordered scope hierarchy */ 2949 __clear_bit(method, &efx->reset_pending); 2950 2951 /* Reinitialise bus-mastering, which may have been turned off before 2952 * the reset was scheduled. This is still appropriate, even in the 2953 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 2954 * can respond to requests. */ 2955 pci_set_master(efx->pci_dev); 2956 2957 out: 2958 /* Leave device stopped if necessary */ 2959 disabled = rc || 2960 method == RESET_TYPE_DISABLE || 2961 method == RESET_TYPE_RECOVER_OR_DISABLE; 2962 rc2 = efx_reset_up(efx, method, !disabled); 2963 if (rc2) { 2964 disabled = true; 2965 if (!rc) 2966 rc = rc2; 2967 } 2968 2969 if (disabled) { 2970 dev_close(efx->net_dev); 2971 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 2972 efx->state = STATE_DISABLED; 2973 } else { 2974 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 2975 efx_device_attach_if_not_resetting(efx); 2976 } 2977 return rc; 2978 } 2979 2980 /* Try recovery mechanisms. 2981 * For now only EEH is supported. 2982 * Returns 0 if the recovery mechanisms are unsuccessful. 2983 * Returns a non-zero value otherwise. 2984 */ 2985 int efx_try_recovery(struct efx_nic *efx) 2986 { 2987 #ifdef CONFIG_EEH 2988 /* A PCI error can occur and not be seen by EEH because nothing 2989 * happens on the PCI bus. In this case the driver may fail and 2990 * schedule a 'recover or reset', leading to this recovery handler. 2991 * Manually call the eeh failure check function. 2992 */ 2993 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 2994 if (eeh_dev_check_failure(eehdev)) { 2995 /* The EEH mechanisms will handle the error and reset the 2996 * device if necessary. 2997 */ 2998 return 1; 2999 } 3000 #endif 3001 return 0; 3002 } 3003 3004 static void efx_wait_for_bist_end(struct efx_nic *efx) 3005 { 3006 int i; 3007 3008 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 3009 if (efx_mcdi_poll_reboot(efx)) 3010 goto out; 3011 msleep(BIST_WAIT_DELAY_MS); 3012 } 3013 3014 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 3015 out: 3016 /* Either way unset the BIST flag. If we found no reboot we probably 3017 * won't recover, but we should try. 3018 */ 3019 efx->mc_bist_for_other_fn = false; 3020 } 3021 3022 /* The worker thread exists so that code that cannot sleep can 3023 * schedule a reset for later. 3024 */ 3025 static void efx_reset_work(struct work_struct *data) 3026 { 3027 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 3028 unsigned long pending; 3029 enum reset_type method; 3030 3031 pending = READ_ONCE(efx->reset_pending); 3032 method = fls(pending) - 1; 3033 3034 if (method == RESET_TYPE_MC_BIST) 3035 efx_wait_for_bist_end(efx); 3036 3037 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 3038 method == RESET_TYPE_RECOVER_OR_ALL) && 3039 efx_try_recovery(efx)) 3040 return; 3041 3042 if (!pending) 3043 return; 3044 3045 rtnl_lock(); 3046 3047 /* We checked the state in efx_schedule_reset() but it may 3048 * have changed by now. Now that we have the RTNL lock, 3049 * it cannot change again. 
3050 */ 3051 if (efx->state == STATE_READY) 3052 (void)efx_reset(efx, method); 3053 3054 rtnl_unlock(); 3055 } 3056 3057 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 3058 { 3059 enum reset_type method; 3060 3061 if (efx->state == STATE_RECOVERY) { 3062 netif_dbg(efx, drv, efx->net_dev, 3063 "recovering: skip scheduling %s reset\n", 3064 RESET_TYPE(type)); 3065 return; 3066 } 3067 3068 switch (type) { 3069 case RESET_TYPE_INVISIBLE: 3070 case RESET_TYPE_ALL: 3071 case RESET_TYPE_RECOVER_OR_ALL: 3072 case RESET_TYPE_WORLD: 3073 case RESET_TYPE_DISABLE: 3074 case RESET_TYPE_RECOVER_OR_DISABLE: 3075 case RESET_TYPE_DATAPATH: 3076 case RESET_TYPE_MC_BIST: 3077 case RESET_TYPE_MCDI_TIMEOUT: 3078 method = type; 3079 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 3080 RESET_TYPE(method)); 3081 break; 3082 default: 3083 method = efx->type->map_reset_reason(type); 3084 netif_dbg(efx, drv, efx->net_dev, 3085 "scheduling %s reset for %s\n", 3086 RESET_TYPE(method), RESET_TYPE(type)); 3087 break; 3088 } 3089 3090 set_bit(method, &efx->reset_pending); 3091 smp_mb(); /* ensure we change reset_pending before checking state */ 3092 3093 /* If we're not READY then just leave the flags set as the cue 3094 * to abort probing or reschedule the reset later. 3095 */ 3096 if (READ_ONCE(efx->state) != STATE_READY) 3097 return; 3098 3099 /* efx_process_channel() will no longer read events once a 3100 * reset is scheduled. So switch back to poll'd MCDI completions. */ 3101 efx_mcdi_mode_poll(efx); 3102 3103 efx_queue_reset_work(efx); 3104 } 3105 3106 /************************************************************************** 3107 * 3108 * List of NICs we support 3109 * 3110 **************************************************************************/ 3111 3112 /* PCI device ID table */ 3113 static const struct pci_device_id efx_pci_table[] = { 3114 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803), /* SFC9020 */ 3115 .driver_data = (unsigned long) &siena_a0_nic_type}, 3116 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813), /* SFL9021 */ 3117 .driver_data = (unsigned long) &siena_a0_nic_type}, 3118 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903), /* SFC9120 PF */ 3119 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 3120 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903), /* SFC9120 VF */ 3121 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 3122 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923), /* SFC9140 PF */ 3123 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 3124 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923), /* SFC9140 VF */ 3125 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 3126 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03), /* SFC9220 PF */ 3127 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 3128 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03), /* SFC9220 VF */ 3129 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 3130 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03), /* SFC9250 PF */ 3131 .driver_data = (unsigned long) &efx_hunt_a0_nic_type}, 3132 {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03), /* SFC9250 VF */ 3133 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type}, 3134 {0} /* end of list */ 3135 }; 3136 3137 /************************************************************************** 3138 * 3139 * Dummy PHY/MAC operations 3140 * 3141 * Can be used for some unimplemented operations 3142 * Needed so all function pointers are valid and do not have to be tested 3143 * before use 3144 * 3145 
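 * (efx_init_struct() below installs these as the initial efx->phy_op so
 * early code always has valid callbacks; the NIC-specific PHY operations
 * are expected to replace them later during probe.)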
**************************************************************************/ 3146 int efx_port_dummy_op_int(struct efx_nic *efx) 3147 { 3148 return 0; 3149 } 3150 void efx_port_dummy_op_void(struct efx_nic *efx) {} 3151 3152 static bool efx_port_dummy_op_poll(struct efx_nic *efx) 3153 { 3154 return false; 3155 } 3156 3157 static const struct efx_phy_operations efx_dummy_phy_operations = { 3158 .init = efx_port_dummy_op_int, 3159 .reconfigure = efx_port_dummy_op_int, 3160 .poll = efx_port_dummy_op_poll, 3161 .fini = efx_port_dummy_op_void, 3162 }; 3163 3164 /************************************************************************** 3165 * 3166 * Data housekeeping 3167 * 3168 **************************************************************************/ 3169 3170 /* This zeroes out and then fills in the invariants in a struct 3171 * efx_nic (including all sub-structures). 3172 */ 3173 int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, 3174 struct net_device *net_dev) 3175 { 3176 int rc = -ENOMEM, i; 3177 3178 /* Initialise common structures */ 3179 INIT_LIST_HEAD(&efx->node); 3180 INIT_LIST_HEAD(&efx->secondary_list); 3181 spin_lock_init(&efx->biu_lock); 3182 #ifdef CONFIG_SFC_MTD 3183 INIT_LIST_HEAD(&efx->mtd_list); 3184 #endif 3185 INIT_WORK(&efx->reset_work, efx_reset_work); 3186 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 3187 INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); 3188 efx->pci_dev = pci_dev; 3189 efx->msg_enable = debug; 3190 efx->state = STATE_UNINIT; 3191 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 3192 3193 efx->net_dev = net_dev; 3194 efx->rx_prefix_size = efx->type->rx_prefix_size; 3195 efx->rx_ip_align = 3196 NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 3197 efx->rx_packet_hash_offset = 3198 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 3199 efx->rx_packet_ts_offset = 3200 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 3201 INIT_LIST_HEAD(&efx->rss_context.list); 3202 mutex_init(&efx->rss_lock); 3203 spin_lock_init(&efx->stats_lock); 3204 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 3205 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 3206 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 3207 mutex_init(&efx->mac_lock); 3208 #ifdef CONFIG_RFS_ACCEL 3209 mutex_init(&efx->rps_mutex); 3210 spin_lock_init(&efx->rps_hash_lock); 3211 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 3212 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 3213 sizeof(*efx->rps_hash_table), GFP_KERNEL); 3214 #endif 3215 efx->phy_op = &efx_dummy_phy_operations; 3216 efx->mdio.dev = net_dev; 3217 INIT_WORK(&efx->mac_work, efx_mac_work); 3218 init_waitqueue_head(&efx->flush_wq); 3219 3220 for (i = 0; i < EFX_MAX_CHANNELS; i++) { 3221 efx->channel[i] = efx_alloc_channel(efx, i, NULL); 3222 if (!efx->channel[i]) 3223 goto fail; 3224 efx->msi_context[i].efx = efx; 3225 efx->msi_context[i].index = i; 3226 } 3227 3228 /* Higher numbered interrupt modes are less capable! 
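 * (The interrupt_mode module parameter uses the same ordering,
 * 0 => MSI-X, 1 => MSI, 2 => legacy, so the max()/min() pair below clamps
 * the requested mode to the range supported by this NIC type.)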
*/ 3229 if (WARN_ON_ONCE(efx->type->max_interrupt_mode > 3230 efx->type->min_interrupt_mode)) { 3231 rc = -EIO; 3232 goto fail; 3233 } 3234 efx->interrupt_mode = max(efx->type->max_interrupt_mode, 3235 interrupt_mode); 3236 efx->interrupt_mode = min(efx->type->min_interrupt_mode, 3237 interrupt_mode); 3238 3239 /* Would be good to use the net_dev name, but we're too early */ 3240 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 3241 pci_name(pci_dev)); 3242 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 3243 if (!efx->workqueue) 3244 goto fail; 3245 3246 return 0; 3247 3248 fail: 3249 efx_fini_struct(efx); 3250 return rc; 3251 } 3252 3253 void efx_fini_struct(struct efx_nic *efx) 3254 { 3255 int i; 3256 3257 #ifdef CONFIG_RFS_ACCEL 3258 kfree(efx->rps_hash_table); 3259 #endif 3260 3261 for (i = 0; i < EFX_MAX_CHANNELS; i++) 3262 kfree(efx->channel[i]); 3263 3264 kfree(efx->vpd_sn); 3265 3266 if (efx->workqueue) { 3267 destroy_workqueue(efx->workqueue); 3268 efx->workqueue = NULL; 3269 } 3270 } 3271 3272 void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) 3273 { 3274 u64 n_rx_nodesc_trunc = 0; 3275 struct efx_channel *channel; 3276 3277 efx_for_each_channel(channel, efx) 3278 n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc; 3279 stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc; 3280 stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops); 3281 } 3282 3283 bool efx_filter_spec_equal(const struct efx_filter_spec *left, 3284 const struct efx_filter_spec *right) 3285 { 3286 if ((left->match_flags ^ right->match_flags) | 3287 ((left->flags ^ right->flags) & 3288 (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX))) 3289 return false; 3290 3291 return memcmp(&left->outer_vid, &right->outer_vid, 3292 sizeof(struct efx_filter_spec) - 3293 offsetof(struct efx_filter_spec, outer_vid)) == 0; 3294 } 3295 3296 u32 efx_filter_spec_hash(const struct efx_filter_spec *spec) 3297 { 3298 BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3); 3299 return jhash2((const u32 *)&spec->outer_vid, 3300 (sizeof(struct efx_filter_spec) - 3301 offsetof(struct efx_filter_spec, outer_vid)) / 4, 3302 0); 3303 } 3304 3305 #ifdef CONFIG_RFS_ACCEL 3306 bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, 3307 bool *force) 3308 { 3309 if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) { 3310 /* ARFS is currently updating this entry, leave it */ 3311 return false; 3312 } 3313 if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) { 3314 /* ARFS tried and failed to update this, so it's probably out 3315 * of date. Remove the filter and the ARFS rule entry. 3316 */ 3317 rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING; 3318 *force = true; 3319 return true; 3320 } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */ 3321 /* ARFS has moved on, so old filter is not needed. Since we did 3322 * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will 3323 * not be removed by efx_rps_hash_del() subsequently. 3324 */ 3325 *force = true; 3326 return true; 3327 } 3328 /* Remove it iff ARFS wants to. 
*/ 3329 return true; 3330 } 3331 3332 static 3333 struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, 3334 const struct efx_filter_spec *spec) 3335 { 3336 u32 hash = efx_filter_spec_hash(spec); 3337 3338 lockdep_assert_held(&efx->rps_hash_lock); 3339 if (!efx->rps_hash_table) 3340 return NULL; 3341 return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; 3342 } 3343 3344 struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx, 3345 const struct efx_filter_spec *spec) 3346 { 3347 struct efx_arfs_rule *rule; 3348 struct hlist_head *head; 3349 struct hlist_node *node; 3350 3351 head = efx_rps_hash_bucket(efx, spec); 3352 if (!head) 3353 return NULL; 3354 hlist_for_each(node, head) { 3355 rule = container_of(node, struct efx_arfs_rule, node); 3356 if (efx_filter_spec_equal(spec, &rule->spec)) 3357 return rule; 3358 } 3359 return NULL; 3360 } 3361 3362 struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, 3363 const struct efx_filter_spec *spec, 3364 bool *new) 3365 { 3366 struct efx_arfs_rule *rule; 3367 struct hlist_head *head; 3368 struct hlist_node *node; 3369 3370 head = efx_rps_hash_bucket(efx, spec); 3371 if (!head) 3372 return NULL; 3373 hlist_for_each(node, head) { 3374 rule = container_of(node, struct efx_arfs_rule, node); 3375 if (efx_filter_spec_equal(spec, &rule->spec)) { 3376 *new = false; 3377 return rule; 3378 } 3379 } 3380 rule = kmalloc(sizeof(*rule), GFP_ATOMIC); 3381 *new = true; 3382 if (rule) { 3383 memcpy(&rule->spec, spec, sizeof(rule->spec)); 3384 hlist_add_head(&rule->node, head); 3385 } 3386 return rule; 3387 } 3388 3389 void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec) 3390 { 3391 struct efx_arfs_rule *rule; 3392 struct hlist_head *head; 3393 struct hlist_node *node; 3394 3395 head = efx_rps_hash_bucket(efx, spec); 3396 if (WARN_ON(!head)) 3397 return; 3398 hlist_for_each(node, head) { 3399 rule = container_of(node, struct efx_arfs_rule, node); 3400 if (efx_filter_spec_equal(spec, &rule->spec)) { 3401 /* Someone already reused the entry. We know that if 3402 * this check doesn't fire (i.e. filter_id == REMOVING) 3403 * then the REMOVING mark was put there by our caller, 3404 * because caller is holding a lock on filter table and 3405 * only holders of that lock set REMOVING. 3406 */ 3407 if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING) 3408 return; 3409 hlist_del(node); 3410 kfree(rule); 3411 return; 3412 } 3413 } 3414 /* We didn't find it. */ 3415 WARN_ON(1); 3416 } 3417 #endif 3418 3419 /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because 3420 * (a) this is an infrequent control-plane operation and (b) n is small (max 64) 3421 */ 3422 struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) 3423 { 3424 struct list_head *head = &efx->rss_context.list; 3425 struct efx_rss_context *ctx, *new; 3426 u32 id = 1; /* Don't use zero, that refers to the master RSS context */ 3427 3428 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3429 3430 /* Search for first gap in the numbering */ 3431 list_for_each_entry(ctx, head, list) { 3432 if (ctx->user_id != id) 3433 break; 3434 id++; 3435 /* Check for wrap. If this happens, we have nearly 2^32 3436 * allocated RSS contexts, which seems unlikely. 
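 * (Gap-search example: with user_ids 1, 2 and 4 present, the walk breaks
 * at the id-4 entry while id == 3, and list_add_tail(&new->list,
 * &ctx->list) below inserts the new context just before it; if no gap is
 * found, &ctx->list has come back round to the list head and the entry is
 * simply appended.)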
3437 */ 3438 if (WARN_ON_ONCE(!id)) 3439 return NULL; 3440 } 3441 3442 /* Create the new entry */ 3443 new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL); 3444 if (!new) 3445 return NULL; 3446 new->context_id = EFX_EF10_RSS_CONTEXT_INVALID; 3447 new->rx_hash_udp_4tuple = false; 3448 3449 /* Insert the new entry into the gap */ 3450 new->user_id = id; 3451 list_add_tail(&new->list, &ctx->list); 3452 return new; 3453 } 3454 3455 struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) 3456 { 3457 struct list_head *head = &efx->rss_context.list; 3458 struct efx_rss_context *ctx; 3459 3460 WARN_ON(!mutex_is_locked(&efx->rss_lock)); 3461 3462 list_for_each_entry(ctx, head, list) 3463 if (ctx->user_id == id) 3464 return ctx; 3465 return NULL; 3466 } 3467 3468 void efx_free_rss_context_entry(struct efx_rss_context *ctx) 3469 { 3470 list_del(&ctx->list); 3471 kfree(ctx); 3472 } 3473 3474 /************************************************************************** 3475 * 3476 * PCI interface 3477 * 3478 **************************************************************************/ 3479 3480 /* Main body of final NIC shutdown code 3481 * This is called only at module unload (or hotplug removal). 3482 */ 3483 static void efx_pci_remove_main(struct efx_nic *efx) 3484 { 3485 /* Flush reset_work. It can no longer be scheduled since we 3486 * are not READY. 3487 */ 3488 BUG_ON(efx->state == STATE_READY); 3489 efx_flush_reset_workqueue(efx); 3490 3491 efx_disable_interrupts(efx); 3492 efx_clear_interrupt_affinity(efx); 3493 efx_nic_fini_interrupt(efx); 3494 efx_fini_port(efx); 3495 efx->type->fini(efx); 3496 efx_fini_napi(efx); 3497 efx_remove_all(efx); 3498 } 3499 3500 /* Final NIC shutdown 3501 * This is called only at module unload (or hotplug removal). A PF can call 3502 * this on its VFs to ensure they are unbound first. 3503 */ 3504 static void efx_pci_remove(struct pci_dev *pci_dev) 3505 { 3506 struct efx_nic *efx; 3507 3508 efx = pci_get_drvdata(pci_dev); 3509 if (!efx) 3510 return; 3511 3512 /* Mark the NIC as fini, then stop the interface */ 3513 rtnl_lock(); 3514 efx_dissociate(efx); 3515 dev_close(efx->net_dev); 3516 efx_disable_interrupts(efx); 3517 efx->state = STATE_UNINIT; 3518 rtnl_unlock(); 3519 3520 if (efx->type->sriov_fini) 3521 efx->type->sriov_fini(efx); 3522 3523 efx_unregister_netdev(efx); 3524 3525 efx_mtd_remove(efx); 3526 3527 efx_pci_remove_main(efx); 3528 3529 efx_fini_io(efx, efx->type->mem_bar(efx)); 3530 netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); 3531 3532 efx_fini_struct(efx); 3533 free_netdev(efx->net_dev); 3534 3535 pci_disable_pcie_error_reporting(pci_dev); 3536 }; 3537 3538 /* NIC VPD information 3539 * Called during probe to display the part number of the 3540 * installed NIC. VPD is potentially very large but this should 3541 * always appear within the first 512 bytes. 
3542 */ 3543 #define SFC_VPD_LEN 512 3544 static void efx_probe_vpd_strings(struct efx_nic *efx) 3545 { 3546 struct pci_dev *dev = efx->pci_dev; 3547 char vpd_data[SFC_VPD_LEN]; 3548 ssize_t vpd_size; 3549 int ro_start, ro_size, i, j; 3550 3551 /* Get the vpd data from the device */ 3552 vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data); 3553 if (vpd_size <= 0) { 3554 netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n"); 3555 return; 3556 } 3557 3558 /* Get the Read only section */ 3559 ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA); 3560 if (ro_start < 0) { 3561 netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n"); 3562 return; 3563 } 3564 3565 ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]); 3566 j = ro_size; 3567 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3568 if (i + j > vpd_size) 3569 j = vpd_size - i; 3570 3571 /* Get the Part number */ 3572 i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN"); 3573 if (i < 0) { 3574 netif_err(efx, drv, efx->net_dev, "Part number not found\n"); 3575 return; 3576 } 3577 3578 j = pci_vpd_info_field_size(&vpd_data[i]); 3579 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3580 if (i + j > vpd_size) { 3581 netif_err(efx, drv, efx->net_dev, "Incomplete part number\n"); 3582 return; 3583 } 3584 3585 netif_info(efx, drv, efx->net_dev, 3586 "Part Number : %.*s\n", j, &vpd_data[i]); 3587 3588 i = ro_start + PCI_VPD_LRDT_TAG_SIZE; 3589 j = ro_size; 3590 i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN"); 3591 if (i < 0) { 3592 netif_err(efx, drv, efx->net_dev, "Serial number not found\n"); 3593 return; 3594 } 3595 3596 j = pci_vpd_info_field_size(&vpd_data[i]); 3597 i += PCI_VPD_INFO_FLD_HDR_SIZE; 3598 if (i + j > vpd_size) { 3599 netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n"); 3600 return; 3601 } 3602 3603 efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL); 3604 if (!efx->vpd_sn) 3605 return; 3606 3607 snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]); 3608 } 3609 3610 3611 /* Main body of NIC initialisation 3612 * This is called at module load (or hotplug insertion, theoretically). 
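 * Roughly: efx_probe_all() -> efx_init_napi() -> efx->type->init() ->
 * efx_init_port() -> efx_nic_init_interrupt() -> efx_enable_interrupts(),
 * with efx_pci_remove_main() unwinding the same steps in reverse.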
3613 */ 3614 static int efx_pci_probe_main(struct efx_nic *efx) 3615 { 3616 int rc; 3617 3618 /* Do start-of-day initialisation */ 3619 rc = efx_probe_all(efx); 3620 if (rc) 3621 goto fail1; 3622 3623 efx_init_napi(efx); 3624 3625 down_write(&efx->filter_sem); 3626 rc = efx->type->init(efx); 3627 up_write(&efx->filter_sem); 3628 if (rc) { 3629 netif_err(efx, probe, efx->net_dev, 3630 "failed to initialise NIC\n"); 3631 goto fail3; 3632 } 3633 3634 rc = efx_init_port(efx); 3635 if (rc) { 3636 netif_err(efx, probe, efx->net_dev, 3637 "failed to initialise port\n"); 3638 goto fail4; 3639 } 3640 3641 rc = efx_nic_init_interrupt(efx); 3642 if (rc) 3643 goto fail5; 3644 3645 efx_set_interrupt_affinity(efx); 3646 rc = efx_enable_interrupts(efx); 3647 if (rc) 3648 goto fail6; 3649 3650 return 0; 3651 3652 fail6: 3653 efx_clear_interrupt_affinity(efx); 3654 efx_nic_fini_interrupt(efx); 3655 fail5: 3656 efx_fini_port(efx); 3657 fail4: 3658 efx->type->fini(efx); 3659 fail3: 3660 efx_fini_napi(efx); 3661 efx_remove_all(efx); 3662 fail1: 3663 return rc; 3664 } 3665 3666 static int efx_pci_probe_post_io(struct efx_nic *efx) 3667 { 3668 struct net_device *net_dev = efx->net_dev; 3669 int rc = efx_pci_probe_main(efx); 3670 3671 if (rc) 3672 return rc; 3673 3674 if (efx->type->sriov_init) { 3675 rc = efx->type->sriov_init(efx); 3676 if (rc) 3677 netif_err(efx, probe, efx->net_dev, 3678 "SR-IOV can't be enabled rc %d\n", rc); 3679 } 3680 3681 /* Determine netdevice features */ 3682 net_dev->features |= (efx->type->offload_features | NETIF_F_SG | 3683 NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL); 3684 if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) 3685 net_dev->features |= NETIF_F_TSO6; 3686 /* Check whether device supports TSO */ 3687 if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) 3688 net_dev->features &= ~NETIF_F_ALL_TSO; 3689 /* Mask for features that also apply to VLAN devices */ 3690 net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | 3691 NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | 3692 NETIF_F_RXCSUM); 3693 3694 net_dev->hw_features |= net_dev->features & ~efx->fixed_features; 3695 3696 /* Disable receiving frames with bad FCS, by default. */ 3697 net_dev->features &= ~NETIF_F_RXALL; 3698 3699 /* Disable VLAN filtering by default. It may be enforced if 3700 * the feature is fixed (i.e. VLAN filters are required to 3701 * receive VLAN tagged packets due to vPort restrictions). 3702 */ 3703 net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; 3704 net_dev->features |= efx->fixed_features; 3705 3706 rc = efx_register_netdev(efx); 3707 if (!rc) 3708 return 0; 3709 3710 efx_pci_remove_main(efx); 3711 return rc; 3712 } 3713 3714 /* NIC initialisation 3715 * 3716 * This is called at module load (or hotplug insertion, 3717 * theoretically). It sets up PCI mappings, resets the NIC, 3718 * sets up and registers the network devices with the kernel and hooks 3719 * the interrupt service routine. It does not prepare the device for 3720 * transmission; this is left to the first time one of the network 3721 * interfaces is brought up (i.e. efx_net_open). 
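 * (If the post-I/O stage fails, efx_pci_probe_post_io() is retried once
 * immediately and then once more after a randomised 50-305 ms msleep()
 * backoff, dismissing any reset that was scheduled during the failed
 * attempt.)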
3722 */ 3723 static int efx_pci_probe(struct pci_dev *pci_dev, 3724 const struct pci_device_id *entry) 3725 { 3726 struct net_device *net_dev; 3727 struct efx_nic *efx; 3728 int rc; 3729 3730 /* Allocate and initialise a struct net_device and struct efx_nic */ 3731 net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES, 3732 EFX_MAX_RX_QUEUES); 3733 if (!net_dev) 3734 return -ENOMEM; 3735 efx = netdev_priv(net_dev); 3736 efx->type = (const struct efx_nic_type *) entry->driver_data; 3737 efx->fixed_features |= NETIF_F_HIGHDMA; 3738 3739 pci_set_drvdata(pci_dev, efx); 3740 SET_NETDEV_DEV(net_dev, &pci_dev->dev); 3741 rc = efx_init_struct(efx, pci_dev, net_dev); 3742 if (rc) 3743 goto fail1; 3744 3745 netif_info(efx, probe, efx->net_dev, 3746 "Solarflare NIC detected\n"); 3747 3748 if (!efx->type->is_vf) 3749 efx_probe_vpd_strings(efx); 3750 3751 /* Set up basic I/O (BAR mappings etc) */ 3752 rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask, 3753 efx->type->mem_map_size(efx)); 3754 if (rc) 3755 goto fail2; 3756 3757 rc = efx_pci_probe_post_io(efx); 3758 if (rc) { 3759 /* On failure, retry once immediately. 3760 * If we aborted probe due to a scheduled reset, dismiss it. 3761 */ 3762 efx->reset_pending = 0; 3763 rc = efx_pci_probe_post_io(efx); 3764 if (rc) { 3765 /* On another failure, retry once more 3766 * after a 50-305ms delay. 3767 */ 3768 unsigned char r; 3769 3770 get_random_bytes(&r, 1); 3771 msleep((unsigned int)r + 50); 3772 efx->reset_pending = 0; 3773 rc = efx_pci_probe_post_io(efx); 3774 } 3775 } 3776 if (rc) 3777 goto fail3; 3778 3779 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 3780 3781 /* Try to create MTDs, but allow this to fail */ 3782 rtnl_lock(); 3783 rc = efx_mtd_probe(efx); 3784 rtnl_unlock(); 3785 if (rc && rc != -EPERM) 3786 netif_warn(efx, probe, efx->net_dev, 3787 "failed to create MTDs (%d)\n", rc); 3788 3789 (void)pci_enable_pcie_error_reporting(pci_dev); 3790 3791 if (efx->type->udp_tnl_push_ports) 3792 efx->type->udp_tnl_push_ports(efx); 3793 3794 return 0; 3795 3796 fail3: 3797 efx_fini_io(efx, efx->type->mem_bar(efx)); 3798 fail2: 3799 efx_fini_struct(efx); 3800 fail1: 3801 WARN_ON(rc > 0); 3802 netif_dbg(efx, drv, efx->net_dev, "initialisation failed. 
rc=%d\n", rc); 3803 free_netdev(net_dev); 3804 return rc; 3805 } 3806 3807 /* efx_pci_sriov_configure returns the actual number of Virtual Functions 3808 * enabled on success 3809 */ 3810 #ifdef CONFIG_SFC_SRIOV 3811 static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs) 3812 { 3813 int rc; 3814 struct efx_nic *efx = pci_get_drvdata(dev); 3815 3816 if (efx->type->sriov_configure) { 3817 rc = efx->type->sriov_configure(efx, num_vfs); 3818 if (rc) 3819 return rc; 3820 else 3821 return num_vfs; 3822 } else 3823 return -EOPNOTSUPP; 3824 } 3825 #endif 3826 3827 static int efx_pm_freeze(struct device *dev) 3828 { 3829 struct efx_nic *efx = dev_get_drvdata(dev); 3830 3831 rtnl_lock(); 3832 3833 if (efx->state != STATE_DISABLED) { 3834 efx->state = STATE_UNINIT; 3835 3836 efx_device_detach_sync(efx); 3837 3838 efx_stop_all(efx); 3839 efx_disable_interrupts(efx); 3840 } 3841 3842 rtnl_unlock(); 3843 3844 return 0; 3845 } 3846 3847 static int efx_pm_thaw(struct device *dev) 3848 { 3849 int rc; 3850 struct efx_nic *efx = dev_get_drvdata(dev); 3851 3852 rtnl_lock(); 3853 3854 if (efx->state != STATE_DISABLED) { 3855 rc = efx_enable_interrupts(efx); 3856 if (rc) 3857 goto fail; 3858 3859 mutex_lock(&efx->mac_lock); 3860 efx->phy_op->reconfigure(efx); 3861 mutex_unlock(&efx->mac_lock); 3862 3863 efx_start_all(efx); 3864 3865 efx_device_attach_if_not_resetting(efx); 3866 3867 efx->state = STATE_READY; 3868 3869 efx->type->resume_wol(efx); 3870 } 3871 3872 rtnl_unlock(); 3873 3874 /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ 3875 efx_queue_reset_work(efx); 3876 3877 return 0; 3878 3879 fail: 3880 rtnl_unlock(); 3881 3882 return rc; 3883 } 3884 3885 static int efx_pm_poweroff(struct device *dev) 3886 { 3887 struct pci_dev *pci_dev = to_pci_dev(dev); 3888 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3889 3890 efx->type->fini(efx); 3891 3892 efx->reset_pending = 0; 3893 3894 pci_save_state(pci_dev); 3895 return pci_set_power_state(pci_dev, PCI_D3hot); 3896 } 3897 3898 /* Used for both resume and restore */ 3899 static int efx_pm_resume(struct device *dev) 3900 { 3901 struct pci_dev *pci_dev = to_pci_dev(dev); 3902 struct efx_nic *efx = pci_get_drvdata(pci_dev); 3903 int rc; 3904 3905 rc = pci_set_power_state(pci_dev, PCI_D0); 3906 if (rc) 3907 return rc; 3908 pci_restore_state(pci_dev); 3909 rc = pci_enable_device(pci_dev); 3910 if (rc) 3911 return rc; 3912 pci_set_master(efx->pci_dev); 3913 rc = efx->type->reset(efx, RESET_TYPE_ALL); 3914 if (rc) 3915 return rc; 3916 down_write(&efx->filter_sem); 3917 rc = efx->type->init(efx); 3918 up_write(&efx->filter_sem); 3919 if (rc) 3920 return rc; 3921 rc = efx_pm_thaw(dev); 3922 return rc; 3923 } 3924 3925 static int efx_pm_suspend(struct device *dev) 3926 { 3927 int rc; 3928 3929 efx_pm_freeze(dev); 3930 rc = efx_pm_poweroff(dev); 3931 if (rc) 3932 efx_pm_resume(dev); 3933 return rc; 3934 } 3935 3936 static const struct dev_pm_ops efx_pm_ops = { 3937 .suspend = efx_pm_suspend, 3938 .resume = efx_pm_resume, 3939 .freeze = efx_pm_freeze, 3940 .thaw = efx_pm_thaw, 3941 .poweroff = efx_pm_poweroff, 3942 .restore = efx_pm_resume, 3943 }; 3944 3945 /* A PCI error affecting this device was detected. 3946 * At this point MMIO and DMA may be disabled. 3947 * Stop the software path and request a slot reset. 
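 * (The AER/EEH core is then expected to call efx_io_slot_reset() and
 * finally efx_io_resume(), where the real RESET_TYPE_ALL recovery is
 * performed.)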
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      enum pci_channel_state state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_RECOVERY;
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status = PCI_ERS_RESULT_DISCONNECT;
	}

	return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		efx->state = STATE_READY;
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callback unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
static const struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset = efx_io_slot_reset,
	.resume = efx_io_resume,
};

static struct pci_driver efx_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = efx_pci_table,
	.probe = efx_pci_probe,
	.remove = efx_pci_remove,
	.driver.pm = &efx_pm_ops,
	.err_handler = &efx_err_handlers,
#ifdef CONFIG_SFC_SRIOV
	.sriov_configure = efx_pci_sriov_configure,
#endif
};

/**************************************************************************
 *
 * Kernel module interface
 *
 *************************************************************************/

module_param(interrupt_mode, uint, 0444);
MODULE_PARM_DESC(interrupt_mode,
		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");

static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

#ifdef CONFIG_SFC_SRIOV
	rc = efx_init_sriov();
	if (rc)
		goto err_sriov;
#endif

	rc = efx_create_reset_workqueue();
	if (rc)
		goto err_reset;

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

err_pci:
	efx_destroy_reset_workqueue();
err_reset:
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
err_sriov:
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
err_notifier:
	return rc;
}

static void __exit efx_exit_module(void)
{
	printk(KERN_INFO "Solarflare NET driver unloading\n");

	pci_unregister_driver(&efx_pci_driver);
	efx_destroy_reset_workqueue();
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
}

module_init(efx_init_module);
module_exit(efx_exit_module);

MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
MODULE_DESCRIPTION("Solarflare network driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);
MODULE_VERSION(EFX_DRIVER_VERSION);
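
/* Example usage of the interrupt_mode parameter declared above (assuming the
 * module is built as sfc.ko, which is what KBUILD_MODNAME would typically
 * yield for this driver):
 *
 *   modprobe sfc interrupt_mode=1    # force MSI instead of MSI-X
 *
 * The sysfs attribute is read-only (permissions 0444), so the value can only
 * be set at module load time, or via the kernel command line as
 * sfc.interrupt_mode=1 when the driver is built in.
 */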