// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include <linux/filter.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/gre.h>
#include "efx_common.h"
#include "efx_channels.h"
#include "efx.h"
#include "mcdi.h"
#include "selftest.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
#include "mcdi_port_common.h"
#include "io.h"
#include "mcdi_pcol.h"
#include "ef100_rep.h"

static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			     NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			     NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			     NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.
 * On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100

/* Default stats update time */
#define STATS_PERIOD_MS_DEFAULT	1000

static const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
static const char *const efx_reset_type_names[] = {
	[RESET_TYPE_INVISIBLE]		= "INVISIBLE",
	[RESET_TYPE_ALL]		= "ALL",
	[RESET_TYPE_RECOVER_OR_ALL]	= "RECOVER_OR_ALL",
	[RESET_TYPE_WORLD]		= "WORLD",
	[RESET_TYPE_RECOVER_OR_DISABLE]	= "RECOVER_OR_DISABLE",
	[RESET_TYPE_DATAPATH]		= "DATAPATH",
	[RESET_TYPE_MC_BIST]		= "MC_BIST",
	[RESET_TYPE_DISABLE]		= "DISABLE",
	[RESET_TYPE_TX_WATCHDOG]	= "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR]		= "INT_ERROR",
	[RESET_TYPE_DMA_ERROR]		= "DMA_ERROR",
	[RESET_TYPE_TX_SKIP]		= "TX_SKIP",
	[RESET_TYPE_MC_FAILURE]		= "MC_FAILURE",
	[RESET_TYPE_MCDI_TIMEOUT]	= "MCDI_TIMEOUT (FLR)",
};

#define RESET_TYPE(type) \
	STRING_TABLE_LOOKUP(type, efx_reset_type)

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
	[LOOPBACK_NONE]		= "NONE",
	[LOOPBACK_DATA]		= "DATAPATH",
	[LOOPBACK_GMAC]		= "GMAC",
	[LOOPBACK_XGMII]	= "XGMII",
	[LOOPBACK_XGXS]		= "XGXS",
	[LOOPBACK_XAUI]		= "XAUI",
	[LOOPBACK_GMII]		= "GMII",
	[LOOPBACK_SGMII]	= "SGMII",
	[LOOPBACK_XGBR]		= "XGBR",
	[LOOPBACK_XFI]		= "XFI",
	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
	[LOOPBACK_GPHY]		= "GPHY",
	[LOOPBACK_PHYXS]	= "PHYXS",
	[LOOPBACK_PCS]		= "PCS",
	[LOOPBACK_PMAPMD]	= "PMA/PMD",
	[LOOPBACK_XPORT]	= "XPORT",
	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
	[LOOPBACK_XAUI_WS_FAR]	= "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR]	= "XAUI_WS_NEAR",
	[LOOPBACK_GMII_WS]	= "GMII_WS",
	[LOOPBACK_XFI_WS]	= "XFI_WS",
	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
};

/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-nic work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

int efx_create_reset_workqueue(void)
{
	reset_workqueue = create_singlethread_workqueue("sfc_reset");
	if (!reset_workqueue) {
		printk(KERN_ERR "Failed to create reset workqueue\n");
		return -ENOMEM;
	}

	return 0;
}

void efx_queue_reset_work(struct efx_nic *efx)
{
	queue_work(reset_workqueue, &efx->reset_work);
}

void efx_flush_reset_workqueue(struct efx_nic *efx)
{
	cancel_work_sync(&efx->reset_work);
}

void efx_destroy_reset_workqueue(void)
{
	if (reset_workqueue) {
		destroy_workqueue(reset_workqueue);
		reset_workqueue = NULL;
	}
}

/* We assume that efx->type->reconfigure_mac will always try to sync RX
 * filters and therefore needs to read-lock the filter table against freeing.
 */
void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only)
{
	if (efx->type->reconfigure_mac) {
		down_read(&efx->filter_sem);
		efx->type->reconfigure_mac(efx, mtu_only);
		up_read(&efx->filter_sem);
	}
}

/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly.
 */
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	if (efx->port_enabled)
		efx_mac_reconfigure(efx, false);
	mutex_unlock(&efx->mac_lock);
}

int efx_set_mac_address(struct net_device *net_dev, void *data)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	struct sockaddr *addr = data;
	u8 *new_addr = addr->sa_data;
	u8 old_addr[6];
	int rc;

	if (!is_valid_ether_addr(new_addr)) {
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
		return -EADDRNOTAVAIL;
	}

	/* save old address */
	ether_addr_copy(old_addr, net_dev->dev_addr);
	eth_hw_addr_set(net_dev, new_addr);
	if (efx->type->set_mac_address) {
		rc = efx->type->set_mac_address(efx);
		if (rc) {
			eth_hw_addr_set(net_dev, old_addr);
			return rc;
		}
	}

	/* Reconfigure the MAC */
	mutex_lock(&efx->mac_lock);
	efx_mac_reconfigure(efx, false);
	mutex_unlock(&efx->mac_lock);

	return 0;
}

/* Context: netif_addr_lock held, BHs disabled. */
void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}

int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	int rc;

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
		if (rc)
			return rc;
	}

	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
	 * If rx-fcs is changed, mac_reconfigure updates that too.
	 */
	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
					  NETIF_F_RXFCS)) {
		/* efx_set_rx_mode() will schedule MAC work to update filters
		 * when the new features are finally set in net_dev.
		 */
		efx_set_rx_mode(net_dev);
	}

	return 0;
}

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also keeps the
 * port's TX queue stopped while the link is down.
 */
void efx_link_status_changed(struct efx_nic *efx)
{
	struct efx_link_state *link_state = &efx->link_state;

	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading. A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN.
	 */
	if (!netif_running(efx->net_dev))
		return;

	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
		efx->n_link_state_changes++;

		if (link_state->up)
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
	if (link_state->up)
		netif_info(efx, link, efx->net_dev,
			   "link up at %uMbps %s-duplex (MTU %d)\n",
			   link_state->speed, link_state->fd ? "full" : "half",
			   efx->net_dev->mtu);
	else
		netif_info(efx, link, efx->net_dev, "link down\n");
}

unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
{
	/* The maximum MTU that we can fit in a single page, allowing for
	 * framing, overhead and XDP headroom + tailroom.
	 */
	int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
		       efx->rx_prefix_size + efx->type->rx_buffer_padding +
		       efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM;

	return PAGE_SIZE - overhead;
}
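
/* Illustration (approximate): with 4KiB pages, the fixed costs above
 * (Ethernet framing via EFX_MAX_FRAME_LEN(0), the page state, RX prefix,
 * buffer padding, IP alignment and the XDP head/tailroom) typically add
 * up to a few hundred bytes, so the largest XDP-capable MTU comes out
 * somewhat below 4096.
 */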

/* Context: process, rtnl_lock() held. */
int efx_change_mtu(struct net_device *net_dev, int new_mtu)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);
	int rc;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	if (rtnl_dereference(efx->xdp_prog) &&
	    new_mtu > efx_xdp_max_mtu(efx)) {
		netif_err(efx, drv, efx->net_dev,
			  "Requested MTU of %d too big for XDP (max: %d)\n",
			  new_mtu, efx_xdp_max_mtu(efx));
		return -EINVAL;
	}

	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);

	efx_device_detach_sync(efx);
	efx_stop_all(efx);

	mutex_lock(&efx->mac_lock);
	net_dev->mtu = new_mtu;
	efx_mac_reconfigure(efx, true);
	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);
	efx_device_attach_if_not_resetting(efx);
	return 0;
}

/**************************************************************************
 *
 * Hardware monitor
 *
 **************************************************************************/

/* Run periodically off the general workqueue */
static void efx_monitor(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic,
					   monitor_work.work);

	netif_vdbg(efx, timer, efx->net_dev,
		   "hardware monitor executing on CPU %d\n",
		   raw_smp_processor_id());
	BUG_ON(efx->type->monitor == NULL);

	/* If the mac_lock is already held then a port reconfiguration is
	 * likely already in progress, which will do most of the work of
	 * monitor() anyway.
	 */
	if (mutex_trylock(&efx->mac_lock)) {
		if (efx->port_enabled && efx->type->monitor)
			efx->type->monitor(efx);
		mutex_unlock(&efx->mac_lock);
	}

	efx_start_monitor(efx);
}

void efx_start_monitor(struct efx_nic *efx)
{
	if (efx->type->monitor)
		queue_delayed_work(efx->workqueue, &efx->monitor_work,
				   efx_monitor_interval);
}

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Channels are shut down and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions.
 */
static void efx_start_datapath(struct efx_nic *efx)
{
	netdev_features_t old_features = efx->net_dev->features;
	bool old_rx_scatter = efx->rx_scatter;
	size_t rx_buf_len;

	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
	efx->rx_dma_len = (efx->rx_prefix_size +
			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			   efx->type->rx_buffer_padding);
	rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM +
		      efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM);

	if (rx_buf_len <= PAGE_SIZE) {
		efx->rx_scatter = efx->type->always_rx_scatter;
		efx->rx_buffer_order = 0;
	} else if (efx->type->can_rx_scatter) {
		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
				       EFX_RX_BUF_ALIGNMENT) >
			     PAGE_SIZE);
		efx->rx_scatter = true;
		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
		efx->rx_buffer_order = 0;
	} else {
		efx->rx_scatter = false;
		efx->rx_buffer_order = get_order(rx_buf_len);
	}
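
	/* To summarise the three cases above: a buffer that fits in one
	 * page uses order-0 allocations (scattering only if the hardware
	 * always scatters); an oversized buffer on scatter-capable
	 * hardware is clamped to EFX_RX_USR_BUF_SIZE and split across
	 * buffers by the NIC; otherwise we fall back to a high-order
	 * allocation sized by get_order().
	 */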
	efx_rx_config_page_split(efx);
	if (efx->rx_buffer_order)
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u; page order=%u batch=%u\n",
			  efx->rx_dma_len, efx->rx_buffer_order,
			  efx->rx_pages_per_batch);
	else
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
			  efx->rx_dma_len, efx->rx_page_buf_step,
			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);

	/* Restore previously fixed features in hw_features and remove
	 * features which are fixed now.
	 */
	efx->net_dev->hw_features |= efx->net_dev->features;
	efx->net_dev->hw_features &= ~efx->fixed_features;
	efx->net_dev->features |= efx->fixed_features;
	if (efx->net_dev->features != old_features)
		netdev_features_change(efx->net_dev);

	/* RX filters may also have scatter-enabled flags */
	if ((efx->rx_scatter != old_rx_scatter) &&
	    efx->type->filter_update_rx_scatter)
		efx->type->filter_update_rx_scatter(efx);

	/* We must keep at least one descriptor in a TX ring empty.
	 * We could avoid this when the queue size does not exactly
	 * match the hardware ring size, but it's not that important.
	 * Therefore we stop the queue when one more skb might fill
	 * the ring completely. We wake it when half way back to
	 * empty.
	 */
	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
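
	/* For example (hypothetical numbers): with txq_entries == 1024 and
	 * a worst-case skb needing 16 descriptors, the queue is stopped
	 * once 1008 entries are in use and woken again at 504.
	 */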

	/* Initialise the channels */
	efx_start_channels(efx);

	efx_ptp_start_datapath(efx);

	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);
}

static void efx_stop_datapath(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

	efx_ptp_stop_datapath(efx);

	efx_stop_channels(efx);
}

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 * force the Autoneg bit on.
 */
void efx_link_clear_advertising(struct efx_nic *efx)
{
	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}

void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}
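
/* The OR/XOR sequence in efx_link_set_wanted_fc() yields the standard
 * 802.3 pause advertisement truth table:
 *	EFX_FC_RX | EFX_FC_TX	-> Pause
 *	EFX_FC_RX only		-> Pause | Asym_Pause
 *	EFX_FC_TX only		-> Asym_Pause
 *	neither			-> nothing advertised
 */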

static void efx_start_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = true;

	/* Ensure MAC ingress/egress is enabled */
	efx_mac_reconfigure(efx, false);

	mutex_unlock(&efx->mac_lock);
}

/* Cancel work for MAC reconfiguration, periodic hardware monitoring
 * and the async self-test, wait for them to finish and prevent them
 * from being scheduled again. This doesn't cover online resets, which
 * should only be cancelled when removing the device.
 */
static void efx_stop_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = false;
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);

	cancel_delayed_work_sync(&efx->monitor_work);
	efx_selftest_async_cancel(efx);
	cancel_work_sync(&efx->mac_work);
}

/* If the interface is supposed to be running but is not, start
 * the hardware and software data path, regular activity for the port
 * (MAC statistics, link polling, etc.) and schedule the port to be
 * reconfigured. Interrupts must already be enabled. This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
 */
void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->state == STATE_DISABLED);

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock.
	 */
	if (efx->port_enabled || !netif_running(efx->net_dev) ||
	    efx->reset_pending)
		return;

	efx_start_port(efx);
	efx_start_datapath(efx);

	/* Start the hardware monitor if there is one */
	efx_start_monitor(efx);

	efx_selftest_async_start(efx);

	/* Link state detection is normally event-driven; we have
	 * to poll now because we could have missed a change.
	 */
	mutex_lock(&efx->mac_lock);
	if (efx_mcdi_phy_poll(efx))
		efx_link_status_changed(efx);
	mutex_unlock(&efx->mac_lock);

	if (efx->type->start_stats) {
		efx->type->start_stats(efx);
		efx->type->pull_stats(efx);
		spin_lock_bh(&efx->stats_lock);
		efx->type->update_stats(efx, NULL, NULL);
		spin_unlock_bh(&efx->stats_lock);
	}
}

/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down. Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled. Requires the RTNL lock.
 */
void efx_stop_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	/* port_enabled can be read safely under the rtnl lock */
	if (!efx->port_enabled)
		return;

	if (efx->type->update_stats) {
		/* update stats before we go down so we can accurately count
		 * rx_nodesc_drops
		 */
		efx->type->pull_stats(efx);
		spin_lock_bh(&efx->stats_lock);
		efx->type->update_stats(efx, NULL, NULL);
		spin_unlock_bh(&efx->stats_lock);
		efx->type->stop_stats(efx);
	}

	efx_stop_port(efx);

	/* Stop the kernel transmit interface. This is only valid if
	 * the device is stopped or detached; otherwise the watchdog
	 * may fire immediately.
	 */
	WARN_ON(netif_running(efx->net_dev) &&
		netif_device_present(efx->net_dev));
	netif_tx_disable(efx->net_dev);

	efx_stop_datapath(efx);
}

/* Context: process, dev_base_lock or RTNL held, non-blocking. */
void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	spin_lock_bh(&efx->stats_lock);
	efx_nic_update_stats_atomic(efx, NULL, stats);
	spin_unlock_bh(&efx->stats_lock);
}

/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately. All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock.
 */
int __efx_reconfigure_port(struct efx_nic *efx)
{
	enum efx_phy_mode phy_mode;
	int rc = 0;

	WARN_ON(!mutex_is_locked(&efx->mac_lock));

	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

	if (efx->type->reconfigure_port)
		rc = efx->type->reconfigure_port(efx);

	if (rc)
		efx->phy_mode = phy_mode;

	return rc;
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled.
 */
int efx_reconfigure_port(struct efx_nic *efx)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	rc = __efx_reconfigure_port(efx);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/**************************************************************************
 *
 * Device reset and suspend
 *
 **************************************************************************/
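
/* With the BIST_WAIT_DELAY_* values defined above, the loop below polls
 * for an MC reboot roughly every 100ms and gives up after about ten
 * seconds.
 */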
static void efx_wait_for_bist_end(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
		if (efx_mcdi_poll_reboot(efx))
			goto out;
		msleep(BIST_WAIT_DELAY_MS);
	}

	netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
out:
	/* Either way unset the BIST flag. If we found no reboot we probably
	 * won't recover, but we should try.
	 */
	efx->mc_bist_for_other_fn = false;
}

/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful.
 * Returns a non-zero value otherwise.
 */
int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
	/* A PCI error can occur and not be seen by EEH because nothing
	 * happens on the PCI bus. In this case the driver may fail and
	 * schedule a 'recover or reset', leading to this recovery handler.
	 * Manually call the eeh failure check function.
	 */
	struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);

	if (eeh_dev_check_failure(eehdev)) {
		/* The EEH mechanisms will handle the error and reset the
		 * device if necessary.
		 */
		return 1;
	}
#endif
	return 0;
}

/* Tears down the entire software state and most of the hardware state
 * before reset.
 */
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->prepare_flr(efx);

	efx_stop_all(efx);
	efx_disable_interrupts(efx);

	mutex_lock(&efx->mac_lock);
	down_write(&efx->filter_sem);
	mutex_lock(&efx->rss_lock);
	efx->type->fini(efx);
}

/* Context: netif_tx_lock held, BHs disabled. */
void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}

/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and the
 * driver should be disabled. If ok is false, then the rx and tx
 * engines are not restarted, pending a RESET_DISABLE.
 */
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->finish_flr(efx);

	/* Ensure that SRAM is initialised even if we're disabling the device */
	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
		goto fail;
	}

	if (!ok)
		goto fail;

	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
	    method != RESET_TYPE_DATAPATH) {
		rc = efx_mcdi_port_reconfigure(efx);
		if (rc && rc != -EPERM)
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
	}

	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_restore(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to restore vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif

	if (efx->type->rx_restore_rss_contexts)
		efx->type->rx_restore_rss_contexts(efx);
	mutex_unlock(&efx->rss_lock);
	efx->type->filter_table_restore(efx);
	up_write(&efx->filter_sem);
	if (efx->type->sriov_reset)
		efx->type->sriov_reset(efx);

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

fail:
	efx->port_initialized = false;

	mutex_unlock(&efx->rss_lock);
	up_write(&efx->filter_sem);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Reset the NIC using the specified method. Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Caller must hold the rtnl_lock.
 */
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
	int rc, rc2 = 0;
	bool disabled;

	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));

	efx_device_detach_sync(efx);
	/* efx_reset_down() grabs locks that prevent recovery on EF100.
	 * EF100 reset is handled in the efx_nic_type callback below.
	 */
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		efx_reset_down(efx, method);

	rc = efx->type->reset(efx, method);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
		goto out;
	}

	/* Clear flags for the scopes we covered. We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
	if (method < RESET_TYPE_MAX_METHOD)
		efx->reset_pending &= -(1 << (method + 1));
	else /* it doesn't fit into the well-ordered scope hierarchy */
		__clear_bit(method, &efx->reset_pending);
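
	/* Illustration: reset methods are numbered in increasing order of
	 * scope, so -(1 << (method + 1)) is a mask with only the bits
	 * above 'method' set. Completing e.g. a WORLD reset therefore
	 * also clears any pending INVISIBLE/ALL requests it subsumed.
	 */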

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
	 * can respond to requests.
	 */
	pci_set_master(efx->pci_dev);

out:
	/* Leave device stopped if necessary */
	disabled = rc ||
		method == RESET_TYPE_DISABLE ||
		method == RESET_TYPE_RECOVER_OR_DISABLE;
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		if (!rc)
			rc = rc2;
	}

	if (disabled) {
		dev_close(efx->net_dev);
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
		efx->state = STATE_DISABLED;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;
}

/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
 */
static void efx_reset_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
	unsigned long pending;
	enum reset_type method;
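
	/* The highest-numbered pending bit is the widest-scope reset
	 * requested; fls() selects it, and efx_reset() will then clear
	 * any narrower requests that reset subsumes.
	 */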
	pending = READ_ONCE(efx->reset_pending);
	method = fls(pending) - 1;

	if (method == RESET_TYPE_MC_BIST)
		efx_wait_for_bist_end(efx);

	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
	     method == RESET_TYPE_RECOVER_OR_ALL) &&
	    efx_try_recovery(efx))
		return;

	if (!pending)
		return;

	rtnl_lock();

	/* We checked the state in efx_schedule_reset() but it may
	 * have changed by now. Now that we have the RTNL lock,
	 * it cannot change again.
	 */
	if (efx_net_active(efx->state))
		(void)efx_reset(efx, method);

	rtnl_unlock();
}

void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
	enum reset_type method;

	if (efx_recovering(efx->state)) {
		netif_dbg(efx, drv, efx->net_dev,
			  "recovering: skip scheduling %s reset\n",
			  RESET_TYPE(type));
		return;
	}

	switch (type) {
	case RESET_TYPE_INVISIBLE:
	case RESET_TYPE_ALL:
	case RESET_TYPE_RECOVER_OR_ALL:
	case RESET_TYPE_WORLD:
	case RESET_TYPE_DISABLE:
	case RESET_TYPE_RECOVER_OR_DISABLE:
	case RESET_TYPE_DATAPATH:
	case RESET_TYPE_MC_BIST:
	case RESET_TYPE_MCDI_TIMEOUT:
		method = type;
		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
			  RESET_TYPE(method));
		break;
	default:
		method = efx->type->map_reset_reason(type);
		netif_dbg(efx, drv, efx->net_dev,
			  "scheduling %s reset for %s\n",
			  RESET_TYPE(method), RESET_TYPE(type));
		break;
	}

	set_bit(method, &efx->reset_pending);
	smp_mb(); /* ensure we change reset_pending before checking state */

	/* If we're not READY then just leave the flags set as the cue
	 * to abort probing or reschedule the reset later.
	 */
	if (!efx_net_active(READ_ONCE(efx->state)))
		return;

	/* efx_process_channel() will no longer read events once a
	 * reset is scheduled. So switch back to polled MCDI completions.
	 */
	efx_mcdi_mode_poll(efx);

	efx_queue_reset_work(efx);
}

/**************************************************************************
 *
 * Dummy NIC operations
 *
 * Can be used for some unimplemented operations
 * Needed so all function pointers are valid and do not have to be tested
 * before use
 *
 **************************************************************************/

int efx_port_dummy_op_int(struct efx_nic *efx)
{
	return 0;
}
void efx_port_dummy_op_void(struct efx_nic *efx) {}

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

/* This zeroes out and then fills in the invariants in a struct
 * efx_nic (including all sub-structures).
 */
int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev)
{
	int rc = -ENOMEM;

	/* Initialise common structures */
	INIT_LIST_HEAD(&efx->node);
	INIT_LIST_HEAD(&efx->secondary_list);
	spin_lock_init(&efx->biu_lock);
#ifdef CONFIG_SFC_MTD
	INIT_LIST_HEAD(&efx->mtd_list);
#endif
	INIT_WORK(&efx->reset_work, efx_reset_work);
	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
	efx_selftest_async_init(efx);
	efx->pci_dev = pci_dev;
	efx->msg_enable = debug;
	efx->state = STATE_UNINIT;
	strscpy(efx->name, pci_name(pci_dev), sizeof(efx->name));

	efx->rx_prefix_size = efx->type->rx_prefix_size;
	efx->rx_ip_align =
		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
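	/* e.g. (hypothetical values) a 16-byte RX prefix with
	 * NET_IP_ALIGN == 2 gives (16 + 2) % 4 == 2 bytes of extra
	 * alignment ahead of the prefix.
	 */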
	efx->rx_packet_hash_offset =
		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
	efx->rx_packet_ts_offset =
		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
	INIT_LIST_HEAD(&efx->rss_context.list);
	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
	mutex_init(&efx->rss_lock);
	efx->vport_id = EVB_PORT_ID_ASSIGNED;
	spin_lock_init(&efx->stats_lock);
	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
	BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
	mutex_init(&efx->mac_lock);
	init_rwsem(&efx->filter_sem);
#ifdef CONFIG_RFS_ACCEL
	mutex_init(&efx->rps_mutex);
	spin_lock_init(&efx->rps_hash_lock);
	/* Failure to allocate is not fatal, but may degrade ARFS performance */
	efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
				      sizeof(*efx->rps_hash_table), GFP_KERNEL);
#endif
	spin_lock_init(&efx->vf_reps_lock);
	INIT_LIST_HEAD(&efx->vf_reps);
	INIT_WORK(&efx->mac_work, efx_mac_work);
	init_waitqueue_head(&efx->flush_wq);

	efx->tx_queues_per_channel = 1;
	efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE;
	efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;

	efx->mem_bar = UINT_MAX;

	rc = efx_init_channels(efx);
	if (rc)
		goto fail;

	/* Would be good to use the net_dev name, but we're too early */
	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
		 pci_name(pci_dev));
	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
	if (!efx->workqueue) {
		rc = -ENOMEM;
		goto fail;
	}

	return 0;

fail:
	efx_fini_struct(efx);
	return rc;
}

void efx_fini_struct(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	kfree(efx->rps_hash_table);
#endif

	efx_fini_channels(efx);

	kfree(efx->vpd_sn);

	if (efx->workqueue) {
		destroy_workqueue(efx->workqueue);
		efx->workqueue = NULL;
	}
}
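
/* Callers of efx_init_io() below pass the NIC's DMA addressing capability
 * as a mask, e.g. (hypothetically) DMA_BIT_MASK(46) for hardware that can
 * address 46 bits; dma_set_mask_and_coherent() then applies it to both
 * streaming and coherent mappings.
 */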

/* This configures the PCI device to enable I/O and DMA. */
int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
		unsigned int mem_map_size)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	int rc;

	efx->mem_bar = UINT_MAX;
	pci_dbg(pci_dev, "initialising I/O bar=%d\n", bar);

	rc = pci_enable_device(pci_dev);
	if (rc) {
		pci_err(pci_dev, "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
	if (rc) {
		pci_err(efx->pci_dev, "could not find a suitable DMA mask\n");
		goto fail2;
	}
	pci_dbg(efx->pci_dev, "using DMA mask %llx\n", (unsigned long long)dma_mask);

	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
	if (!efx->membase_phys) {
		pci_err(efx->pci_dev,
			"ERROR: No BAR%d mapping from the BIOS. Try pci=realloc on the kernel command line\n",
			bar);
		rc = -ENODEV;
		goto fail3;
	}

	rc = pci_request_region(pci_dev, bar, "sfc");
	if (rc) {
		pci_err(efx->pci_dev,
			"request for memory BAR[%d] failed\n", bar);
		rc = -EIO;
		goto fail3;
	}
	efx->mem_bar = bar;
	efx->membase = ioremap(efx->membase_phys, mem_map_size);
	if (!efx->membase) {
		pci_err(efx->pci_dev,
			"could not map memory BAR[%d] at %llx+%x\n", bar,
			(unsigned long long)efx->membase_phys, mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	pci_dbg(efx->pci_dev,
		"memory BAR[%d] at %llx+%x (virtual %p)\n", bar,
		(unsigned long long)efx->membase_phys, mem_map_size,
		efx->membase);

	return 0;

fail4:
	pci_release_region(efx->pci_dev, bar);
fail3:
	efx->membase_phys = 0;
fail2:
	pci_disable_device(efx->pci_dev);
fail1:
	return rc;
}

void efx_fini_io(struct efx_nic *efx)
{
	pci_dbg(efx->pci_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		pci_release_region(efx->pci_dev, efx->mem_bar);
		efx->membase_phys = 0;
		efx->mem_bar = UINT_MAX;
	}

	/* Don't disable bus-mastering if VFs are assigned */
	if (!pci_vfs_assigned(efx->pci_dev))
		pci_disable_device(efx->pci_dev);
}

#ifdef CONFIG_SFC_MCDI_LOGGING
static ssize_t mcdi_logging_show(struct device *dev,
				 struct device_attribute *attr,
				 char *buf)
{
	struct efx_nic *efx = dev_get_drvdata(dev);
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);

	return sysfs_emit(buf, "%d\n", mcdi->logging_enabled);
}

static ssize_t mcdi_logging_store(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct efx_nic *efx = dev_get_drvdata(dev);
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
	bool enable = count > 0 && *buf != '0';

	mcdi->logging_enabled = enable;
	return count;
}

static DEVICE_ATTR_RW(mcdi_logging);

void efx_init_mcdi_logging(struct efx_nic *efx)
{
	int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);

	if (rc) {
		netif_warn(efx, drv, efx->net_dev,
			   "failed to init net dev attributes\n");
	}
}

void efx_fini_mcdi_logging(struct efx_nic *efx)
{
	device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
}
#endif
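
/* With CONFIG_SFC_MCDI_LOGGING enabled, the attribute above appears in the
 * PCI device's sysfs directory and can be toggled at runtime, e.g.
 * (illustrative path):
 *	echo 1 > /sys/bus/pci/devices/0000:01:00.0/mcdi_logging
 */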

/* A PCI error affecting this device was detected.
 * At this point MMIO and DMA may be disabled.
 * Stop the software path and request a slot reset.
 */
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = efx_recover(efx->state);
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		if (efx_net_active(efx->state)) {
			efx_stop_all(efx);
			efx_disable_interrupts(efx);
		}

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status = PCI_ERS_RESULT_DISCONNECT;
	}

	return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		efx->state = efx_recovered(efx->state);
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callbacks unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
const struct pci_error_handlers efx_err_handlers = {
	.error_detected	= efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};

/* Determine whether the NIC will be able to handle TX offloads for a given
 * encapsulated packet.
 */
static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb)
{
	struct gre_base_hdr *greh;
	__be16 dst_port;
	u8 ipproto;

	/* Does the NIC support encap offloads?
	 * If not, we should never get here, because we shouldn't have
	 * advertised encap offload feature flags in the first place.
	 */
	if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port))
		return false;

	/* Determine encapsulation protocol in use */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
		ipproto = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		/* If there are extension headers, this will cause us to
		 * think we can't offload something that we maybe could have.
		 */
		ipproto = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		/* Not IP, so can't offload it */
		return false;
	}
	switch (ipproto) {
	case IPPROTO_GRE:
		/* We support NVGRE but not IP over GRE or random gretaps.
		 * Specifically, the NIC will accept GRE as encapsulated if
		 * the inner protocol is Ethernet, but only handle it
		 * correctly if the GRE header is 8 bytes long. Moreover,
		 * it will not update the Checksum or Sequence Number fields
		 * if they are present. (The Routing Present flag,
		 * GRE_ROUTING, cannot be set else the header would be more
		 * than 8 bytes long; so we don't have to worry about it.)
		 */
		if (skb->inner_protocol_type != ENCAP_TYPE_ETHER)
			return false;
		if (ntohs(skb->inner_protocol) != ETH_P_TEB)
			return false;
		if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8)
			return false;
		greh = (struct gre_base_hdr *)skb_transport_header(skb);
		return !(greh->flags & (GRE_CSUM | GRE_SEQ));
	case IPPROTO_UDP:
		/* If the port is registered for a UDP tunnel, we assume the
		 * packet is for that tunnel, and the NIC will handle it as
		 * such. If not, the NIC won't know what to do with it.
		 */
		dst_port = udp_hdr(skb)->dest;
		return efx->type->udp_tnl_has_port(efx, dst_port);
	default:
		return false;
	}
}
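
/* efx_features_check() below is used as the driver's ndo_features_check
 * hook. For example, a tunnelled TSO skb whose outer UDP destination port
 * was never pushed to the NIC loses the GSO and checksum-offload features
 * here and falls back to the software paths.
 */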
netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev,
				     netdev_features_t features)
{
	struct efx_nic *efx = efx_netdev_priv(dev);

	if (skb->encapsulation) {
		if (features & NETIF_F_GSO_MASK)
			/* Hardware can only do TSO with at most 208 bytes
			 * of headers.
			 */
			if (skb_inner_transport_offset(skb) >
			    EFX_TSO2_MAX_HDRLEN)
				features &= ~(NETIF_F_GSO_MASK);
		if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK))
			if (!efx_can_encap_offloads(efx, skb))
				features &= ~(NETIF_F_GSO_MASK |
					      NETIF_F_CSUM_MASK);
	}
	return features;
}

int efx_get_phys_port_id(struct net_device *net_dev,
			 struct netdev_phys_item_id *ppid)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (efx->type->get_phys_port_id)
		return efx->type->get_phys_port_id(efx, ppid);
	else
		return -EOPNOTSUPP;
}

int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len)
{
	struct efx_nic *efx = efx_netdev_priv(net_dev);

	if (snprintf(name, len, "p%u", efx->port_num) >= len)
		return -EINVAL;
	return 0;
}

void efx_detach_reps(struct efx_nic *efx)
{
	struct net_device *rep_dev;
	struct efx_rep *efv;

	ASSERT_RTNL();
	netif_dbg(efx, drv, efx->net_dev, "Detaching VF representors\n");
	list_for_each_entry(efv, &efx->vf_reps, list) {
		rep_dev = efv->net_dev;
		if (!rep_dev)
			continue;
		netif_carrier_off(rep_dev);
		/* See efx_device_detach_sync() */
		netif_tx_lock_bh(rep_dev);
		netif_tx_stop_all_queues(rep_dev);
		netif_tx_unlock_bh(rep_dev);
	}
}

void efx_attach_reps(struct efx_nic *efx)
{
	struct net_device *rep_dev;
	struct efx_rep *efv;

	ASSERT_RTNL();
	netif_dbg(efx, drv, efx->net_dev, "Attaching VF representors\n");
	list_for_each_entry(efv, &efx->vf_reps, list) {
		rep_dev = efv->net_dev;
		if (!rep_dev)
			continue;
		netif_tx_wake_all_queues(rep_dev);
		netif_carrier_on(rep_dev);
	}
}