1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2018 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include "net_driver.h" 12 #include <linux/filter.h> 13 #include <linux/module.h> 14 #include <linux/netdevice.h> 15 #include <net/gre.h> 16 #include "efx_common.h" 17 #include "efx_channels.h" 18 #include "efx.h" 19 #include "mcdi.h" 20 #include "selftest.h" 21 #include "rx_common.h" 22 #include "tx_common.h" 23 #include "nic.h" 24 #include "mcdi_port_common.h" 25 #include "io.h" 26 #include "mcdi_pcol.h" 27 28 static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | 29 NETIF_MSG_LINK | NETIF_MSG_IFDOWN | 30 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | 31 NETIF_MSG_TX_ERR | NETIF_MSG_HW); 32 module_param(debug, uint, 0); 33 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); 34 35 /* This is the time (in jiffies) between invocations of the hardware 36 * monitor. 37 * On Falcon-based NICs, this will: 38 * - Check the on-board hardware monitor; 39 * - Poll the link state and reconfigure the hardware as necessary. 40 * On Siena-based NICs for power systems with EEH support, this will give EEH a 41 * chance to start. 42 */ 43 static unsigned int efx_monitor_interval = 1 * HZ; 44 45 /* How often and how many times to poll for a reset while waiting for a 46 * BIST that another function started to complete. 
47 */ 48 #define BIST_WAIT_DELAY_MS 100 49 #define BIST_WAIT_DELAY_COUNT 100 50 51 /* Default stats update time */ 52 #define STATS_PERIOD_MS_DEFAULT 1000 53 54 const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 55 const char *const efx_reset_type_names[] = { 56 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 57 [RESET_TYPE_ALL] = "ALL", 58 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 59 [RESET_TYPE_WORLD] = "WORLD", 60 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 61 [RESET_TYPE_DATAPATH] = "DATAPATH", 62 [RESET_TYPE_MC_BIST] = "MC_BIST", 63 [RESET_TYPE_DISABLE] = "DISABLE", 64 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 65 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 66 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 67 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 68 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 69 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", 70 }; 71 72 #define RESET_TYPE(type) \ 73 STRING_TABLE_LOOKUP(type, efx_reset_type) 74 75 /* Loopback mode names (see LOOPBACK_MODE()) */ 76 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 77 const char *const efx_loopback_mode_names[] = { 78 [LOOPBACK_NONE] = "NONE", 79 [LOOPBACK_DATA] = "DATAPATH", 80 [LOOPBACK_GMAC] = "GMAC", 81 [LOOPBACK_XGMII] = "XGMII", 82 [LOOPBACK_XGXS] = "XGXS", 83 [LOOPBACK_XAUI] = "XAUI", 84 [LOOPBACK_GMII] = "GMII", 85 [LOOPBACK_SGMII] = "SGMII", 86 [LOOPBACK_XGBR] = "XGBR", 87 [LOOPBACK_XFI] = "XFI", 88 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 89 [LOOPBACK_GMII_FAR] = "GMII_FAR", 90 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 91 [LOOPBACK_XFI_FAR] = "XFI_FAR", 92 [LOOPBACK_GPHY] = "GPHY", 93 [LOOPBACK_PHYXS] = "PHYXS", 94 [LOOPBACK_PCS] = "PCS", 95 [LOOPBACK_PMAPMD] = "PMA/PMD", 96 [LOOPBACK_XPORT] = "XPORT", 97 [LOOPBACK_XGMII_WS] = "XGMII_WS", 98 [LOOPBACK_XAUI_WS] = "XAUI_WS", 99 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 100 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 101 [LOOPBACK_GMII_WS] = "GMII_WS", 102 [LOOPBACK_XFI_WS] = "XFI_WS", 103 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 104 [LOOPBACK_PHYXS_WS] = 
"PHYXS_WS", 105 }; 106 107 /* Reset workqueue. If any NIC has a hardware failure then a reset will be 108 * queued onto this work queue. This is not a per-nic work queue, because 109 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 110 */ 111 static struct workqueue_struct *reset_workqueue; 112 113 int efx_create_reset_workqueue(void) 114 { 115 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 116 if (!reset_workqueue) { 117 printk(KERN_ERR "Failed to create reset workqueue\n"); 118 return -ENOMEM; 119 } 120 121 return 0; 122 } 123 124 void efx_queue_reset_work(struct efx_nic *efx) 125 { 126 queue_work(reset_workqueue, &efx->reset_work); 127 } 128 129 void efx_flush_reset_workqueue(struct efx_nic *efx) 130 { 131 cancel_work_sync(&efx->reset_work); 132 } 133 134 void efx_destroy_reset_workqueue(void) 135 { 136 if (reset_workqueue) { 137 destroy_workqueue(reset_workqueue); 138 reset_workqueue = NULL; 139 } 140 } 141 142 /* We assume that efx->type->reconfigure_mac will always try to sync RX 143 * filters and therefore needs to read-lock the filter table against freeing 144 */ 145 void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only) 146 { 147 if (efx->type->reconfigure_mac) { 148 down_read(&efx->filter_sem); 149 efx->type->reconfigure_mac(efx, mtu_only); 150 up_read(&efx->filter_sem); 151 } 152 } 153 154 /* Asynchronous work item for changing MAC promiscuity and multicast 155 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 156 * MAC directly. 
157 */ 158 static void efx_mac_work(struct work_struct *data) 159 { 160 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 161 162 mutex_lock(&efx->mac_lock); 163 if (efx->port_enabled) 164 efx_mac_reconfigure(efx, false); 165 mutex_unlock(&efx->mac_lock); 166 } 167 168 int efx_set_mac_address(struct net_device *net_dev, void *data) 169 { 170 struct efx_nic *efx = netdev_priv(net_dev); 171 struct sockaddr *addr = data; 172 u8 *new_addr = addr->sa_data; 173 u8 old_addr[6]; 174 int rc; 175 176 if (!is_valid_ether_addr(new_addr)) { 177 netif_err(efx, drv, efx->net_dev, 178 "invalid ethernet MAC address requested: %pM\n", 179 new_addr); 180 return -EADDRNOTAVAIL; 181 } 182 183 /* save old address */ 184 ether_addr_copy(old_addr, net_dev->dev_addr); 185 eth_hw_addr_set(net_dev, new_addr); 186 if (efx->type->set_mac_address) { 187 rc = efx->type->set_mac_address(efx); 188 if (rc) { 189 eth_hw_addr_set(net_dev, old_addr); 190 return rc; 191 } 192 } 193 194 /* Reconfigure the MAC */ 195 mutex_lock(&efx->mac_lock); 196 efx_mac_reconfigure(efx, false); 197 mutex_unlock(&efx->mac_lock); 198 199 return 0; 200 } 201 202 /* Context: netif_addr_lock held, BHs disabled. */ 203 void efx_set_rx_mode(struct net_device *net_dev) 204 { 205 struct efx_nic *efx = netdev_priv(net_dev); 206 207 if (efx->port_enabled) 208 queue_work(efx->workqueue, &efx->mac_work); 209 /* Otherwise efx_start_port() will do this */ 210 } 211 212 int efx_set_features(struct net_device *net_dev, netdev_features_t data) 213 { 214 struct efx_nic *efx = netdev_priv(net_dev); 215 int rc; 216 217 /* If disabling RX n-tuple filtering, clear existing filters */ 218 if (net_dev->features & ~data & NETIF_F_NTUPLE) { 219 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); 220 if (rc) 221 return rc; 222 } 223 224 /* If Rx VLAN filter is changed, update filters via mac_reconfigure. 225 * If rx-fcs is changed, mac_reconfigure updates that too. 
226 */ 227 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | 228 NETIF_F_RXFCS)) { 229 /* efx_set_rx_mode() will schedule MAC work to update filters 230 * when a new features are finally set in net_dev. 231 */ 232 efx_set_rx_mode(net_dev); 233 } 234 235 return 0; 236 } 237 238 /* This ensures that the kernel is kept informed (via 239 * netif_carrier_on/off) of the link status, and also maintains the 240 * link status's stop on the port's TX queue. 241 */ 242 void efx_link_status_changed(struct efx_nic *efx) 243 { 244 struct efx_link_state *link_state = &efx->link_state; 245 246 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 247 * that no events are triggered between unregister_netdev() and the 248 * driver unloading. A more general condition is that NETDEV_CHANGE 249 * can only be generated between NETDEV_UP and NETDEV_DOWN 250 */ 251 if (!netif_running(efx->net_dev)) 252 return; 253 254 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 255 efx->n_link_state_changes++; 256 257 if (link_state->up) 258 netif_carrier_on(efx->net_dev); 259 else 260 netif_carrier_off(efx->net_dev); 261 } 262 263 /* Status message for kernel log */ 264 if (link_state->up) 265 netif_info(efx, link, efx->net_dev, 266 "link up at %uMbps %s-duplex (MTU %d)\n", 267 link_state->speed, link_state->fd ? "full" : "half", 268 efx->net_dev->mtu); 269 else 270 netif_info(efx, link, efx->net_dev, "link down\n"); 271 } 272 273 unsigned int efx_xdp_max_mtu(struct efx_nic *efx) 274 { 275 /* The maximum MTU that we can fit in a single page, allowing for 276 * framing, overhead and XDP headroom + tailroom. 277 */ 278 int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + 279 efx->rx_prefix_size + efx->type->rx_buffer_padding + 280 efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM; 281 282 return PAGE_SIZE - overhead; 283 } 284 285 /* Context: process, rtnl_lock() held. 
*/ 286 int efx_change_mtu(struct net_device *net_dev, int new_mtu) 287 { 288 struct efx_nic *efx = netdev_priv(net_dev); 289 int rc; 290 291 rc = efx_check_disabled(efx); 292 if (rc) 293 return rc; 294 295 if (rtnl_dereference(efx->xdp_prog) && 296 new_mtu > efx_xdp_max_mtu(efx)) { 297 netif_err(efx, drv, efx->net_dev, 298 "Requested MTU of %d too big for XDP (max: %d)\n", 299 new_mtu, efx_xdp_max_mtu(efx)); 300 return -EINVAL; 301 } 302 303 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 304 305 efx_device_detach_sync(efx); 306 efx_stop_all(efx); 307 308 mutex_lock(&efx->mac_lock); 309 net_dev->mtu = new_mtu; 310 efx_mac_reconfigure(efx, true); 311 mutex_unlock(&efx->mac_lock); 312 313 efx_start_all(efx); 314 efx_device_attach_if_not_resetting(efx); 315 return 0; 316 } 317 318 /************************************************************************** 319 * 320 * Hardware monitor 321 * 322 **************************************************************************/ 323 324 /* Run periodically off the general workqueue */ 325 static void efx_monitor(struct work_struct *data) 326 { 327 struct efx_nic *efx = container_of(data, struct efx_nic, 328 monitor_work.work); 329 330 netif_vdbg(efx, timer, efx->net_dev, 331 "hardware monitor executing on CPU %d\n", 332 raw_smp_processor_id()); 333 BUG_ON(efx->type->monitor == NULL); 334 335 /* If the mac_lock is already held then it is likely a port 336 * reconfiguration is already in place, which will likely do 337 * most of the work of monitor() anyway. 
338 */ 339 if (mutex_trylock(&efx->mac_lock)) { 340 if (efx->port_enabled && efx->type->monitor) 341 efx->type->monitor(efx); 342 mutex_unlock(&efx->mac_lock); 343 } 344 345 efx_start_monitor(efx); 346 } 347 348 void efx_start_monitor(struct efx_nic *efx) 349 { 350 if (efx->type->monitor) 351 queue_delayed_work(efx->workqueue, &efx->monitor_work, 352 efx_monitor_interval); 353 } 354 355 /************************************************************************** 356 * 357 * Event queue processing 358 * 359 *************************************************************************/ 360 361 /* Channels are shutdown and reinitialised whilst the NIC is running 362 * to propagate configuration changes (mtu, checksum offload), or 363 * to clear hardware error conditions 364 */ 365 static void efx_start_datapath(struct efx_nic *efx) 366 { 367 netdev_features_t old_features = efx->net_dev->features; 368 bool old_rx_scatter = efx->rx_scatter; 369 size_t rx_buf_len; 370 371 /* Calculate the rx buffer allocation parameters required to 372 * support the current MTU, including padding for header 373 * alignment and overruns. 
374 */ 375 efx->rx_dma_len = (efx->rx_prefix_size + 376 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 377 efx->type->rx_buffer_padding); 378 rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM + 379 efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM); 380 381 if (rx_buf_len <= PAGE_SIZE) { 382 efx->rx_scatter = efx->type->always_rx_scatter; 383 efx->rx_buffer_order = 0; 384 } else if (efx->type->can_rx_scatter) { 385 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); 386 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + 387 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, 388 EFX_RX_BUF_ALIGNMENT) > 389 PAGE_SIZE); 390 efx->rx_scatter = true; 391 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; 392 efx->rx_buffer_order = 0; 393 } else { 394 efx->rx_scatter = false; 395 efx->rx_buffer_order = get_order(rx_buf_len); 396 } 397 398 efx_rx_config_page_split(efx); 399 if (efx->rx_buffer_order) 400 netif_dbg(efx, drv, efx->net_dev, 401 "RX buf len=%u; page order=%u batch=%u\n", 402 efx->rx_dma_len, efx->rx_buffer_order, 403 efx->rx_pages_per_batch); 404 else 405 netif_dbg(efx, drv, efx->net_dev, 406 "RX buf len=%u step=%u bpp=%u; page batch=%u\n", 407 efx->rx_dma_len, efx->rx_page_buf_step, 408 efx->rx_bufs_per_page, efx->rx_pages_per_batch); 409 410 /* Restore previously fixed features in hw_features and remove 411 * features which are fixed now 412 */ 413 efx->net_dev->hw_features |= efx->net_dev->features; 414 efx->net_dev->hw_features &= ~efx->fixed_features; 415 efx->net_dev->features |= efx->fixed_features; 416 if (efx->net_dev->features != old_features) 417 netdev_features_change(efx->net_dev); 418 419 /* RX filters may also have scatter-enabled flags */ 420 if ((efx->rx_scatter != old_rx_scatter) && 421 efx->type->filter_update_rx_scatter) 422 efx->type->filter_update_rx_scatter(efx); 423 424 /* We must keep at least one descriptor in a TX ring empty. 
425 * We could avoid this when the queue size does not exactly 426 * match the hardware ring size, but it's not that important. 427 * Therefore we stop the queue when one more skb might fill 428 * the ring completely. We wake it when half way back to 429 * empty. 430 */ 431 efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); 432 efx->txq_wake_thresh = efx->txq_stop_thresh / 2; 433 434 /* Initialise the channels */ 435 efx_start_channels(efx); 436 437 efx_ptp_start_datapath(efx); 438 439 if (netif_device_present(efx->net_dev)) 440 netif_tx_wake_all_queues(efx->net_dev); 441 } 442 443 static void efx_stop_datapath(struct efx_nic *efx) 444 { 445 EFX_ASSERT_RESET_SERIALISED(efx); 446 BUG_ON(efx->port_enabled); 447 448 efx_ptp_stop_datapath(efx); 449 450 efx_stop_channels(efx); 451 } 452 453 /************************************************************************** 454 * 455 * Port handling 456 * 457 **************************************************************************/ 458 459 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not 460 * force the Autoneg bit on. 
461 */ 462 void efx_link_clear_advertising(struct efx_nic *efx) 463 { 464 bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); 465 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 466 } 467 468 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) 469 { 470 efx->wanted_fc = wanted_fc; 471 if (efx->link_advertising[0]) { 472 if (wanted_fc & EFX_FC_RX) 473 efx->link_advertising[0] |= (ADVERTISED_Pause | 474 ADVERTISED_Asym_Pause); 475 else 476 efx->link_advertising[0] &= ~(ADVERTISED_Pause | 477 ADVERTISED_Asym_Pause); 478 if (wanted_fc & EFX_FC_TX) 479 efx->link_advertising[0] ^= ADVERTISED_Asym_Pause; 480 } 481 } 482 483 static void efx_start_port(struct efx_nic *efx) 484 { 485 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 486 BUG_ON(efx->port_enabled); 487 488 mutex_lock(&efx->mac_lock); 489 efx->port_enabled = true; 490 491 /* Ensure MAC ingress/egress is enabled */ 492 efx_mac_reconfigure(efx, false); 493 494 mutex_unlock(&efx->mac_lock); 495 } 496 497 /* Cancel work for MAC reconfiguration, periodic hardware monitoring 498 * and the async self-test, wait for them to finish and prevent them 499 * being scheduled again. This doesn't cover online resets, which 500 * should only be cancelled when removing the device. 501 */ 502 static void efx_stop_port(struct efx_nic *efx) 503 { 504 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 505 506 EFX_ASSERT_RESET_SERIALISED(efx); 507 508 mutex_lock(&efx->mac_lock); 509 efx->port_enabled = false; 510 mutex_unlock(&efx->mac_lock); 511 512 /* Serialise against efx_set_multicast_list() */ 513 netif_addr_lock_bh(efx->net_dev); 514 netif_addr_unlock_bh(efx->net_dev); 515 516 cancel_delayed_work_sync(&efx->monitor_work); 517 efx_selftest_async_cancel(efx); 518 cancel_work_sync(&efx->mac_work); 519 } 520 521 /* If the interface is supposed to be running but is not, start 522 * the hardware and software data path, regular activity for the port 523 * (MAC statistics, link polling, etc.) 
and schedule the port to be 524 * reconfigured. Interrupts must already be enabled. This function 525 * is safe to call multiple times, so long as the NIC is not disabled. 526 * Requires the RTNL lock. 527 */ 528 void efx_start_all(struct efx_nic *efx) 529 { 530 EFX_ASSERT_RESET_SERIALISED(efx); 531 BUG_ON(efx->state == STATE_DISABLED); 532 533 /* Check that it is appropriate to restart the interface. All 534 * of these flags are safe to read under just the rtnl lock 535 */ 536 if (efx->port_enabled || !netif_running(efx->net_dev) || 537 efx->reset_pending) 538 return; 539 540 efx_start_port(efx); 541 efx_start_datapath(efx); 542 543 /* Start the hardware monitor if there is one */ 544 efx_start_monitor(efx); 545 546 /* Link state detection is normally event-driven; we have 547 * to poll now because we could have missed a change 548 */ 549 mutex_lock(&efx->mac_lock); 550 if (efx_mcdi_phy_poll(efx)) 551 efx_link_status_changed(efx); 552 mutex_unlock(&efx->mac_lock); 553 554 if (efx->type->start_stats) { 555 efx->type->start_stats(efx); 556 efx->type->pull_stats(efx); 557 spin_lock_bh(&efx->stats_lock); 558 efx->type->update_stats(efx, NULL, NULL); 559 spin_unlock_bh(&efx->stats_lock); 560 } 561 } 562 563 /* Quiesce the hardware and software data path, and regular activity 564 * for the port without bringing the link down. Safe to call multiple 565 * times with the NIC in almost any state, but interrupts should be 566 * enabled. Requires the RTNL lock. 
567 */ 568 void efx_stop_all(struct efx_nic *efx) 569 { 570 EFX_ASSERT_RESET_SERIALISED(efx); 571 572 /* port_enabled can be read safely under the rtnl lock */ 573 if (!efx->port_enabled) 574 return; 575 576 if (efx->type->update_stats) { 577 /* update stats before we go down so we can accurately count 578 * rx_nodesc_drops 579 */ 580 efx->type->pull_stats(efx); 581 spin_lock_bh(&efx->stats_lock); 582 efx->type->update_stats(efx, NULL, NULL); 583 spin_unlock_bh(&efx->stats_lock); 584 efx->type->stop_stats(efx); 585 } 586 587 efx_stop_port(efx); 588 589 /* Stop the kernel transmit interface. This is only valid if 590 * the device is stopped or detached; otherwise the watchdog 591 * may fire immediately. 592 */ 593 WARN_ON(netif_running(efx->net_dev) && 594 netif_device_present(efx->net_dev)); 595 netif_tx_disable(efx->net_dev); 596 597 efx_stop_datapath(efx); 598 } 599 600 /* Context: process, dev_base_lock or RTNL held, non-blocking. */ 601 void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) 602 { 603 struct efx_nic *efx = netdev_priv(net_dev); 604 605 spin_lock_bh(&efx->stats_lock); 606 efx_nic_update_stats_atomic(efx, NULL, stats); 607 spin_unlock_bh(&efx->stats_lock); 608 } 609 610 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 611 * the MAC appropriately. All other PHY configuration changes are pushed 612 * through phy_op->set_settings(), and pushed asynchronously to the MAC 613 * through efx_monitor(). 
614 * 615 * Callers must hold the mac_lock 616 */ 617 int __efx_reconfigure_port(struct efx_nic *efx) 618 { 619 enum efx_phy_mode phy_mode; 620 int rc = 0; 621 622 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 623 624 /* Disable PHY transmit in mac level loopbacks */ 625 phy_mode = efx->phy_mode; 626 if (LOOPBACK_INTERNAL(efx)) 627 efx->phy_mode |= PHY_MODE_TX_DISABLED; 628 else 629 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 630 631 if (efx->type->reconfigure_port) 632 rc = efx->type->reconfigure_port(efx); 633 634 if (rc) 635 efx->phy_mode = phy_mode; 636 637 return rc; 638 } 639 640 /* Reinitialise the MAC to pick up new PHY settings, even if the port is 641 * disabled. 642 */ 643 int efx_reconfigure_port(struct efx_nic *efx) 644 { 645 int rc; 646 647 EFX_ASSERT_RESET_SERIALISED(efx); 648 649 mutex_lock(&efx->mac_lock); 650 rc = __efx_reconfigure_port(efx); 651 mutex_unlock(&efx->mac_lock); 652 653 return rc; 654 } 655 656 /************************************************************************** 657 * 658 * Device reset and suspend 659 * 660 **************************************************************************/ 661 662 static void efx_wait_for_bist_end(struct efx_nic *efx) 663 { 664 int i; 665 666 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 667 if (efx_mcdi_poll_reboot(efx)) 668 goto out; 669 msleep(BIST_WAIT_DELAY_MS); 670 } 671 672 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 673 out: 674 /* Either way unset the BIST flag. If we found no reboot we probably 675 * won't recover, but we should try. 676 */ 677 efx->mc_bist_for_other_fn = false; 678 } 679 680 /* Try recovery mechanisms. 681 * For now only EEH is supported. 682 * Returns 0 if the recovery mechanisms are unsuccessful. 683 * Returns a non-zero value otherwise. 684 */ 685 int efx_try_recovery(struct efx_nic *efx) 686 { 687 #ifdef CONFIG_EEH 688 /* A PCI error can occur and not be seen by EEH because nothing 689 * happens on the PCI bus. 
In this case the driver may fail and 690 * schedule a 'recover or reset', leading to this recovery handler. 691 * Manually call the eeh failure check function. 692 */ 693 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 694 if (eeh_dev_check_failure(eehdev)) { 695 /* The EEH mechanisms will handle the error and reset the 696 * device if necessary. 697 */ 698 return 1; 699 } 700 #endif 701 return 0; 702 } 703 704 /* Tears down the entire software state and most of the hardware state 705 * before reset. 706 */ 707 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 708 { 709 EFX_ASSERT_RESET_SERIALISED(efx); 710 711 if (method == RESET_TYPE_MCDI_TIMEOUT) 712 efx->type->prepare_flr(efx); 713 714 efx_stop_all(efx); 715 efx_disable_interrupts(efx); 716 717 mutex_lock(&efx->mac_lock); 718 down_write(&efx->filter_sem); 719 mutex_lock(&efx->rss_lock); 720 efx->type->fini(efx); 721 } 722 723 /* Context: netif_tx_lock held, BHs disabled. */ 724 void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) 725 { 726 struct efx_nic *efx = netdev_priv(net_dev); 727 728 netif_err(efx, tx_err, efx->net_dev, 729 "TX stuck with port_enabled=%d: resetting channels\n", 730 efx->port_enabled); 731 732 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 733 } 734 735 /* This function will always ensure that the locks acquired in 736 * efx_reset_down() are released. A failure return code indicates 737 * that we were unable to reinitialise the hardware, and the 738 * driver should be disabled. If ok is false, then the rx and tx 739 * engines are not restarted, pending a RESET_DISABLE. 
*/
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->finish_flr(efx);

	/* Ensure that SRAM is initialised even if we're disabling the device */
	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
		goto fail;
	}

	/* Not restarting: release the locks and return rc, which is 0 here */
	if (!ok)
		goto fail;

	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
	    method != RESET_TYPE_DATAPATH) {
		/* A failure here is only logged (and -EPERM not even that);
		 * rc is overwritten below, so it never fails the reset.
		 */
		rc = efx_mcdi_port_reconfigure(efx);
		if (rc && rc != -EPERM)
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
	}

	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_restore(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to restore vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif

	/* Release the three locks, each after the state it guards has been
	 * restored: rss_lock, then filter_sem, then mac_lock.
	 */
	if (efx->type->rx_restore_rss_contexts)
		efx->type->rx_restore_rss_contexts(efx);
	mutex_unlock(&efx->rss_lock);
	efx->type->filter_table_restore(efx);
	up_write(&efx->filter_sem);
	if (efx->type->sriov_reset)
		efx->type->sriov_reset(efx);

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

fail:
	efx->port_initialized = false;

	/* Same release order as the success path */
	mutex_unlock(&efx->rss_lock);
	up_write(&efx->filter_sem);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Reset the NIC using the specified method.  Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Caller must hold the rtnl_lock.
*/
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
	/* rc: result of the hardware reset; rc2: result of efx_reset_up(),
	 * which stays 0 on EF100 where efx_reset_up() is not called.
	 */
	int rc, rc2 = 0;
	bool disabled;

	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));

	efx_device_detach_sync(efx);
	/* efx_reset_down() grabs locks that prevent recovery on EF100.
	 * EF100 reset is handled in the efx_nic_type callback below.
	 */
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		efx_reset_down(efx, method);

	rc = efx->type->reset(efx, method);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
		goto out;
	}

	/* Clear flags for the scopes we covered.  We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
	if (method < RESET_TYPE_MAX_METHOD)
		/* -(1 << (method + 1)) has every bit from method + 1 upward
		 * set, so this clears bits 0..method in one go.
		 */
		efx->reset_pending &= -(1 << (method + 1));
	else /* it doesn't fit into the well-ordered scope hierarchy */
		__clear_bit(method, &efx->reset_pending);

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
	 * can respond to requests.
	 */
	pci_set_master(efx->pci_dev);

out:
	/* Leave device stopped if necessary */
	disabled = rc ||
		method == RESET_TYPE_DISABLE ||
		method == RESET_TYPE_RECOVER_OR_DISABLE;
	if (efx_nic_rev(efx) != EFX_REV_EF100)
		rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		/* Report the first error encountered */
		if (!rc)
			rc = rc2;
	}

	if (disabled) {
		dev_close(efx->net_dev);
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
		efx->state = STATE_DISABLED;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;
}

/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
874 */ 875 static void efx_reset_work(struct work_struct *data) 876 { 877 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 878 unsigned long pending; 879 enum reset_type method; 880 881 pending = READ_ONCE(efx->reset_pending); 882 method = fls(pending) - 1; 883 884 if (method == RESET_TYPE_MC_BIST) 885 efx_wait_for_bist_end(efx); 886 887 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 888 method == RESET_TYPE_RECOVER_OR_ALL) && 889 efx_try_recovery(efx)) 890 return; 891 892 if (!pending) 893 return; 894 895 rtnl_lock(); 896 897 /* We checked the state in efx_schedule_reset() but it may 898 * have changed by now. Now that we have the RTNL lock, 899 * it cannot change again. 900 */ 901 if (efx->state == STATE_READY) 902 (void)efx_reset(efx, method); 903 904 rtnl_unlock(); 905 } 906 907 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 908 { 909 enum reset_type method; 910 911 if (efx->state == STATE_RECOVERY) { 912 netif_dbg(efx, drv, efx->net_dev, 913 "recovering: skip scheduling %s reset\n", 914 RESET_TYPE(type)); 915 return; 916 } 917 918 switch (type) { 919 case RESET_TYPE_INVISIBLE: 920 case RESET_TYPE_ALL: 921 case RESET_TYPE_RECOVER_OR_ALL: 922 case RESET_TYPE_WORLD: 923 case RESET_TYPE_DISABLE: 924 case RESET_TYPE_RECOVER_OR_DISABLE: 925 case RESET_TYPE_DATAPATH: 926 case RESET_TYPE_MC_BIST: 927 case RESET_TYPE_MCDI_TIMEOUT: 928 method = type; 929 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 930 RESET_TYPE(method)); 931 break; 932 default: 933 method = efx->type->map_reset_reason(type); 934 netif_dbg(efx, drv, efx->net_dev, 935 "scheduling %s reset for %s\n", 936 RESET_TYPE(method), RESET_TYPE(type)); 937 break; 938 } 939 940 set_bit(method, &efx->reset_pending); 941 smp_mb(); /* ensure we change reset_pending before checking state */ 942 943 /* If we're not READY then just leave the flags set as the cue 944 * to abort probing or reschedule the reset later. 
945 */ 946 if (READ_ONCE(efx->state) != STATE_READY) 947 return; 948 949 /* efx_process_channel() will no longer read events once a 950 * reset is scheduled. So switch back to poll'd MCDI completions. 951 */ 952 efx_mcdi_mode_poll(efx); 953 954 efx_queue_reset_work(efx); 955 } 956 957 /************************************************************************** 958 * 959 * Dummy NIC operations 960 * 961 * Can be used for some unimplemented operations 962 * Needed so all function pointers are valid and do not have to be tested 963 * before use 964 * 965 **************************************************************************/ 966 int efx_port_dummy_op_int(struct efx_nic *efx) 967 { 968 return 0; 969 } 970 void efx_port_dummy_op_void(struct efx_nic *efx) {} 971 972 /************************************************************************** 973 * 974 * Data housekeeping 975 * 976 **************************************************************************/ 977 978 /* This zeroes out and then fills in the invariants in a struct 979 * efx_nic (including all sub-structures). 980 */ 981 int efx_init_struct(struct efx_nic *efx, 982 struct pci_dev *pci_dev, struct net_device *net_dev) 983 { 984 int rc = -ENOMEM; 985 986 /* Initialise common structures */ 987 INIT_LIST_HEAD(&efx->node); 988 INIT_LIST_HEAD(&efx->secondary_list); 989 spin_lock_init(&efx->biu_lock); 990 #ifdef CONFIG_SFC_MTD 991 INIT_LIST_HEAD(&efx->mtd_list); 992 #endif 993 INIT_WORK(&efx->reset_work, efx_reset_work); 994 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 995 efx_selftest_async_init(efx); 996 efx->pci_dev = pci_dev; 997 efx->msg_enable = debug; 998 efx->state = STATE_UNINIT; 999 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 1000 1001 efx->net_dev = net_dev; 1002 efx->rx_prefix_size = efx->type->rx_prefix_size; 1003 efx->rx_ip_align = 1004 NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 1005 efx->rx_packet_hash_offset = 1006 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 1007 efx->rx_packet_ts_offset = 1008 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 1009 INIT_LIST_HEAD(&efx->rss_context.list); 1010 efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; 1011 mutex_init(&efx->rss_lock); 1012 efx->vport_id = EVB_PORT_ID_ASSIGNED; 1013 spin_lock_init(&efx->stats_lock); 1014 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 1015 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 1016 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 1017 mutex_init(&efx->mac_lock); 1018 init_rwsem(&efx->filter_sem); 1019 #ifdef CONFIG_RFS_ACCEL 1020 mutex_init(&efx->rps_mutex); 1021 spin_lock_init(&efx->rps_hash_lock); 1022 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 1023 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 1024 sizeof(*efx->rps_hash_table), GFP_KERNEL); 1025 #endif 1026 efx->mdio.dev = net_dev; 1027 INIT_WORK(&efx->mac_work, efx_mac_work); 1028 init_waitqueue_head(&efx->flush_wq); 1029 1030 efx->tx_queues_per_channel = 1; 1031 efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE; 1032 efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1033 1034 efx->mem_bar = UINT_MAX; 1035 1036 rc = efx_init_channels(efx); 1037 if (rc) 1038 goto fail; 1039 1040 /* Would be good to use the net_dev name, but we're too early */ 1041 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 1042 pci_name(pci_dev)); 1043 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 1044 if (!efx->workqueue) { 1045 rc = -ENOMEM; 1046 goto fail; 1047 } 1048 1049 return 0; 1050 1051 fail: 1052 efx_fini_struct(efx); 1053 return rc; 1054 } 1055 1056 void efx_fini_struct(struct efx_nic *efx) 1057 { 1058 #ifdef CONFIG_RFS_ACCEL 1059 kfree(efx->rps_hash_table); 1060 #endif 1061 1062 efx_fini_channels(efx); 1063 1064 kfree(efx->vpd_sn); 1065 1066 if (efx->workqueue) { 1067 
destroy_workqueue(efx->workqueue); 1068 efx->workqueue = NULL; 1069 } 1070 } 1071 1072 /* This configures the PCI device to enable I/O and DMA. */ 1073 int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, 1074 unsigned int mem_map_size) 1075 { 1076 struct pci_dev *pci_dev = efx->pci_dev; 1077 int rc; 1078 1079 efx->mem_bar = UINT_MAX; 1080 1081 netif_dbg(efx, probe, efx->net_dev, "initialising I/O bar=%d\n", bar); 1082 1083 rc = pci_enable_device(pci_dev); 1084 if (rc) { 1085 netif_err(efx, probe, efx->net_dev, 1086 "failed to enable PCI device\n"); 1087 goto fail1; 1088 } 1089 1090 pci_set_master(pci_dev); 1091 1092 rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); 1093 if (rc) { 1094 netif_err(efx, probe, efx->net_dev, 1095 "could not find a suitable DMA mask\n"); 1096 goto fail2; 1097 } 1098 netif_dbg(efx, probe, efx->net_dev, 1099 "using DMA mask %llx\n", (unsigned long long)dma_mask); 1100 1101 efx->membase_phys = pci_resource_start(efx->pci_dev, bar); 1102 if (!efx->membase_phys) { 1103 netif_err(efx, probe, efx->net_dev, 1104 "ERROR: No BAR%d mapping from the BIOS. 
" 1105 "Try pci=realloc on the kernel command line\n", bar); 1106 rc = -ENODEV; 1107 goto fail3; 1108 } 1109 1110 rc = pci_request_region(pci_dev, bar, "sfc"); 1111 if (rc) { 1112 netif_err(efx, probe, efx->net_dev, 1113 "request for memory BAR[%d] failed\n", bar); 1114 rc = -EIO; 1115 goto fail3; 1116 } 1117 efx->mem_bar = bar; 1118 efx->membase = ioremap(efx->membase_phys, mem_map_size); 1119 if (!efx->membase) { 1120 netif_err(efx, probe, efx->net_dev, 1121 "could not map memory BAR[%d] at %llx+%x\n", bar, 1122 (unsigned long long)efx->membase_phys, mem_map_size); 1123 rc = -ENOMEM; 1124 goto fail4; 1125 } 1126 netif_dbg(efx, probe, efx->net_dev, 1127 "memory BAR[%d] at %llx+%x (virtual %p)\n", bar, 1128 (unsigned long long)efx->membase_phys, mem_map_size, 1129 efx->membase); 1130 1131 return 0; 1132 1133 fail4: 1134 pci_release_region(efx->pci_dev, bar); 1135 fail3: 1136 efx->membase_phys = 0; 1137 fail2: 1138 pci_disable_device(efx->pci_dev); 1139 fail1: 1140 return rc; 1141 } 1142 1143 void efx_fini_io(struct efx_nic *efx) 1144 { 1145 netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); 1146 1147 if (efx->membase) { 1148 iounmap(efx->membase); 1149 efx->membase = NULL; 1150 } 1151 1152 if (efx->membase_phys) { 1153 pci_release_region(efx->pci_dev, efx->mem_bar); 1154 efx->membase_phys = 0; 1155 efx->mem_bar = UINT_MAX; 1156 } 1157 1158 /* Don't disable bus-mastering if VFs are assigned */ 1159 if (!pci_vfs_assigned(efx->pci_dev)) 1160 pci_disable_device(efx->pci_dev); 1161 } 1162 1163 #ifdef CONFIG_SFC_MCDI_LOGGING 1164 static ssize_t mcdi_logging_show(struct device *dev, 1165 struct device_attribute *attr, 1166 char *buf) 1167 { 1168 struct efx_nic *efx = dev_get_drvdata(dev); 1169 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 1170 1171 return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); 1172 } 1173 1174 static ssize_t mcdi_logging_store(struct device *dev, 1175 struct device_attribute *attr, 1176 const char *buf, size_t count) 1177 { 1178 
struct efx_nic *efx = dev_get_drvdata(dev); 1179 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 1180 bool enable = count > 0 && *buf != '0'; 1181 1182 mcdi->logging_enabled = enable; 1183 return count; 1184 } 1185 1186 static DEVICE_ATTR_RW(mcdi_logging); 1187 1188 void efx_init_mcdi_logging(struct efx_nic *efx) 1189 { 1190 int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1191 1192 if (rc) { 1193 netif_warn(efx, drv, efx->net_dev, 1194 "failed to init net dev attributes\n"); 1195 } 1196 } 1197 1198 void efx_fini_mcdi_logging(struct efx_nic *efx) 1199 { 1200 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1201 } 1202 #endif 1203 1204 /* A PCI error affecting this device was detected. 1205 * At this point MMIO and DMA may be disabled. 1206 * Stop the software path and request a slot reset. 1207 */ 1208 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, 1209 pci_channel_state_t state) 1210 { 1211 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1212 struct efx_nic *efx = pci_get_drvdata(pdev); 1213 1214 if (state == pci_channel_io_perm_failure) 1215 return PCI_ERS_RESULT_DISCONNECT; 1216 1217 rtnl_lock(); 1218 1219 if (efx->state != STATE_DISABLED) { 1220 efx->state = STATE_RECOVERY; 1221 efx->reset_pending = 0; 1222 1223 efx_device_detach_sync(efx); 1224 1225 efx_stop_all(efx); 1226 efx_disable_interrupts(efx); 1227 1228 status = PCI_ERS_RESULT_NEED_RESET; 1229 } else { 1230 /* If the interface is disabled we don't want to do anything 1231 * with it. 1232 */ 1233 status = PCI_ERS_RESULT_RECOVERED; 1234 } 1235 1236 rtnl_unlock(); 1237 1238 pci_disable_device(pdev); 1239 1240 return status; 1241 } 1242 1243 /* Fake a successful reset, which will be performed later in efx_io_resume. 
*/ 1244 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) 1245 { 1246 struct efx_nic *efx = pci_get_drvdata(pdev); 1247 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1248 1249 if (pci_enable_device(pdev)) { 1250 netif_err(efx, hw, efx->net_dev, 1251 "Cannot re-enable PCI device after reset.\n"); 1252 status = PCI_ERS_RESULT_DISCONNECT; 1253 } 1254 1255 return status; 1256 } 1257 1258 /* Perform the actual reset and resume I/O operations. */ 1259 static void efx_io_resume(struct pci_dev *pdev) 1260 { 1261 struct efx_nic *efx = pci_get_drvdata(pdev); 1262 int rc; 1263 1264 rtnl_lock(); 1265 1266 if (efx->state == STATE_DISABLED) 1267 goto out; 1268 1269 rc = efx_reset(efx, RESET_TYPE_ALL); 1270 if (rc) { 1271 netif_err(efx, hw, efx->net_dev, 1272 "efx_reset failed after PCI error (%d)\n", rc); 1273 } else { 1274 efx->state = STATE_READY; 1275 netif_dbg(efx, hw, efx->net_dev, 1276 "Done resetting and resuming IO after PCI error.\n"); 1277 } 1278 1279 out: 1280 rtnl_unlock(); 1281 } 1282 1283 /* For simplicity and reliability, we always require a slot reset and try to 1284 * reset the hardware when a pci error affecting the device is detected. 1285 * We leave both the link_reset and mmio_enabled callback unimplemented: 1286 * with our request for slot reset the mmio_enabled callback will never be 1287 * called, and the link_reset callback is not used by AER or EEH mechanisms. 1288 */ 1289 const struct pci_error_handlers efx_err_handlers = { 1290 .error_detected = efx_io_error_detected, 1291 .slot_reset = efx_io_slot_reset, 1292 .resume = efx_io_resume, 1293 }; 1294 1295 /* Determine whether the NIC will be able to handle TX offloads for a given 1296 * encapsulated packet. 1297 */ 1298 static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb) 1299 { 1300 struct gre_base_hdr *greh; 1301 __be16 dst_port; 1302 u8 ipproto; 1303 1304 /* Does the NIC support encap offloads? 
1305 * If not, we should never get here, because we shouldn't have 1306 * advertised encap offload feature flags in the first place. 1307 */ 1308 if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port)) 1309 return false; 1310 1311 /* Determine encapsulation protocol in use */ 1312 switch (skb->protocol) { 1313 case htons(ETH_P_IP): 1314 ipproto = ip_hdr(skb)->protocol; 1315 break; 1316 case htons(ETH_P_IPV6): 1317 /* If there are extension headers, this will cause us to 1318 * think we can't offload something that we maybe could have. 1319 */ 1320 ipproto = ipv6_hdr(skb)->nexthdr; 1321 break; 1322 default: 1323 /* Not IP, so can't offload it */ 1324 return false; 1325 } 1326 switch (ipproto) { 1327 case IPPROTO_GRE: 1328 /* We support NVGRE but not IP over GRE or random gretaps. 1329 * Specifically, the NIC will accept GRE as encapsulated if 1330 * the inner protocol is Ethernet, but only handle it 1331 * correctly if the GRE header is 8 bytes long. Moreover, 1332 * it will not update the Checksum or Sequence Number fields 1333 * if they are present. (The Routing Present flag, 1334 * GRE_ROUTING, cannot be set else the header would be more 1335 * than 8 bytes long; so we don't have to worry about it.) 1336 */ 1337 if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) 1338 return false; 1339 if (ntohs(skb->inner_protocol) != ETH_P_TEB) 1340 return false; 1341 if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8) 1342 return false; 1343 greh = (struct gre_base_hdr *)skb_transport_header(skb); 1344 return !(greh->flags & (GRE_CSUM | GRE_SEQ)); 1345 case IPPROTO_UDP: 1346 /* If the port is registered for a UDP tunnel, we assume the 1347 * packet is for that tunnel, and the NIC will handle it as 1348 * such. If not, the NIC won't know what to do with it. 
1349 */ 1350 dst_port = udp_hdr(skb)->dest; 1351 return efx->type->udp_tnl_has_port(efx, dst_port); 1352 default: 1353 return false; 1354 } 1355 } 1356 1357 netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev, 1358 netdev_features_t features) 1359 { 1360 struct efx_nic *efx = netdev_priv(dev); 1361 1362 if (skb->encapsulation) { 1363 if (features & NETIF_F_GSO_MASK) 1364 /* Hardware can only do TSO with at most 208 bytes 1365 * of headers. 1366 */ 1367 if (skb_inner_transport_offset(skb) > 1368 EFX_TSO2_MAX_HDRLEN) 1369 features &= ~(NETIF_F_GSO_MASK); 1370 if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) 1371 if (!efx_can_encap_offloads(efx, skb)) 1372 features &= ~(NETIF_F_GSO_MASK | 1373 NETIF_F_CSUM_MASK); 1374 } 1375 return features; 1376 } 1377 1378 int efx_get_phys_port_id(struct net_device *net_dev, 1379 struct netdev_phys_item_id *ppid) 1380 { 1381 struct efx_nic *efx = netdev_priv(net_dev); 1382 1383 if (efx->type->get_phys_port_id) 1384 return efx->type->get_phys_port_id(efx, ppid); 1385 else 1386 return -EOPNOTSUPP; 1387 } 1388 1389 int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len) 1390 { 1391 struct efx_nic *efx = netdev_priv(net_dev); 1392 1393 if (snprintf(name, len, "p%u", efx->port_num) >= len) 1394 return -EINVAL; 1395 return 0; 1396 } 1397