1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2018 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include "net_driver.h" 12 #include <linux/module.h> 13 #include <linux/netdevice.h> 14 #include <net/gre.h> 15 #include "efx_common.h" 16 #include "efx_channels.h" 17 #include "efx.h" 18 #include "mcdi.h" 19 #include "selftest.h" 20 #include "rx_common.h" 21 #include "tx_common.h" 22 #include "nic.h" 23 #include "mcdi_port_common.h" 24 #include "io.h" 25 #include "mcdi_pcol.h" 26 27 static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | 28 NETIF_MSG_LINK | NETIF_MSG_IFDOWN | 29 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | 30 NETIF_MSG_TX_ERR | NETIF_MSG_HW); 31 module_param(debug, uint, 0); 32 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); 33 34 /* This is the time (in jiffies) between invocations of the hardware 35 * monitor. 36 * On Falcon-based NICs, this will: 37 * - Check the on-board hardware monitor; 38 * - Poll the link state and reconfigure the hardware as necessary. 39 * On Siena-based NICs for power systems with EEH support, this will give EEH a 40 * chance to start. 41 */ 42 static unsigned int efx_monitor_interval = 1 * HZ; 43 44 /* How often and how many times to poll for a reset while waiting for a 45 * BIST that another function started to complete. 
46 */ 47 #define BIST_WAIT_DELAY_MS 100 48 #define BIST_WAIT_DELAY_COUNT 100 49 50 /* Default stats update time */ 51 #define STATS_PERIOD_MS_DEFAULT 1000 52 53 const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 54 const char *const efx_reset_type_names[] = { 55 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 56 [RESET_TYPE_ALL] = "ALL", 57 [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL", 58 [RESET_TYPE_WORLD] = "WORLD", 59 [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE", 60 [RESET_TYPE_DATAPATH] = "DATAPATH", 61 [RESET_TYPE_MC_BIST] = "MC_BIST", 62 [RESET_TYPE_DISABLE] = "DISABLE", 63 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 64 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 65 [RESET_TYPE_DMA_ERROR] = "DMA_ERROR", 66 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 67 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 68 [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)", 69 }; 70 71 #define RESET_TYPE(type) \ 72 STRING_TABLE_LOOKUP(type, efx_reset_type) 73 74 /* Loopback mode names (see LOOPBACK_MODE()) */ 75 const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 76 const char *const efx_loopback_mode_names[] = { 77 [LOOPBACK_NONE] = "NONE", 78 [LOOPBACK_DATA] = "DATAPATH", 79 [LOOPBACK_GMAC] = "GMAC", 80 [LOOPBACK_XGMII] = "XGMII", 81 [LOOPBACK_XGXS] = "XGXS", 82 [LOOPBACK_XAUI] = "XAUI", 83 [LOOPBACK_GMII] = "GMII", 84 [LOOPBACK_SGMII] = "SGMII", 85 [LOOPBACK_XGBR] = "XGBR", 86 [LOOPBACK_XFI] = "XFI", 87 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 88 [LOOPBACK_GMII_FAR] = "GMII_FAR", 89 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 90 [LOOPBACK_XFI_FAR] = "XFI_FAR", 91 [LOOPBACK_GPHY] = "GPHY", 92 [LOOPBACK_PHYXS] = "PHYXS", 93 [LOOPBACK_PCS] = "PCS", 94 [LOOPBACK_PMAPMD] = "PMA/PMD", 95 [LOOPBACK_XPORT] = "XPORT", 96 [LOOPBACK_XGMII_WS] = "XGMII_WS", 97 [LOOPBACK_XAUI_WS] = "XAUI_WS", 98 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 99 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 100 [LOOPBACK_GMII_WS] = "GMII_WS", 101 [LOOPBACK_XFI_WS] = "XFI_WS", 102 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 103 [LOOPBACK_PHYXS_WS] = 
"PHYXS_WS", 104 }; 105 106 /* Reset workqueue. If any NIC has a hardware failure then a reset will be 107 * queued onto this work queue. This is not a per-nic work queue, because 108 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 109 */ 110 static struct workqueue_struct *reset_workqueue; 111 112 int efx_create_reset_workqueue(void) 113 { 114 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 115 if (!reset_workqueue) { 116 printk(KERN_ERR "Failed to create reset workqueue\n"); 117 return -ENOMEM; 118 } 119 120 return 0; 121 } 122 123 void efx_queue_reset_work(struct efx_nic *efx) 124 { 125 queue_work(reset_workqueue, &efx->reset_work); 126 } 127 128 void efx_flush_reset_workqueue(struct efx_nic *efx) 129 { 130 cancel_work_sync(&efx->reset_work); 131 } 132 133 void efx_destroy_reset_workqueue(void) 134 { 135 if (reset_workqueue) { 136 destroy_workqueue(reset_workqueue); 137 reset_workqueue = NULL; 138 } 139 } 140 141 /* We assume that efx->type->reconfigure_mac will always try to sync RX 142 * filters and therefore needs to read-lock the filter table against freeing 143 */ 144 void efx_mac_reconfigure(struct efx_nic *efx, bool mtu_only) 145 { 146 if (efx->type->reconfigure_mac) { 147 down_read(&efx->filter_sem); 148 efx->type->reconfigure_mac(efx, mtu_only); 149 up_read(&efx->filter_sem); 150 } 151 } 152 153 /* Asynchronous work item for changing MAC promiscuity and multicast 154 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 155 * MAC directly. 
156 */ 157 static void efx_mac_work(struct work_struct *data) 158 { 159 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 160 161 mutex_lock(&efx->mac_lock); 162 if (efx->port_enabled) 163 efx_mac_reconfigure(efx, false); 164 mutex_unlock(&efx->mac_lock); 165 } 166 167 int efx_set_mac_address(struct net_device *net_dev, void *data) 168 { 169 struct efx_nic *efx = netdev_priv(net_dev); 170 struct sockaddr *addr = data; 171 u8 *new_addr = addr->sa_data; 172 u8 old_addr[6]; 173 int rc; 174 175 if (!is_valid_ether_addr(new_addr)) { 176 netif_err(efx, drv, efx->net_dev, 177 "invalid ethernet MAC address requested: %pM\n", 178 new_addr); 179 return -EADDRNOTAVAIL; 180 } 181 182 /* save old address */ 183 ether_addr_copy(old_addr, net_dev->dev_addr); 184 ether_addr_copy(net_dev->dev_addr, new_addr); 185 if (efx->type->set_mac_address) { 186 rc = efx->type->set_mac_address(efx); 187 if (rc) { 188 ether_addr_copy(net_dev->dev_addr, old_addr); 189 return rc; 190 } 191 } 192 193 /* Reconfigure the MAC */ 194 mutex_lock(&efx->mac_lock); 195 efx_mac_reconfigure(efx, false); 196 mutex_unlock(&efx->mac_lock); 197 198 return 0; 199 } 200 201 /* Context: netif_addr_lock held, BHs disabled. */ 202 void efx_set_rx_mode(struct net_device *net_dev) 203 { 204 struct efx_nic *efx = netdev_priv(net_dev); 205 206 if (efx->port_enabled) 207 queue_work(efx->workqueue, &efx->mac_work); 208 /* Otherwise efx_start_port() will do this */ 209 } 210 211 int efx_set_features(struct net_device *net_dev, netdev_features_t data) 212 { 213 struct efx_nic *efx = netdev_priv(net_dev); 214 int rc; 215 216 /* If disabling RX n-tuple filtering, clear existing filters */ 217 if (net_dev->features & ~data & NETIF_F_NTUPLE) { 218 rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL); 219 if (rc) 220 return rc; 221 } 222 223 /* If Rx VLAN filter is changed, update filters via mac_reconfigure. 224 * If rx-fcs is changed, mac_reconfigure updates that too. 
225 */ 226 if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER | 227 NETIF_F_RXFCS)) { 228 /* efx_set_rx_mode() will schedule MAC work to update filters 229 * when a new features are finally set in net_dev. 230 */ 231 efx_set_rx_mode(net_dev); 232 } 233 234 return 0; 235 } 236 237 /* This ensures that the kernel is kept informed (via 238 * netif_carrier_on/off) of the link status, and also maintains the 239 * link status's stop on the port's TX queue. 240 */ 241 void efx_link_status_changed(struct efx_nic *efx) 242 { 243 struct efx_link_state *link_state = &efx->link_state; 244 245 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 246 * that no events are triggered between unregister_netdev() and the 247 * driver unloading. A more general condition is that NETDEV_CHANGE 248 * can only be generated between NETDEV_UP and NETDEV_DOWN 249 */ 250 if (!netif_running(efx->net_dev)) 251 return; 252 253 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 254 efx->n_link_state_changes++; 255 256 if (link_state->up) 257 netif_carrier_on(efx->net_dev); 258 else 259 netif_carrier_off(efx->net_dev); 260 } 261 262 /* Status message for kernel log */ 263 if (link_state->up) 264 netif_info(efx, link, efx->net_dev, 265 "link up at %uMbps %s-duplex (MTU %d)\n", 266 link_state->speed, link_state->fd ? "full" : "half", 267 efx->net_dev->mtu); 268 else 269 netif_info(efx, link, efx->net_dev, "link down\n"); 270 } 271 272 unsigned int efx_xdp_max_mtu(struct efx_nic *efx) 273 { 274 /* The maximum MTU that we can fit in a single page, allowing for 275 * framing, overhead and XDP headroom + tailroom. 276 */ 277 int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) + 278 efx->rx_prefix_size + efx->type->rx_buffer_padding + 279 efx->rx_ip_align + EFX_XDP_HEADROOM + EFX_XDP_TAILROOM; 280 281 return PAGE_SIZE - overhead; 282 } 283 284 /* Context: process, rtnl_lock() held. 
*/ 285 int efx_change_mtu(struct net_device *net_dev, int new_mtu) 286 { 287 struct efx_nic *efx = netdev_priv(net_dev); 288 int rc; 289 290 rc = efx_check_disabled(efx); 291 if (rc) 292 return rc; 293 294 if (rtnl_dereference(efx->xdp_prog) && 295 new_mtu > efx_xdp_max_mtu(efx)) { 296 netif_err(efx, drv, efx->net_dev, 297 "Requested MTU of %d too big for XDP (max: %d)\n", 298 new_mtu, efx_xdp_max_mtu(efx)); 299 return -EINVAL; 300 } 301 302 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 303 304 efx_device_detach_sync(efx); 305 efx_stop_all(efx); 306 307 mutex_lock(&efx->mac_lock); 308 net_dev->mtu = new_mtu; 309 efx_mac_reconfigure(efx, true); 310 mutex_unlock(&efx->mac_lock); 311 312 efx_start_all(efx); 313 efx_device_attach_if_not_resetting(efx); 314 return 0; 315 } 316 317 /************************************************************************** 318 * 319 * Hardware monitor 320 * 321 **************************************************************************/ 322 323 /* Run periodically off the general workqueue */ 324 static void efx_monitor(struct work_struct *data) 325 { 326 struct efx_nic *efx = container_of(data, struct efx_nic, 327 monitor_work.work); 328 329 netif_vdbg(efx, timer, efx->net_dev, 330 "hardware monitor executing on CPU %d\n", 331 raw_smp_processor_id()); 332 BUG_ON(efx->type->monitor == NULL); 333 334 /* If the mac_lock is already held then it is likely a port 335 * reconfiguration is already in place, which will likely do 336 * most of the work of monitor() anyway. 
337 */ 338 if (mutex_trylock(&efx->mac_lock)) { 339 if (efx->port_enabled && efx->type->monitor) 340 efx->type->monitor(efx); 341 mutex_unlock(&efx->mac_lock); 342 } 343 344 efx_start_monitor(efx); 345 } 346 347 void efx_start_monitor(struct efx_nic *efx) 348 { 349 if (efx->type->monitor) 350 queue_delayed_work(efx->workqueue, &efx->monitor_work, 351 efx_monitor_interval); 352 } 353 354 /************************************************************************** 355 * 356 * Event queue processing 357 * 358 *************************************************************************/ 359 360 /* Channels are shutdown and reinitialised whilst the NIC is running 361 * to propagate configuration changes (mtu, checksum offload), or 362 * to clear hardware error conditions 363 */ 364 static void efx_start_datapath(struct efx_nic *efx) 365 { 366 netdev_features_t old_features = efx->net_dev->features; 367 bool old_rx_scatter = efx->rx_scatter; 368 size_t rx_buf_len; 369 370 /* Calculate the rx buffer allocation parameters required to 371 * support the current MTU, including padding for header 372 * alignment and overruns. 
373 */ 374 efx->rx_dma_len = (efx->rx_prefix_size + 375 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 376 efx->type->rx_buffer_padding); 377 rx_buf_len = (sizeof(struct efx_rx_page_state) + EFX_XDP_HEADROOM + 378 efx->rx_ip_align + efx->rx_dma_len + EFX_XDP_TAILROOM); 379 380 if (rx_buf_len <= PAGE_SIZE) { 381 efx->rx_scatter = efx->type->always_rx_scatter; 382 efx->rx_buffer_order = 0; 383 } else if (efx->type->can_rx_scatter) { 384 BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES); 385 BUILD_BUG_ON(sizeof(struct efx_rx_page_state) + 386 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE, 387 EFX_RX_BUF_ALIGNMENT) > 388 PAGE_SIZE); 389 efx->rx_scatter = true; 390 efx->rx_dma_len = EFX_RX_USR_BUF_SIZE; 391 efx->rx_buffer_order = 0; 392 } else { 393 efx->rx_scatter = false; 394 efx->rx_buffer_order = get_order(rx_buf_len); 395 } 396 397 efx_rx_config_page_split(efx); 398 if (efx->rx_buffer_order) 399 netif_dbg(efx, drv, efx->net_dev, 400 "RX buf len=%u; page order=%u batch=%u\n", 401 efx->rx_dma_len, efx->rx_buffer_order, 402 efx->rx_pages_per_batch); 403 else 404 netif_dbg(efx, drv, efx->net_dev, 405 "RX buf len=%u step=%u bpp=%u; page batch=%u\n", 406 efx->rx_dma_len, efx->rx_page_buf_step, 407 efx->rx_bufs_per_page, efx->rx_pages_per_batch); 408 409 /* Restore previously fixed features in hw_features and remove 410 * features which are fixed now 411 */ 412 efx->net_dev->hw_features |= efx->net_dev->features; 413 efx->net_dev->hw_features &= ~efx->fixed_features; 414 efx->net_dev->features |= efx->fixed_features; 415 if (efx->net_dev->features != old_features) 416 netdev_features_change(efx->net_dev); 417 418 /* RX filters may also have scatter-enabled flags */ 419 if ((efx->rx_scatter != old_rx_scatter) && 420 efx->type->filter_update_rx_scatter) 421 efx->type->filter_update_rx_scatter(efx); 422 423 /* We must keep at least one descriptor in a TX ring empty. 
424 * We could avoid this when the queue size does not exactly 425 * match the hardware ring size, but it's not that important. 426 * Therefore we stop the queue when one more skb might fill 427 * the ring completely. We wake it when half way back to 428 * empty. 429 */ 430 efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); 431 efx->txq_wake_thresh = efx->txq_stop_thresh / 2; 432 433 /* Initialise the channels */ 434 efx_start_channels(efx); 435 436 efx_ptp_start_datapath(efx); 437 438 if (netif_device_present(efx->net_dev)) 439 netif_tx_wake_all_queues(efx->net_dev); 440 } 441 442 static void efx_stop_datapath(struct efx_nic *efx) 443 { 444 EFX_ASSERT_RESET_SERIALISED(efx); 445 BUG_ON(efx->port_enabled); 446 447 efx_ptp_stop_datapath(efx); 448 449 efx_stop_channels(efx); 450 } 451 452 /************************************************************************** 453 * 454 * Port handling 455 * 456 **************************************************************************/ 457 458 /* Equivalent to efx_link_set_advertising with all-zeroes, except does not 459 * force the Autoneg bit on. 
460 */ 461 void efx_link_clear_advertising(struct efx_nic *efx) 462 { 463 bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); 464 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 465 } 466 467 void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc) 468 { 469 efx->wanted_fc = wanted_fc; 470 if (efx->link_advertising[0]) { 471 if (wanted_fc & EFX_FC_RX) 472 efx->link_advertising[0] |= (ADVERTISED_Pause | 473 ADVERTISED_Asym_Pause); 474 else 475 efx->link_advertising[0] &= ~(ADVERTISED_Pause | 476 ADVERTISED_Asym_Pause); 477 if (wanted_fc & EFX_FC_TX) 478 efx->link_advertising[0] ^= ADVERTISED_Asym_Pause; 479 } 480 } 481 482 static void efx_start_port(struct efx_nic *efx) 483 { 484 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 485 BUG_ON(efx->port_enabled); 486 487 mutex_lock(&efx->mac_lock); 488 efx->port_enabled = true; 489 490 /* Ensure MAC ingress/egress is enabled */ 491 efx_mac_reconfigure(efx, false); 492 493 mutex_unlock(&efx->mac_lock); 494 } 495 496 /* Cancel work for MAC reconfiguration, periodic hardware monitoring 497 * and the async self-test, wait for them to finish and prevent them 498 * being scheduled again. This doesn't cover online resets, which 499 * should only be cancelled when removing the device. 500 */ 501 static void efx_stop_port(struct efx_nic *efx) 502 { 503 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 504 505 EFX_ASSERT_RESET_SERIALISED(efx); 506 507 mutex_lock(&efx->mac_lock); 508 efx->port_enabled = false; 509 mutex_unlock(&efx->mac_lock); 510 511 /* Serialise against efx_set_multicast_list() */ 512 netif_addr_lock_bh(efx->net_dev); 513 netif_addr_unlock_bh(efx->net_dev); 514 515 cancel_delayed_work_sync(&efx->monitor_work); 516 efx_selftest_async_cancel(efx); 517 cancel_work_sync(&efx->mac_work); 518 } 519 520 /* If the interface is supposed to be running but is not, start 521 * the hardware and software data path, regular activity for the port 522 * (MAC statistics, link polling, etc.) 
and schedule the port to be
 * reconfigured.  Interrupts must already be enabled.  This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
 */
void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->state == STATE_DISABLED);

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock
	 */
	if (efx->port_enabled)
		return;
	if (!netif_running(efx->net_dev))
		return;
	if (efx->reset_pending)
		return;

	efx_start_port(efx);
	efx_start_datapath(efx);

	/* Start the hardware monitor if there is one */
	efx_start_monitor(efx);

	/* Link state detection is normally event-driven; we have
	 * to poll now because we could have missed a change
	 */
	mutex_lock(&efx->mac_lock);
	if (efx_mcdi_phy_poll(efx))
		efx_link_status_changed(efx);
	mutex_unlock(&efx->mac_lock);

	/* Nothing more to do for NIC types without a start_stats hook */
	if (!efx->type->start_stats)
		return;

	efx->type->start_stats(efx);
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
}

/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down.  Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled.  Requires the RTNL lock.
566 */ 567 void efx_stop_all(struct efx_nic *efx) 568 { 569 EFX_ASSERT_RESET_SERIALISED(efx); 570 571 /* port_enabled can be read safely under the rtnl lock */ 572 if (!efx->port_enabled) 573 return; 574 575 if (efx->type->update_stats) { 576 /* update stats before we go down so we can accurately count 577 * rx_nodesc_drops 578 */ 579 efx->type->pull_stats(efx); 580 spin_lock_bh(&efx->stats_lock); 581 efx->type->update_stats(efx, NULL, NULL); 582 spin_unlock_bh(&efx->stats_lock); 583 efx->type->stop_stats(efx); 584 } 585 586 efx_stop_port(efx); 587 588 /* Stop the kernel transmit interface. This is only valid if 589 * the device is stopped or detached; otherwise the watchdog 590 * may fire immediately. 591 */ 592 WARN_ON(netif_running(efx->net_dev) && 593 netif_device_present(efx->net_dev)); 594 netif_tx_disable(efx->net_dev); 595 596 efx_stop_datapath(efx); 597 } 598 599 /* Context: process, dev_base_lock or RTNL held, non-blocking. */ 600 void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) 601 { 602 struct efx_nic *efx = netdev_priv(net_dev); 603 604 spin_lock_bh(&efx->stats_lock); 605 efx_nic_update_stats_atomic(efx, NULL, stats); 606 spin_unlock_bh(&efx->stats_lock); 607 } 608 609 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 610 * the MAC appropriately. All other PHY configuration changes are pushed 611 * through phy_op->set_settings(), and pushed asynchronously to the MAC 612 * through efx_monitor(). 
613 * 614 * Callers must hold the mac_lock 615 */ 616 int __efx_reconfigure_port(struct efx_nic *efx) 617 { 618 enum efx_phy_mode phy_mode; 619 int rc = 0; 620 621 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 622 623 /* Disable PHY transmit in mac level loopbacks */ 624 phy_mode = efx->phy_mode; 625 if (LOOPBACK_INTERNAL(efx)) 626 efx->phy_mode |= PHY_MODE_TX_DISABLED; 627 else 628 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 629 630 if (efx->type->reconfigure_port) 631 rc = efx->type->reconfigure_port(efx); 632 633 if (rc) 634 efx->phy_mode = phy_mode; 635 636 return rc; 637 } 638 639 /* Reinitialise the MAC to pick up new PHY settings, even if the port is 640 * disabled. 641 */ 642 int efx_reconfigure_port(struct efx_nic *efx) 643 { 644 int rc; 645 646 EFX_ASSERT_RESET_SERIALISED(efx); 647 648 mutex_lock(&efx->mac_lock); 649 rc = __efx_reconfigure_port(efx); 650 mutex_unlock(&efx->mac_lock); 651 652 return rc; 653 } 654 655 /************************************************************************** 656 * 657 * Device reset and suspend 658 * 659 **************************************************************************/ 660 661 static void efx_wait_for_bist_end(struct efx_nic *efx) 662 { 663 int i; 664 665 for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) { 666 if (efx_mcdi_poll_reboot(efx)) 667 goto out; 668 msleep(BIST_WAIT_DELAY_MS); 669 } 670 671 netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n"); 672 out: 673 /* Either way unset the BIST flag. If we found no reboot we probably 674 * won't recover, but we should try. 675 */ 676 efx->mc_bist_for_other_fn = false; 677 } 678 679 /* Try recovery mechanisms. 680 * For now only EEH is supported. 681 * Returns 0 if the recovery mechanisms are unsuccessful. 682 * Returns a non-zero value otherwise. 683 */ 684 int efx_try_recovery(struct efx_nic *efx) 685 { 686 #ifdef CONFIG_EEH 687 /* A PCI error can occur and not be seen by EEH because nothing 688 * happens on the PCI bus. 
In this case the driver may fail and 689 * schedule a 'recover or reset', leading to this recovery handler. 690 * Manually call the eeh failure check function. 691 */ 692 struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev); 693 if (eeh_dev_check_failure(eehdev)) { 694 /* The EEH mechanisms will handle the error and reset the 695 * device if necessary. 696 */ 697 return 1; 698 } 699 #endif 700 return 0; 701 } 702 703 /* Tears down the entire software state and most of the hardware state 704 * before reset. 705 */ 706 void efx_reset_down(struct efx_nic *efx, enum reset_type method) 707 { 708 EFX_ASSERT_RESET_SERIALISED(efx); 709 710 if (method == RESET_TYPE_MCDI_TIMEOUT) 711 efx->type->prepare_flr(efx); 712 713 efx_stop_all(efx); 714 efx_disable_interrupts(efx); 715 716 mutex_lock(&efx->mac_lock); 717 down_write(&efx->filter_sem); 718 mutex_lock(&efx->rss_lock); 719 efx->type->fini(efx); 720 } 721 722 /* Context: netif_tx_lock held, BHs disabled. */ 723 void efx_watchdog(struct net_device *net_dev, unsigned int txqueue) 724 { 725 struct efx_nic *efx = netdev_priv(net_dev); 726 727 netif_err(efx, tx_err, efx->net_dev, 728 "TX stuck with port_enabled=%d: resetting channels\n", 729 efx->port_enabled); 730 731 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 732 } 733 734 /* This function will always ensure that the locks acquired in 735 * efx_reset_down() are released. A failure return code indicates 736 * that we were unable to reinitialise the hardware, and the 737 * driver should be disabled. If ok is false, then the rx and tx 738 * engines are not restarted, pending a RESET_DISABLE. 
739 */ 740 int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 741 { 742 int rc; 743 744 EFX_ASSERT_RESET_SERIALISED(efx); 745 746 if (method == RESET_TYPE_MCDI_TIMEOUT) 747 efx->type->finish_flr(efx); 748 749 /* Ensure that SRAM is initialised even if we're disabling the device */ 750 rc = efx->type->init(efx); 751 if (rc) { 752 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 753 goto fail; 754 } 755 756 if (!ok) 757 goto fail; 758 759 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && 760 method != RESET_TYPE_DATAPATH) { 761 rc = efx_mcdi_port_reconfigure(efx); 762 if (rc && rc != -EPERM) 763 netif_err(efx, drv, efx->net_dev, 764 "could not restore PHY settings\n"); 765 } 766 767 rc = efx_enable_interrupts(efx); 768 if (rc) 769 goto fail; 770 771 #ifdef CONFIG_SFC_SRIOV 772 rc = efx->type->vswitching_restore(efx); 773 if (rc) /* not fatal; the PF will still work fine */ 774 netif_warn(efx, probe, efx->net_dev, 775 "failed to restore vswitching rc=%d;" 776 " VFs may not function\n", rc); 777 #endif 778 779 if (efx->type->rx_restore_rss_contexts) 780 efx->type->rx_restore_rss_contexts(efx); 781 mutex_unlock(&efx->rss_lock); 782 efx->type->filter_table_restore(efx); 783 up_write(&efx->filter_sem); 784 if (efx->type->sriov_reset) 785 efx->type->sriov_reset(efx); 786 787 mutex_unlock(&efx->mac_lock); 788 789 efx_start_all(efx); 790 791 if (efx->type->udp_tnl_push_ports) 792 efx->type->udp_tnl_push_ports(efx); 793 794 return 0; 795 796 fail: 797 efx->port_initialized = false; 798 799 mutex_unlock(&efx->rss_lock); 800 up_write(&efx->filter_sem); 801 mutex_unlock(&efx->mac_lock); 802 803 return rc; 804 } 805 806 /* Reset the NIC using the specified method. Note that the reset may 807 * fail, in which case the card will be left in an unusable state. 808 * 809 * Caller must hold the rtnl_lock. 
810 */ 811 int efx_reset(struct efx_nic *efx, enum reset_type method) 812 { 813 int rc, rc2 = 0; 814 bool disabled; 815 816 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 817 RESET_TYPE(method)); 818 819 efx_device_detach_sync(efx); 820 /* efx_reset_down() grabs locks that prevent recovery on EF100. 821 * EF100 reset is handled in the efx_nic_type callback below. 822 */ 823 if (efx_nic_rev(efx) != EFX_REV_EF100) 824 efx_reset_down(efx, method); 825 826 rc = efx->type->reset(efx, method); 827 if (rc) { 828 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 829 goto out; 830 } 831 832 /* Clear flags for the scopes we covered. We assume the NIC and 833 * driver are now quiescent so that there is no race here. 834 */ 835 if (method < RESET_TYPE_MAX_METHOD) 836 efx->reset_pending &= -(1 << (method + 1)); 837 else /* it doesn't fit into the well-ordered scope hierarchy */ 838 __clear_bit(method, &efx->reset_pending); 839 840 /* Reinitialise bus-mastering, which may have been turned off before 841 * the reset was scheduled. This is still appropriate, even in the 842 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 843 * can respond to requests. 844 */ 845 pci_set_master(efx->pci_dev); 846 847 out: 848 /* Leave device stopped if necessary */ 849 disabled = rc || 850 method == RESET_TYPE_DISABLE || 851 method == RESET_TYPE_RECOVER_OR_DISABLE; 852 if (efx_nic_rev(efx) != EFX_REV_EF100) 853 rc2 = efx_reset_up(efx, method, !disabled); 854 if (rc2) { 855 disabled = true; 856 if (!rc) 857 rc = rc2; 858 } 859 860 if (disabled) { 861 dev_close(efx->net_dev); 862 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 863 efx->state = STATE_DISABLED; 864 } else { 865 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 866 efx_device_attach_if_not_resetting(efx); 867 } 868 return rc; 869 } 870 871 /* The worker thread exists so that code that cannot sleep can 872 * schedule a reset for later. 
873 */ 874 static void efx_reset_work(struct work_struct *data) 875 { 876 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 877 unsigned long pending; 878 enum reset_type method; 879 880 pending = READ_ONCE(efx->reset_pending); 881 method = fls(pending) - 1; 882 883 if (method == RESET_TYPE_MC_BIST) 884 efx_wait_for_bist_end(efx); 885 886 if ((method == RESET_TYPE_RECOVER_OR_DISABLE || 887 method == RESET_TYPE_RECOVER_OR_ALL) && 888 efx_try_recovery(efx)) 889 return; 890 891 if (!pending) 892 return; 893 894 rtnl_lock(); 895 896 /* We checked the state in efx_schedule_reset() but it may 897 * have changed by now. Now that we have the RTNL lock, 898 * it cannot change again. 899 */ 900 if (efx->state == STATE_READY) 901 (void)efx_reset(efx, method); 902 903 rtnl_unlock(); 904 } 905 906 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 907 { 908 enum reset_type method; 909 910 if (efx->state == STATE_RECOVERY) { 911 netif_dbg(efx, drv, efx->net_dev, 912 "recovering: skip scheduling %s reset\n", 913 RESET_TYPE(type)); 914 return; 915 } 916 917 switch (type) { 918 case RESET_TYPE_INVISIBLE: 919 case RESET_TYPE_ALL: 920 case RESET_TYPE_RECOVER_OR_ALL: 921 case RESET_TYPE_WORLD: 922 case RESET_TYPE_DISABLE: 923 case RESET_TYPE_RECOVER_OR_DISABLE: 924 case RESET_TYPE_DATAPATH: 925 case RESET_TYPE_MC_BIST: 926 case RESET_TYPE_MCDI_TIMEOUT: 927 method = type; 928 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 929 RESET_TYPE(method)); 930 break; 931 default: 932 method = efx->type->map_reset_reason(type); 933 netif_dbg(efx, drv, efx->net_dev, 934 "scheduling %s reset for %s\n", 935 RESET_TYPE(method), RESET_TYPE(type)); 936 break; 937 } 938 939 set_bit(method, &efx->reset_pending); 940 smp_mb(); /* ensure we change reset_pending before checking state */ 941 942 /* If we're not READY then just leave the flags set as the cue 943 * to abort probing or reschedule the reset later. 
944 */ 945 if (READ_ONCE(efx->state) != STATE_READY) 946 return; 947 948 /* efx_process_channel() will no longer read events once a 949 * reset is scheduled. So switch back to poll'd MCDI completions. 950 */ 951 efx_mcdi_mode_poll(efx); 952 953 efx_queue_reset_work(efx); 954 } 955 956 /************************************************************************** 957 * 958 * Dummy NIC operations 959 * 960 * Can be used for some unimplemented operations 961 * Needed so all function pointers are valid and do not have to be tested 962 * before use 963 * 964 **************************************************************************/ 965 int efx_port_dummy_op_int(struct efx_nic *efx) 966 { 967 return 0; 968 } 969 void efx_port_dummy_op_void(struct efx_nic *efx) {} 970 971 /************************************************************************** 972 * 973 * Data housekeeping 974 * 975 **************************************************************************/ 976 977 /* This zeroes out and then fills in the invariants in a struct 978 * efx_nic (including all sub-structures). 979 */ 980 int efx_init_struct(struct efx_nic *efx, 981 struct pci_dev *pci_dev, struct net_device *net_dev) 982 { 983 int rc = -ENOMEM; 984 985 /* Initialise common structures */ 986 INIT_LIST_HEAD(&efx->node); 987 INIT_LIST_HEAD(&efx->secondary_list); 988 spin_lock_init(&efx->biu_lock); 989 #ifdef CONFIG_SFC_MTD 990 INIT_LIST_HEAD(&efx->mtd_list); 991 #endif 992 INIT_WORK(&efx->reset_work, efx_reset_work); 993 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 994 efx_selftest_async_init(efx); 995 efx->pci_dev = pci_dev; 996 efx->msg_enable = debug; 997 efx->state = STATE_UNINIT; 998 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 999 1000 efx->net_dev = net_dev; 1001 efx->rx_prefix_size = efx->type->rx_prefix_size; 1002 efx->rx_ip_align = 1003 NET_IP_ALIGN ? 
(efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0; 1004 efx->rx_packet_hash_offset = 1005 efx->type->rx_hash_offset - efx->type->rx_prefix_size; 1006 efx->rx_packet_ts_offset = 1007 efx->type->rx_ts_offset - efx->type->rx_prefix_size; 1008 INIT_LIST_HEAD(&efx->rss_context.list); 1009 efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; 1010 mutex_init(&efx->rss_lock); 1011 efx->vport_id = EVB_PORT_ID_ASSIGNED; 1012 spin_lock_init(&efx->stats_lock); 1013 efx->vi_stride = EFX_DEFAULT_VI_STRIDE; 1014 efx->num_mac_stats = MC_CMD_MAC_NSTATS; 1015 BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); 1016 mutex_init(&efx->mac_lock); 1017 init_rwsem(&efx->filter_sem); 1018 #ifdef CONFIG_RFS_ACCEL 1019 mutex_init(&efx->rps_mutex); 1020 spin_lock_init(&efx->rps_hash_lock); 1021 /* Failure to allocate is not fatal, but may degrade ARFS performance */ 1022 efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE, 1023 sizeof(*efx->rps_hash_table), GFP_KERNEL); 1024 #endif 1025 efx->mdio.dev = net_dev; 1026 INIT_WORK(&efx->mac_work, efx_mac_work); 1027 init_waitqueue_head(&efx->flush_wq); 1028 1029 efx->tx_queues_per_channel = 1; 1030 efx->rxq_entries = EFX_DEFAULT_DMAQ_SIZE; 1031 efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE; 1032 1033 efx->mem_bar = UINT_MAX; 1034 1035 rc = efx_init_channels(efx); 1036 if (rc) 1037 goto fail; 1038 1039 /* Would be good to use the net_dev name, but we're too early */ 1040 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 1041 pci_name(pci_dev)); 1042 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 1043 if (!efx->workqueue) { 1044 rc = -ENOMEM; 1045 goto fail; 1046 } 1047 1048 return 0; 1049 1050 fail: 1051 efx_fini_struct(efx); 1052 return rc; 1053 } 1054 1055 void efx_fini_struct(struct efx_nic *efx) 1056 { 1057 #ifdef CONFIG_RFS_ACCEL 1058 kfree(efx->rps_hash_table); 1059 #endif 1060 1061 efx_fini_channels(efx); 1062 1063 kfree(efx->vpd_sn); 1064 1065 if (efx->workqueue) { 1066 
destroy_workqueue(efx->workqueue); 1067 efx->workqueue = NULL; 1068 } 1069 } 1070 1071 /* This configures the PCI device to enable I/O and DMA. */ 1072 int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask, 1073 unsigned int mem_map_size) 1074 { 1075 struct pci_dev *pci_dev = efx->pci_dev; 1076 int rc; 1077 1078 efx->mem_bar = UINT_MAX; 1079 1080 netif_dbg(efx, probe, efx->net_dev, "initialising I/O bar=%d\n", bar); 1081 1082 rc = pci_enable_device(pci_dev); 1083 if (rc) { 1084 netif_err(efx, probe, efx->net_dev, 1085 "failed to enable PCI device\n"); 1086 goto fail1; 1087 } 1088 1089 pci_set_master(pci_dev); 1090 1091 rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); 1092 if (rc) { 1093 netif_err(efx, probe, efx->net_dev, 1094 "could not find a suitable DMA mask\n"); 1095 goto fail2; 1096 } 1097 netif_dbg(efx, probe, efx->net_dev, 1098 "using DMA mask %llx\n", (unsigned long long)dma_mask); 1099 1100 efx->membase_phys = pci_resource_start(efx->pci_dev, bar); 1101 if (!efx->membase_phys) { 1102 netif_err(efx, probe, efx->net_dev, 1103 "ERROR: No BAR%d mapping from the BIOS. 
" 1104 "Try pci=realloc on the kernel command line\n", bar); 1105 rc = -ENODEV; 1106 goto fail3; 1107 } 1108 1109 rc = pci_request_region(pci_dev, bar, "sfc"); 1110 if (rc) { 1111 netif_err(efx, probe, efx->net_dev, 1112 "request for memory BAR[%d] failed\n", bar); 1113 rc = -EIO; 1114 goto fail3; 1115 } 1116 efx->mem_bar = bar; 1117 efx->membase = ioremap(efx->membase_phys, mem_map_size); 1118 if (!efx->membase) { 1119 netif_err(efx, probe, efx->net_dev, 1120 "could not map memory BAR[%d] at %llx+%x\n", bar, 1121 (unsigned long long)efx->membase_phys, mem_map_size); 1122 rc = -ENOMEM; 1123 goto fail4; 1124 } 1125 netif_dbg(efx, probe, efx->net_dev, 1126 "memory BAR[%d] at %llx+%x (virtual %p)\n", bar, 1127 (unsigned long long)efx->membase_phys, mem_map_size, 1128 efx->membase); 1129 1130 return 0; 1131 1132 fail4: 1133 pci_release_region(efx->pci_dev, bar); 1134 fail3: 1135 efx->membase_phys = 0; 1136 fail2: 1137 pci_disable_device(efx->pci_dev); 1138 fail1: 1139 return rc; 1140 } 1141 1142 void efx_fini_io(struct efx_nic *efx) 1143 { 1144 netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); 1145 1146 if (efx->membase) { 1147 iounmap(efx->membase); 1148 efx->membase = NULL; 1149 } 1150 1151 if (efx->membase_phys) { 1152 pci_release_region(efx->pci_dev, efx->mem_bar); 1153 efx->membase_phys = 0; 1154 efx->mem_bar = UINT_MAX; 1155 } 1156 1157 /* Don't disable bus-mastering if VFs are assigned */ 1158 if (!pci_vfs_assigned(efx->pci_dev)) 1159 pci_disable_device(efx->pci_dev); 1160 } 1161 1162 #ifdef CONFIG_SFC_MCDI_LOGGING 1163 static ssize_t mcdi_logging_show(struct device *dev, 1164 struct device_attribute *attr, 1165 char *buf) 1166 { 1167 struct efx_nic *efx = dev_get_drvdata(dev); 1168 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 1169 1170 return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); 1171 } 1172 1173 static ssize_t mcdi_logging_store(struct device *dev, 1174 struct device_attribute *attr, 1175 const char *buf, size_t count) 1176 { 1177 
struct efx_nic *efx = dev_get_drvdata(dev); 1178 struct efx_mcdi_iface *mcdi = efx_mcdi(efx); 1179 bool enable = count > 0 && *buf != '0'; 1180 1181 mcdi->logging_enabled = enable; 1182 return count; 1183 } 1184 1185 static DEVICE_ATTR_RW(mcdi_logging); 1186 1187 void efx_init_mcdi_logging(struct efx_nic *efx) 1188 { 1189 int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1190 1191 if (rc) { 1192 netif_warn(efx, drv, efx->net_dev, 1193 "failed to init net dev attributes\n"); 1194 } 1195 } 1196 1197 void efx_fini_mcdi_logging(struct efx_nic *efx) 1198 { 1199 device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging); 1200 } 1201 #endif 1202 1203 /* A PCI error affecting this device was detected. 1204 * At this point MMIO and DMA may be disabled. 1205 * Stop the software path and request a slot reset. 1206 */ 1207 static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev, 1208 pci_channel_state_t state) 1209 { 1210 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1211 struct efx_nic *efx = pci_get_drvdata(pdev); 1212 1213 if (state == pci_channel_io_perm_failure) 1214 return PCI_ERS_RESULT_DISCONNECT; 1215 1216 rtnl_lock(); 1217 1218 if (efx->state != STATE_DISABLED) { 1219 efx->state = STATE_RECOVERY; 1220 efx->reset_pending = 0; 1221 1222 efx_device_detach_sync(efx); 1223 1224 efx_stop_all(efx); 1225 efx_disable_interrupts(efx); 1226 1227 status = PCI_ERS_RESULT_NEED_RESET; 1228 } else { 1229 /* If the interface is disabled we don't want to do anything 1230 * with it. 1231 */ 1232 status = PCI_ERS_RESULT_RECOVERED; 1233 } 1234 1235 rtnl_unlock(); 1236 1237 pci_disable_device(pdev); 1238 1239 return status; 1240 } 1241 1242 /* Fake a successful reset, which will be performed later in efx_io_resume. 
*/ 1243 static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev) 1244 { 1245 struct efx_nic *efx = pci_get_drvdata(pdev); 1246 pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED; 1247 1248 if (pci_enable_device(pdev)) { 1249 netif_err(efx, hw, efx->net_dev, 1250 "Cannot re-enable PCI device after reset.\n"); 1251 status = PCI_ERS_RESULT_DISCONNECT; 1252 } 1253 1254 return status; 1255 } 1256 1257 /* Perform the actual reset and resume I/O operations. */ 1258 static void efx_io_resume(struct pci_dev *pdev) 1259 { 1260 struct efx_nic *efx = pci_get_drvdata(pdev); 1261 int rc; 1262 1263 rtnl_lock(); 1264 1265 if (efx->state == STATE_DISABLED) 1266 goto out; 1267 1268 rc = efx_reset(efx, RESET_TYPE_ALL); 1269 if (rc) { 1270 netif_err(efx, hw, efx->net_dev, 1271 "efx_reset failed after PCI error (%d)\n", rc); 1272 } else { 1273 efx->state = STATE_READY; 1274 netif_dbg(efx, hw, efx->net_dev, 1275 "Done resetting and resuming IO after PCI error.\n"); 1276 } 1277 1278 out: 1279 rtnl_unlock(); 1280 } 1281 1282 /* For simplicity and reliability, we always require a slot reset and try to 1283 * reset the hardware when a pci error affecting the device is detected. 1284 * We leave both the link_reset and mmio_enabled callback unimplemented: 1285 * with our request for slot reset the mmio_enabled callback will never be 1286 * called, and the link_reset callback is not used by AER or EEH mechanisms. 1287 */ 1288 const struct pci_error_handlers efx_err_handlers = { 1289 .error_detected = efx_io_error_detected, 1290 .slot_reset = efx_io_slot_reset, 1291 .resume = efx_io_resume, 1292 }; 1293 1294 /* Determine whether the NIC will be able to handle TX offloads for a given 1295 * encapsulated packet. 1296 */ 1297 static bool efx_can_encap_offloads(struct efx_nic *efx, struct sk_buff *skb) 1298 { 1299 struct gre_base_hdr *greh; 1300 __be16 dst_port; 1301 u8 ipproto; 1302 1303 /* Does the NIC support encap offloads? 
1304 * If not, we should never get here, because we shouldn't have 1305 * advertised encap offload feature flags in the first place. 1306 */ 1307 if (WARN_ON_ONCE(!efx->type->udp_tnl_has_port)) 1308 return false; 1309 1310 /* Determine encapsulation protocol in use */ 1311 switch (skb->protocol) { 1312 case htons(ETH_P_IP): 1313 ipproto = ip_hdr(skb)->protocol; 1314 break; 1315 case htons(ETH_P_IPV6): 1316 /* If there are extension headers, this will cause us to 1317 * think we can't offload something that we maybe could have. 1318 */ 1319 ipproto = ipv6_hdr(skb)->nexthdr; 1320 break; 1321 default: 1322 /* Not IP, so can't offload it */ 1323 return false; 1324 } 1325 switch (ipproto) { 1326 case IPPROTO_GRE: 1327 /* We support NVGRE but not IP over GRE or random gretaps. 1328 * Specifically, the NIC will accept GRE as encapsulated if 1329 * the inner protocol is Ethernet, but only handle it 1330 * correctly if the GRE header is 8 bytes long. Moreover, 1331 * it will not update the Checksum or Sequence Number fields 1332 * if they are present. (The Routing Present flag, 1333 * GRE_ROUTING, cannot be set else the header would be more 1334 * than 8 bytes long; so we don't have to worry about it.) 1335 */ 1336 if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) 1337 return false; 1338 if (ntohs(skb->inner_protocol) != ETH_P_TEB) 1339 return false; 1340 if (skb_inner_mac_header(skb) - skb_transport_header(skb) != 8) 1341 return false; 1342 greh = (struct gre_base_hdr *)skb_transport_header(skb); 1343 return !(greh->flags & (GRE_CSUM | GRE_SEQ)); 1344 case IPPROTO_UDP: 1345 /* If the port is registered for a UDP tunnel, we assume the 1346 * packet is for that tunnel, and the NIC will handle it as 1347 * such. If not, the NIC won't know what to do with it. 
1348 */ 1349 dst_port = udp_hdr(skb)->dest; 1350 return efx->type->udp_tnl_has_port(efx, dst_port); 1351 default: 1352 return false; 1353 } 1354 } 1355 1356 netdev_features_t efx_features_check(struct sk_buff *skb, struct net_device *dev, 1357 netdev_features_t features) 1358 { 1359 struct efx_nic *efx = netdev_priv(dev); 1360 1361 if (skb->encapsulation) { 1362 if (features & NETIF_F_GSO_MASK) 1363 /* Hardware can only do TSO with at most 208 bytes 1364 * of headers. 1365 */ 1366 if (skb_inner_transport_offset(skb) > 1367 EFX_TSO2_MAX_HDRLEN) 1368 features &= ~(NETIF_F_GSO_MASK); 1369 if (features & (NETIF_F_GSO_MASK | NETIF_F_CSUM_MASK)) 1370 if (!efx_can_encap_offloads(efx, skb)) 1371 features &= ~(NETIF_F_GSO_MASK | 1372 NETIF_F_CSUM_MASK); 1373 } 1374 return features; 1375 } 1376 1377 int efx_get_phys_port_id(struct net_device *net_dev, 1378 struct netdev_phys_item_id *ppid) 1379 { 1380 struct efx_nic *efx = netdev_priv(net_dev); 1381 1382 if (efx->type->get_phys_port_id) 1383 return efx->type->get_phys_port_id(efx, ppid); 1384 else 1385 return -EOPNOTSUPP; 1386 } 1387 1388 int efx_get_phys_port_name(struct net_device *net_dev, char *name, size_t len) 1389 { 1390 struct efx_nic *efx = netdev_priv(net_dev); 1391 1392 if (snprintf(name, len, "p%u", efx->port_num) >= len) 1393 return -EINVAL; 1394 return 0; 1395 } 1396