// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
#include "efx_common.h"
#include "efx_channels.h"
#include "rx_common.h"
#include "tx_common.h"
#include "nic.h"
#include "io.h"
#include "selftest.h"
#include "sriov.h"

#include "mcdi.h"
#include "mcdi_pcol.h"
#include "workarounds.h"

/**************************************************************************
 *
 * Type name strings
 *
 **************************************************************************
 */

/* UDP tunnel type names */
static const char *const efx_udp_tunnel_type_names[] = {
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
};

void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
{
	if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
	    efx_udp_tunnel_type_names[type] != NULL)
		snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
	else
		snprintf(buf, buflen, "type %d", type);
}

/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
 * Set this to 1 to use separate channels for TX and RX. It allows us
 * to control interrupt affinity separately for TX and RX.
 *
 * This is only used in MSI-X interrupt mode
 */
bool efx_separate_tx_channels;
module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
		 "Use separate channels for TX and RX");

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full.  A queue is
 * restarted when it drops below half full.  The time this takes (assuming
 * worst case 3 descriptors per packet and 1024 descriptors) is
 *   512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;
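
/* Both of the above map onto the standard coalescing controls at
 * runtime, e.g. "ethtool -C <iface> rx-usecs 60 tx-usecs 150"
 * (illustrative values, matching the defaults here).
 */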

static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			     NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			     NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			     NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
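
/* The bits correspond to the NETIF_MSG_* flags in <linux/netdevice.h>;
 * the level of a running interface can also be changed with e.g.
 * "ethtool -s <iface> msglvl <bitmap>" (illustrative usage, not
 * specific to this driver).
 */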

/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/

static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
			u32 flags);

#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_READY) ||	\
		    (efx->state == STATE_RECOVERY) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)
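
/* In any of these states the net device is, or may be about to become,
 * visible to the stack, so reset handling must be serialised against it
 * via the RTNL lock.
 */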

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 * force the Autoneg bit on.
 */
void efx_link_clear_advertising(struct efx_nic *efx)
{
	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}

void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
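		/* The advertised encoding is: RX+TX -> Pause, RX only ->
		 * Pause|Asym_Pause, TX only -> Asym_Pause.  Toggling
		 * Asym_Pause turns the RX-derived value set above into
		 * exactly that mapping.
		 */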
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}

static void efx_fini_port(struct efx_nic *efx);

static int efx_probe_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "create port\n");

	if (phy_flash_cfg)
		efx->phy_mode = PHY_MODE_SPECIAL;

	/* Connect up MAC/PHY operations table */
	rc = efx->type->probe_port(efx);
	if (rc)
		return rc;

	/* Initialise MAC address to permanent address */
	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);

	return 0;
}

static int efx_init_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, drv, efx->net_dev, "init port\n");

	mutex_lock(&efx->mac_lock);

	rc = efx->phy_op->init(efx);
	if (rc)
		goto fail1;

	efx->port_initialized = true;

	/* Reconfigure the MAC before creating dma queues (required for
	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported)
	 */
	efx_mac_reconfigure(efx);

	/* Ensure the PHY advertises the correct flow control settings */
	rc = efx->phy_op->reconfigure(efx);
	if (rc && rc != -EPERM)
		goto fail2;

	mutex_unlock(&efx->mac_lock);
	return 0;

fail2:
	efx->phy_op->fini(efx);
fail1:
	mutex_unlock(&efx->mac_lock);
	return rc;
}

static void efx_fini_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");

	if (!efx->port_initialized)
		return;

	efx->phy_op->fini(efx);
	efx->port_initialized = false;

	efx->link_state.up = false;
	efx_link_status_changed(efx);
}

static void efx_remove_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");

	efx->type->remove_port(efx);
}

/**************************************************************************
 *
 * NIC handling
 *
 **************************************************************************/

static LIST_HEAD(efx_primary_list);
static LIST_HEAD(efx_unassociated_list);

static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
{
	return left->type == right->type &&
		left->vpd_sn && right->vpd_sn &&
		!strcmp(left->vpd_sn, right->vpd_sn);
}

static void efx_associate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	if (efx->primary == efx) {
		/* Adding primary function; look for secondaries */

		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
		list_add_tail(&efx->node, &efx_primary_list);

		list_for_each_entry_safe(other, next, &efx_unassociated_list,
					 node) {
			if (efx_same_controller(efx, other)) {
				list_del(&other->node);
				netif_dbg(other, probe, other->net_dev,
					  "moving to secondary list of %s %s\n",
					  pci_name(efx->pci_dev),
					  efx->net_dev->name);
				list_add_tail(&other->node,
					      &efx->secondary_list);
				other->primary = efx;
			}
		}
	} else {
		/* Adding secondary function; look for primary */

		list_for_each_entry(other, &efx_primary_list, node) {
			if (efx_same_controller(efx, other)) {
				netif_dbg(efx, probe, efx->net_dev,
					  "adding to secondary list of %s %s\n",
					  pci_name(other->pci_dev),
					  other->net_dev->name);
				list_add_tail(&efx->node,
					      &other->secondary_list);
				efx->primary = other;
				return;
			}
		}

		netif_dbg(efx, probe, efx->net_dev,
			  "adding to unassociated list\n");
		list_add_tail(&efx->node, &efx_unassociated_list);
	}
}

static void efx_dissociate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	list_del(&efx->node);
	efx->primary = NULL;

	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
		list_del(&other->node);
		netif_dbg(other, probe, other->net_dev,
			  "moving to unassociated list\n");
		list_add_tail(&other->node, &efx_unassociated_list);
		other->primary = NULL;
	}
}

static int efx_probe_nic(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");

	/* Carry out hardware-type specific initialisation */
	rc = efx->type->probe(efx);
	if (rc)
		return rc;

	do {
		if (!efx->max_channels || !efx->max_tx_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "Insufficient resources to allocate any channels\n");
			rc = -ENOSPC;
			goto fail1;
		}

		/* Determine the number of channels and queues by trying
		 * to hook in MSI-X interrupts.
		 */
		rc = efx_probe_interrupts(efx);
		if (rc)
			goto fail1;

		rc = efx_set_channels(efx);
		if (rc)
			goto fail1;

		/* dimension_resources can fail with EAGAIN */
		rc = efx->type->dimension_resources(efx);
		if (rc != 0 && rc != -EAGAIN)
			goto fail2;

		if (rc == -EAGAIN)
			/* try again with new max_channels */
			efx_remove_interrupts(efx);

	} while (rc == -EAGAIN);

	if (efx->n_channels > 1)
		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
				    sizeof(efx->rss_context.rx_hash_key));
	efx_set_default_rx_indir_table(efx, &efx->rss_context);

	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);

	/* Initialise the interrupt moderation settings */
	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);

	return 0;

fail2:
	efx_remove_interrupts(efx);
fail1:
	efx->type->remove(efx);
	return rc;
}

static void efx_remove_nic(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");

	efx_remove_interrupts(efx);
	efx->type->remove(efx);
}

/**************************************************************************
 *
 * NIC startup/shutdown
 *
 *************************************************************************/

static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
		goto fail2;
	}

	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
		rc = -EINVAL;
		goto fail3;
	}
	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_probe(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to set up vswitching rc=%d; VFs may not function\n",
			   rc);
#endif

	rc = efx_probe_filters(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to create filter tables\n");
		goto fail4;
	}

	rc = efx_probe_channels(efx);
	if (rc)
		goto fail5;

	return 0;

 fail5:
	efx_remove_filters(efx);
 fail4:
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
 fail3:
	efx_remove_port(efx);
 fail2:
	efx_remove_nic(efx);
 fail1:
	return rc;
}

static void efx_remove_all(struct efx_nic *efx)
{
	rtnl_lock();
	efx_xdp_setup_prog(efx, NULL);
	rtnl_unlock();

	efx_remove_channels(efx);
	efx_remove_filters(efx);
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
	efx_remove_port(efx);
	efx_remove_nic(efx);
}

/**************************************************************************
 *
 * Interrupt moderation
 *
 **************************************************************************/
unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
{
	if (usecs == 0)
		return 0;
	if (usecs * 1000 < efx->timer_quantum_ns)
		return 1; /* never round down to 0 */
	return usecs * 1000 / efx->timer_quantum_ns;
}

unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
{
	/* We must round up when converting ticks to microseconds
	 * because we round down when converting the other way.
	 */
	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
}
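
/* Worked example: with a (hypothetical) timer quantum of 5000 ns,
 * 60 usecs converts down to 12 ticks, and 12 ticks converts back up
 * to exactly 60 usecs, so the two directions agree with each other.
 */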

/* Set interrupt moderation parameters */
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
			    unsigned int rx_usecs, bool rx_adaptive,
			    bool rx_may_override_tx)
{
	struct efx_channel *channel;
	unsigned int timer_max_us;

	EFX_ASSERT_RESET_SERIALISED(efx);

	timer_max_us = efx->timer_max_ns / 1000;

	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
		return -EINVAL;

	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
	    !rx_may_override_tx) {
		netif_err(efx, drv, efx->net_dev,
			  "Channels are shared. RX and TX IRQ moderation must be equal\n");
		return -EINVAL;
	}

	efx->irq_rx_adaptive = rx_adaptive;
	efx->irq_rx_moderation_us = rx_usecs;
	efx_for_each_channel(channel, efx) {
		if (efx_channel_has_rx_queue(channel))
			channel->irq_moderation_us = rx_usecs;
		else if (efx_channel_has_tx_queues(channel))
			channel->irq_moderation_us = tx_usecs;
		else if (efx_channel_is_xdp_tx(channel))
			channel->irq_moderation_us = tx_usecs;
	}

	return 0;
}
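
/* efx_init_irq_moderation() and efx_get_irq_moderation() below back the
 * driver's ethtool coalescing set/get paths.
 */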

void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
			    unsigned int *rx_usecs, bool *rx_adaptive)
{
	*rx_adaptive = efx->irq_rx_adaptive;
	*rx_usecs = efx->irq_rx_moderation_us;

	/* If channels are shared between RX and TX, so is IRQ
	 * moderation.  Otherwise, IRQ moderation is the same for all
	 * TX channels and is not adaptive.
	 */
	if (efx->tx_channel_offset == 0) {
		*tx_usecs = *rx_usecs;
	} else {
		struct efx_channel *tx_channel;

		tx_channel = efx->channel[efx->tx_channel_offset];
		*tx_usecs = tx_channel->irq_moderation_us;
	}
}

/**************************************************************************
 *
 * ioctls
 *
 *************************************************************************/

/* Net device ioctl
 * Context: process, rtnl_lock() held.
 */
static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (cmd == SIOCSHWTSTAMP)
		return efx_ptp_set_ts_config(efx, ifr);
	if (cmd == SIOCGHWTSTAMP)
		return efx_ptp_get_ts_config(efx, ifr);

	/* Convert phy_id from older PRTAD/DEVAD format */
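	/* (The old format is recognised by bit 10 being set in the top six
	 * bits; the XOR below clears that bit and sets MDIO_PHY_ID_C45,
	 * giving the clause 45 address form that mdio_mii_ioctl() expects.)
	 */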
	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
	    (data->phy_id & 0xfc00) == 0x0400)
		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;

	return mdio_mii_ioctl(&efx->mdio, data, cmd);
}

/**************************************************************************
 *
 * Kernel net device interface
 *
 *************************************************************************/

/* Context: process, rtnl_lock() held. */
int efx_net_open(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
		  raw_smp_processor_id());

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;
	if (efx->phy_mode & PHY_MODE_SPECIAL)
		return -EBUSY;
	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
		return -EIO;

	/* Notify the kernel of the link state polled during driver load,
	 * before the monitor starts running
	 */
	efx_link_status_changed(efx);

	efx_start_all(efx);
	if (efx->state == STATE_DISABLED || efx->reset_pending)
		netif_device_detach(efx->net_dev);
	efx_selftest_async_start(efx);
	return 0;
}

/* Context: process, rtnl_lock() held.
 * Note that the kernel will ignore our return code; this method
 * should really be void.
 */
int efx_net_stop(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
		  raw_smp_processor_id());

	/* Stop the device and flush all the channels */
	efx_stop_all(efx);

	return 0;
}

/* Context: netif_tx_lock held, BHs disabled. */
static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}

static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct sockaddr *addr = data;
	u8 *new_addr = addr->sa_data;
	u8 old_addr[ETH_ALEN];
	int rc;

	if (!is_valid_ether_addr(new_addr)) {
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
		return -EADDRNOTAVAIL;
	}

	/* save old address */
	ether_addr_copy(old_addr, net_dev->dev_addr);
	ether_addr_copy(net_dev->dev_addr, new_addr);
	if (efx->type->set_mac_address) {
		rc = efx->type->set_mac_address(efx);
		if (rc) {
			ether_addr_copy(net_dev->dev_addr, old_addr);
			return rc;
		}
	}

	/* Reconfigure the MAC */
	mutex_lock(&efx->mac_lock);
	efx_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);

	return 0;
}

/* Context: netif_addr_lock held, BHs disabled. */
static void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}

static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
		if (rc)
			return rc;
	}

	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
	 * If rx-fcs is changed, mac_reconfigure updates that too.
	 */
	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
					  NETIF_F_RXFCS)) {
		/* efx_set_rx_mode() will schedule MAC work to update filters
		 * when the new features are finally set in net_dev.
		 */
		efx_set_rx_mode(net_dev);
	}

	return 0;
}

static int efx_get_phys_port_id(struct net_device *net_dev,
				struct netdev_phys_item_id *ppid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->get_phys_port_id)
		return efx->type->get_phys_port_id(efx, ppid);
	else
		return -EOPNOTSUPP;
}

static int efx_get_phys_port_name(struct net_device *net_dev,
				  char *name, size_t len)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (snprintf(name, len, "p%u", efx->port_num) >= len)
		return -EINVAL;
	return 0;
}

static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->vlan_rx_add_vid)
		return efx->type->vlan_rx_add_vid(efx, proto, vid);
	else
		return -EOPNOTSUPP;
}

static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->vlan_rx_kill_vid)
		return efx->type->vlan_rx_kill_vid(efx, proto, vid);
	else
		return -EOPNOTSUPP;
}

static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
{
	switch (in) {
	case UDP_TUNNEL_TYPE_VXLAN:
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
	case UDP_TUNNEL_TYPE_GENEVE:
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
	default:
		return -1;
	}
}

static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct efx_udp_tunnel tnl;
	int efx_tunnel_type;

	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
	if (efx_tunnel_type < 0)
		return;

	tnl.type = (u16)efx_tunnel_type;
	tnl.port = ti->port;

	if (efx->type->udp_tnl_add_port)
		(void)efx->type->udp_tnl_add_port(efx, tnl);
}

static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct efx_udp_tunnel tnl;
	int efx_tunnel_type;

	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
	if (efx_tunnel_type < 0)
		return;

	tnl.type = (u16)efx_tunnel_type;
	tnl.port = ti->port;

	if (efx->type->udp_tnl_del_port)
		(void)efx->type->udp_tnl_del_port(efx, tnl);
}

static const struct net_device_ops efx_netdev_ops = {
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
	.ndo_get_stats64	= efx_net_stats,
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
#ifdef CONFIG_SFC_SRIOV
	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
	.ndo_get_vf_config	= efx_sriov_get_vf_config,
	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
#endif
	.ndo_get_phys_port_id   = efx_get_phys_port_id,
	.ndo_get_phys_port_name	= efx_get_phys_port_name,
	.ndo_setup_tc		= efx_setup_tc,
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
	.ndo_xdp_xmit		= efx_xdp_xmit,
	.ndo_bpf		= efx_xdp
};

static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
{
	struct bpf_prog *old_prog;

	if (efx->xdp_rxq_info_failed) {
		netif_err(efx, drv, efx->net_dev,
			  "Unable to bind XDP program due to previous failure of rxq_info\n");
		return -EINVAL;
	}

	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
		netif_err(efx, drv, efx->net_dev,
			  "Unable to configure XDP with MTU of %d (max: %d)\n",
			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
		return -EINVAL;
	}

	old_prog = rtnl_dereference(efx->xdp_prog);
	rcu_assign_pointer(efx->xdp_prog, prog);
	/* Release the reference that was originally passed by the caller
	 * when old_prog was installed.
	 */
	if (old_prog)
		bpf_prog_put(old_prog);

	return 0;
}

/* Context: process, rtnl_lock() held. */
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct bpf_prog *xdp_prog;

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return efx_xdp_setup_prog(efx, xdp->prog);
	case XDP_QUERY_PROG:
		xdp_prog = rtnl_dereference(efx->xdp_prog);
		xdp->prog_id = xdp_prog ? xdp_prog->aux->id : 0;
		return 0;
	default:
		return -EINVAL;
	}
}

static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
			u32 flags)
{
	struct efx_nic *efx = netdev_priv(dev);

	if (!netif_running(dev))
		return -EINVAL;

	return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
}

static void efx_update_name(struct efx_nic *efx)
{
	strcpy(efx->name, efx->net_dev->name);
	efx_mtd_rename(efx);
	efx_set_channel_names(efx);
}

static int efx_netdev_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);

	if ((net_dev->netdev_ops == &efx_netdev_ops) &&
	    event == NETDEV_CHANGENAME)
		efx_update_name(netdev_priv(net_dev));

	return NOTIFY_DONE;
}

static struct notifier_block efx_netdev_notifier = {
	.notifier_call = efx_netdev_event,
};

static ssize_t
show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct efx_nic *efx = dev_get_drvdata(dev);

	return sprintf(buf, "%d\n", efx->phy_type);
}
static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);

static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct efx_channel *channel;
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
	net_dev->netdev_ops = &efx_netdev_ops;
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		net_dev->priv_flags |= IFF_UNICAST_FLT;
	net_dev->ethtool_ops = &efx_ethtool_ops;
	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
	net_dev->min_mtu = EFX_MIN_MTU;
	net_dev->max_mtu = EFX_MAX_MTU;

	rtnl_lock();

	/* Enable resets to be scheduled and check whether any were
	 * already requested.  If so, the NIC is probably hosed so we
	 * abort.
	 */
	efx->state = STATE_READY;
	smp_mb(); /* ensure we change state before checking reset_pending */
	if (efx->reset_pending) {
		netif_err(efx, probe, efx->net_dev,
			  "aborting probe due to scheduled reset\n");
		rc = -EIO;
		goto fail_locked;
	}

	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
	efx_update_name(efx);

	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;

		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
	}

	efx_associate(efx);

	rtnl_unlock();

	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_registered;
	}

	efx_init_mcdi_logging(efx);

	return 0;

fail_registered:
	rtnl_lock();
	efx_dissociate(efx);
	unregister_netdevice(net_dev);
fail_locked:
	efx->state = STATE_UNINIT;
	rtnl_unlock();
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
	return rc;
}

static void efx_unregister_netdev(struct efx_nic *efx)
{
	if (!efx->net_dev)
		return;

	BUG_ON(netdev_priv(efx->net_dev) != efx);

	if (efx_dev_registered(efx)) {
		strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
		efx_fini_mcdi_logging(efx);
		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
		unregister_netdev(efx->net_dev);
	}
}

/**************************************************************************
 *
 * List of NICs we support
 *
 **************************************************************************/

/* PCI device ID table */
static const struct pci_device_id efx_pci_table[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{0}			/* end of list */
};

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
{
	u64 n_rx_nodesc_trunc = 0;
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}

/**************************************************************************
 *
 * PCI interface
 *
 **************************************************************************/

/* Main body of final NIC shutdown code
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove_main(struct efx_nic *efx)
{
	/* Flush reset_work. It can no longer be scheduled since we
	 * are not READY.
	 */
	BUG_ON(efx->state == STATE_READY);
	efx_flush_reset_workqueue(efx);

	efx_disable_interrupts(efx);
	efx_clear_interrupt_affinity(efx);
	efx_nic_fini_interrupt(efx);
	efx_fini_port(efx);
	efx->type->fini(efx);
	efx_fini_napi(efx);
	efx_remove_all(efx);
}

/* Final NIC shutdown
 * This is called only at module unload (or hotplug removal).  A PF can call
 * this on its VFs to ensure they are unbound first.
 */
static void efx_pci_remove(struct pci_dev *pci_dev)
{
	struct efx_nic *efx;

	efx = pci_get_drvdata(pci_dev);
	if (!efx)
		return;

	/* Mark the NIC as fini, then stop the interface */
	rtnl_lock();
	efx_dissociate(efx);
	dev_close(efx->net_dev);
	efx_disable_interrupts(efx);
	efx->state = STATE_UNINIT;
	rtnl_unlock();

	if (efx->type->sriov_fini)
		efx->type->sriov_fini(efx);

	efx_unregister_netdev(efx);

	efx_mtd_remove(efx);

	efx_pci_remove_main(efx);

	efx_fini_io(efx, efx->type->mem_bar(efx));
	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");

	efx_fini_struct(efx);
	free_netdev(efx->net_dev);

	pci_disable_pcie_error_reporting(pci_dev);
}

/* NIC VPD information
 * Called during probe to display the part number of the
 * installed NIC.  VPD is potentially very large but this should
 * always appear within the first 512 bytes.
 */
#define SFC_VPD_LEN 512
static void efx_probe_vpd_strings(struct efx_nic *efx)
{
	struct pci_dev *dev = efx->pci_dev;
	char vpd_data[SFC_VPD_LEN];
	ssize_t vpd_size;
	int ro_start, ro_size, i, j;

	/* Get the vpd data from the device */
	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
	if (vpd_size <= 0) {
		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
		return;
	}

	/* Get the Read only section */
	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
	if (ro_start < 0) {
		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
		return;
	}

	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
	j = ro_size;
	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	if (i + j > vpd_size)
		j = vpd_size - i;

	/* Get the Part number */
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
		return;
	}

	netif_info(efx, drv, efx->net_dev,
		   "Part Number : %.*s\n", j, &vpd_data[i]);

	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	j = ro_size;
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
		return;
	}

	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
	if (!efx->vpd_sn)
		return;

	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
}

/* Main body of NIC initialisation
 * This is called at module load (or hotplug insertion, theoretically).
 */
static int efx_pci_probe_main(struct efx_nic *efx)
{
	int rc;

	/* Do start-of-day initialisation */
	rc = efx_probe_all(efx);
	if (rc)
		goto fail1;

	efx_init_napi(efx);

	down_write(&efx->filter_sem);
	rc = efx->type->init(efx);
	up_write(&efx->filter_sem);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise NIC\n");
		goto fail3;
	}

	rc = efx_init_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise port\n");
		goto fail4;
	}

	rc = efx_nic_init_interrupt(efx);
	if (rc)
		goto fail5;

	efx_set_interrupt_affinity(efx);
	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail6;

	return 0;

 fail6:
	efx_clear_interrupt_affinity(efx);
	efx_nic_fini_interrupt(efx);
 fail5:
	efx_fini_port(efx);
 fail4:
	efx->type->fini(efx);
 fail3:
	efx_fini_napi(efx);
	efx_remove_all(efx);
 fail1:
	return rc;
}

static int efx_pci_probe_post_io(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	int rc = efx_pci_probe_main(efx);

	if (rc)
		return rc;

	if (efx->type->sriov_init) {
		rc = efx->type->sriov_init(efx);
		if (rc)
			netif_err(efx, probe, efx->net_dev,
				  "SR-IOV can't be enabled rc %d\n", rc);
	}

	/* Determine netdevice features */
	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		net_dev->features |= NETIF_F_TSO6;
	/* Check whether device supports TSO */
	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
		net_dev->features &= ~NETIF_F_ALL_TSO;
	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				   NETIF_F_RXCSUM);

	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;

	/* Disable receiving frames with bad FCS, by default. */
	net_dev->features &= ~NETIF_F_RXALL;

	/* Disable VLAN filtering by default.  It may be enforced if
	 * the feature is fixed (i.e. VLAN filters are required to
	 * receive VLAN tagged packets due to vPort restrictions).
	 */
	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
	net_dev->features |= efx->fixed_features;

	rc = efx_register_netdev(efx);
	if (!rc)
		return 0;

	efx_pci_remove_main(efx);
	return rc;
}

/* NIC initialisation
 *
 * This is called at module load (or hotplug insertion,
 * theoretically).  It sets up PCI mappings, resets the NIC,
 * sets up and registers the network devices with the kernel and hooks
 * the interrupt service routine.  It does not prepare the device for
 * transmission; this is left to the first time one of the network
 * interfaces is brought up (i.e. efx_net_open).
 */
static int efx_pci_probe(struct pci_dev *pci_dev,
			 const struct pci_device_id *entry)
{
	struct net_device *net_dev;
	struct efx_nic *efx;
	int rc;

	/* Allocate and initialise a struct net_device and struct efx_nic */
	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
				     EFX_MAX_RX_QUEUES);
	if (!net_dev)
		return -ENOMEM;
	efx = netdev_priv(net_dev);
	efx->type = (const struct efx_nic_type *) entry->driver_data;
	efx->fixed_features |= NETIF_F_HIGHDMA;

	pci_set_drvdata(pci_dev, efx);
	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
	rc = efx_init_struct(efx, pci_dev, net_dev);
	if (rc)
		goto fail1;

	netif_info(efx, probe, efx->net_dev,
		   "Solarflare NIC detected\n");

	if (!efx->type->is_vf)
		efx_probe_vpd_strings(efx);

	/* Set up basic I/O (BAR mappings etc) */
	rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
			 efx->type->mem_map_size(efx));
	if (rc)
		goto fail2;

	rc = efx_pci_probe_post_io(efx);
	if (rc) {
		/* On failure, retry once immediately.
		 * If we aborted probe due to a scheduled reset, dismiss it.
		 */
		efx->reset_pending = 0;
		rc = efx_pci_probe_post_io(efx);
		if (rc) {
			/* On another failure, retry once more
			 * after a 50-305ms delay.
			 */
			unsigned char r;

			get_random_bytes(&r, 1);
			msleep((unsigned int)r + 50);
			efx->reset_pending = 0;
			rc = efx_pci_probe_post_io(efx);
		}
	}
	if (rc)
		goto fail3;

	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");

	/* Try to create MTDs, but allow this to fail */
	rtnl_lock();
	rc = efx_mtd_probe(efx);
	rtnl_unlock();
	if (rc && rc != -EPERM)
		netif_warn(efx, probe, efx->net_dev,
			   "failed to create MTDs (%d)\n", rc);

	(void)pci_enable_pcie_error_reporting(pci_dev);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

 fail3:
	efx_fini_io(efx, efx->type->mem_bar(efx));
 fail2:
	efx_fini_struct(efx);
 fail1:
	WARN_ON(rc > 0);
	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
	free_netdev(net_dev);
	return rc;
}

/* efx_pci_sriov_configure returns the actual number of Virtual Functions
 * enabled on success
 */
#ifdef CONFIG_SFC_SRIOV
static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
{
	int rc;
	struct efx_nic *efx = pci_get_drvdata(dev);

	if (efx->type->sriov_configure) {
		rc = efx->type->sriov_configure(efx, num_vfs);
		if (rc)
			return rc;
		else
			return num_vfs;
	} else
		return -EOPNOTSUPP;
}
#endif

static int efx_pm_freeze(struct device *dev)
{
	struct efx_nic *efx = dev_get_drvdata(dev);

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_UNINIT;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);
	}

	rtnl_unlock();

	return 0;
}

static int efx_pm_thaw(struct device *dev)
{
	int rc;
	struct efx_nic *efx = dev_get_drvdata(dev);

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		rc = efx_enable_interrupts(efx);
		if (rc)
			goto fail;

		mutex_lock(&efx->mac_lock);
		efx->phy_op->reconfigure(efx);
		mutex_unlock(&efx->mac_lock);

		efx_start_all(efx);

		efx_device_attach_if_not_resetting(efx);

		efx->state = STATE_READY;

		efx->type->resume_wol(efx);
	}

	rtnl_unlock();

	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
	efx_queue_reset_work(efx);

	return 0;

fail:
	rtnl_unlock();

	return rc;
}

static int efx_pm_poweroff(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);

	efx->type->fini(efx);

	efx->reset_pending = 0;

	pci_save_state(pci_dev);
	return pci_set_power_state(pci_dev, PCI_D3hot);
}

/* Used for both resume and restore */
static int efx_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);
	int rc;

	rc = pci_set_power_state(pci_dev, PCI_D0);
	if (rc)
		return rc;
	pci_restore_state(pci_dev);
	rc = pci_enable_device(pci_dev);
	if (rc)
		return rc;
	pci_set_master(efx->pci_dev);
	rc = efx->type->reset(efx, RESET_TYPE_ALL);
	if (rc)
		return rc;
	down_write(&efx->filter_sem);
	rc = efx->type->init(efx);
	up_write(&efx->filter_sem);
	if (rc)
		return rc;
	rc = efx_pm_thaw(dev);
	return rc;
}

static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);
	rc = efx_pm_poweroff(dev);
	if (rc)
		efx_pm_resume(dev);
	return rc;
}

static const struct dev_pm_ops efx_pm_ops = {
	.suspend	= efx_pm_suspend,
	.resume		= efx_pm_resume,
	.freeze		= efx_pm_freeze,
	.thaw		= efx_pm_thaw,
	.poweroff	= efx_pm_poweroff,
	.restore	= efx_pm_resume,
};

/* A PCI error affecting this device was detected.
 * At this point MMIO and DMA may be disabled.
 * Stop the software path and request a slot reset.
 */
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      enum pci_channel_state state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_RECOVERY;
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status = PCI_ERS_RESULT_DISCONNECT;
	}

	return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		efx->state = STATE_READY;
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callback unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
static const struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};

static struct pci_driver efx_pci_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= efx_pci_table,
	.probe		= efx_pci_probe,
	.remove		= efx_pci_remove,
	.driver.pm	= &efx_pm_ops,
	.err_handler	= &efx_err_handlers,
#ifdef CONFIG_SFC_SRIOV
	.sriov_configure = efx_pci_sriov_configure,
#endif
};

/**************************************************************************
 *
 * Kernel module interface
 *
 *************************************************************************/

static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

#ifdef CONFIG_SFC_SRIOV
	rc = efx_init_sriov();
	if (rc)
		goto err_sriov;
#endif

	rc = efx_create_reset_workqueue();
	if (rc)
		goto err_reset;

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

 err_pci:
	efx_destroy_reset_workqueue();
 err_reset:
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
 err_sriov:
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
 err_notifier:
	return rc;
}

static void __exit efx_exit_module(void)
{
	printk(KERN_INFO "Solarflare NET driver unloading\n");

	pci_unregister_driver(&efx_pci_driver);
	efx_destroy_reset_workqueue();
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
}

module_init(efx_init_module);
module_exit(efx_exit_module);

MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
MODULE_DESCRIPTION("Solarflare network driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);
MODULE_VERSION(EFX_DRIVER_VERSION);