xref: /openbmc/linux/drivers/net/ethernet/sfc/efx.c (revision 11357f10)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /****************************************************************************
3  * Driver for Solarflare network controllers and boards
4  * Copyright 2005-2006 Fen Systems Ltd.
5  * Copyright 2005-2013 Solarflare Communications Inc.
6  */
7 
8 #include <linux/filter.h>
9 #include <linux/module.h>
10 #include <linux/pci.h>
11 #include <linux/netdevice.h>
12 #include <linux/etherdevice.h>
13 #include <linux/delay.h>
14 #include <linux/notifier.h>
15 #include <linux/ip.h>
16 #include <linux/tcp.h>
17 #include <linux/in.h>
18 #include <linux/ethtool.h>
19 #include <linux/topology.h>
20 #include <linux/gfp.h>
21 #include <linux/aer.h>
22 #include <linux/interrupt.h>
23 #include "net_driver.h"
24 #include <net/gre.h>
25 #include <net/udp_tunnel.h>
26 #include "efx.h"
27 #include "efx_common.h"
28 #include "efx_channels.h"
29 #include "ef100.h"
30 #include "rx_common.h"
31 #include "tx_common.h"
32 #include "nic.h"
33 #include "io.h"
34 #include "selftest.h"
35 #include "sriov.h"
36 
37 #include "mcdi_port_common.h"
38 #include "mcdi_pcol.h"
39 #include "workarounds.h"
40 
41 /**************************************************************************
42  *
43  * Configurable values
44  *
45  *************************************************************************/
46 
47 module_param_named(interrupt_mode, efx_interrupt_mode, uint, 0444);
48 MODULE_PARM_DESC(interrupt_mode,
49 		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
50 
51 module_param(rss_cpus, uint, 0444);
52 MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
53 
54 /*
55  * Use separate channels for TX and RX events
56  *
57  * Set this to 1 to use separate channels for TX and RX. It allows us
58  * to control interrupt affinity separately for TX and RX.
59  *
60  * This is only used in MSI-X interrupt mode
61  */
62 bool efx_separate_tx_channels;
63 module_param(efx_separate_tx_channels, bool, 0444);
64 MODULE_PARM_DESC(efx_separate_tx_channels,
65 		 "Use separate channels for TX and RX");
66 
67 /* Initial interrupt moderation settings.  They can be modified after
68  * module load with ethtool.
69  *
70  * The default for RX should strike a balance between increasing the
71  * round-trip latency and reducing overhead.
72  */
73 static unsigned int rx_irq_mod_usec = 60;
74 
75 /* Initial interrupt moderation settings.  They can be modified after
76  * module load with ethtool.
77  *
78  * This default is chosen to ensure that a 10G link does not go idle
79  * while a TX queue is stopped after it has become full.  A queue is
80  * restarted when it drops below half full.  The time this takes (assuming
81  * worst case 3 descriptors per packet and 1024 descriptors) is
82  *   512 / 3 * 1.2 = 205 usec.
83  */
84 static unsigned int tx_irq_mod_usec = 150;
85 
86 static bool phy_flash_cfg;
87 module_param(phy_flash_cfg, bool, 0644);
88 MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
89 
90 static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
91 			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
92 			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
93 			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
94 module_param(debug, uint, 0);
95 MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
96 
97 /**************************************************************************
98  *
99  * Utility functions and prototypes
100  *
101  *************************************************************************/
102 
103 static void efx_remove_port(struct efx_nic *efx);
104 static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
105 static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
106 static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
107 			u32 flags);
108 
109 #define EFX_ASSERT_RESET_SERIALISED(efx)		\
110 	do {						\
111 		if ((efx->state == STATE_READY) ||	\
112 		    (efx->state == STATE_RECOVERY) ||	\
113 		    (efx->state == STATE_DISABLED))	\
114 			ASSERT_RTNL();			\
115 	} while (0)
116 
117 /**************************************************************************
118  *
119  * Port handling
120  *
121  **************************************************************************/
122 
123 static void efx_fini_port(struct efx_nic *efx);
124 
125 static int efx_probe_port(struct efx_nic *efx)
126 {
127 	int rc;
128 
129 	netif_dbg(efx, probe, efx->net_dev, "create port\n");
130 
131 	if (phy_flash_cfg)
132 		efx->phy_mode = PHY_MODE_SPECIAL;
133 
134 	/* Connect up MAC/PHY operations table */
135 	rc = efx->type->probe_port(efx);
136 	if (rc)
137 		return rc;
138 
139 	/* Initialise MAC address to permanent address */
140 	eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
141 
142 	return 0;
143 }
144 
145 static int efx_init_port(struct efx_nic *efx)
146 {
147 	int rc;
148 
149 	netif_dbg(efx, drv, efx->net_dev, "init port\n");
150 
151 	mutex_lock(&efx->mac_lock);
152 
153 	efx->port_initialized = true;
154 
155 	/* Ensure the PHY advertises the correct flow control settings */
156 	rc = efx_mcdi_port_reconfigure(efx);
157 	if (rc && rc != -EPERM)
158 		goto fail;
159 
160 	mutex_unlock(&efx->mac_lock);
161 	return 0;
162 
163 fail:
164 	mutex_unlock(&efx->mac_lock);
165 	return rc;
166 }
167 
168 static void efx_fini_port(struct efx_nic *efx)
169 {
170 	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
171 
172 	if (!efx->port_initialized)
173 		return;
174 
175 	efx->port_initialized = false;
176 
177 	efx->link_state.up = false;
178 	efx_link_status_changed(efx);
179 }
180 
181 static void efx_remove_port(struct efx_nic *efx)
182 {
183 	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
184 
185 	efx->type->remove_port(efx);
186 }
187 
188 /**************************************************************************
189  *
190  * NIC handling
191  *
192  **************************************************************************/
193 
194 static LIST_HEAD(efx_primary_list);
195 static LIST_HEAD(efx_unassociated_list);
196 
197 static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
198 {
199 	return left->type == right->type &&
200 		left->vpd_sn && right->vpd_sn &&
201 		!strcmp(left->vpd_sn, right->vpd_sn);
202 }
203 
204 static void efx_associate(struct efx_nic *efx)
205 {
206 	struct efx_nic *other, *next;
207 
208 	if (efx->primary == efx) {
209 		/* Adding primary function; look for secondaries */
210 
211 		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
212 		list_add_tail(&efx->node, &efx_primary_list);
213 
214 		list_for_each_entry_safe(other, next, &efx_unassociated_list,
215 					 node) {
216 			if (efx_same_controller(efx, other)) {
217 				list_del(&other->node);
218 				netif_dbg(other, probe, other->net_dev,
219 					  "moving to secondary list of %s %s\n",
220 					  pci_name(efx->pci_dev),
221 					  efx->net_dev->name);
222 				list_add_tail(&other->node,
223 					      &efx->secondary_list);
224 				other->primary = efx;
225 			}
226 		}
227 	} else {
228 		/* Adding secondary function; look for primary */
229 
230 		list_for_each_entry(other, &efx_primary_list, node) {
231 			if (efx_same_controller(efx, other)) {
232 				netif_dbg(efx, probe, efx->net_dev,
233 					  "adding to secondary list of %s %s\n",
234 					  pci_name(other->pci_dev),
235 					  other->net_dev->name);
236 				list_add_tail(&efx->node,
237 					      &other->secondary_list);
238 				efx->primary = other;
239 				return;
240 			}
241 		}
242 
243 		netif_dbg(efx, probe, efx->net_dev,
244 			  "adding to unassociated list\n");
245 		list_add_tail(&efx->node, &efx_unassociated_list);
246 	}
247 }
248 
249 static void efx_dissociate(struct efx_nic *efx)
250 {
251 	struct efx_nic *other, *next;
252 
253 	list_del(&efx->node);
254 	efx->primary = NULL;
255 
256 	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
257 		list_del(&other->node);
258 		netif_dbg(other, probe, other->net_dev,
259 			  "moving to unassociated list\n");
260 		list_add_tail(&other->node, &efx_unassociated_list);
261 		other->primary = NULL;
262 	}
263 }
264 
265 static int efx_probe_nic(struct efx_nic *efx)
266 {
267 	int rc;
268 
269 	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
270 
271 	/* Carry out hardware-type specific initialisation */
272 	rc = efx->type->probe(efx);
273 	if (rc)
274 		return rc;
275 
276 	do {
277 		if (!efx->max_channels || !efx->max_tx_channels) {
278 			netif_err(efx, drv, efx->net_dev,
279 				  "Insufficient resources to allocate"
280 				  " any channels\n");
281 			rc = -ENOSPC;
282 			goto fail1;
283 		}
284 
285 		/* Determine the number of channels and queues by trying
286 		 * to hook in MSI-X interrupts.
287 		 */
288 		rc = efx_probe_interrupts(efx);
289 		if (rc)
290 			goto fail1;
291 
292 		rc = efx_set_channels(efx);
293 		if (rc)
294 			goto fail1;
295 
296 		/* dimension_resources can fail with EAGAIN */
297 		rc = efx->type->dimension_resources(efx);
298 		if (rc != 0 && rc != -EAGAIN)
299 			goto fail2;
300 
301 		if (rc == -EAGAIN)
302 			/* try again with new max_channels */
303 			efx_remove_interrupts(efx);
304 
305 	} while (rc == -EAGAIN);
306 
307 	if (efx->n_channels > 1)
308 		netdev_rss_key_fill(efx->rss_context.rx_hash_key,
309 				    sizeof(efx->rss_context.rx_hash_key));
310 	efx_set_default_rx_indir_table(efx, &efx->rss_context);
311 
312 	/* Initialise the interrupt moderation settings */
313 	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
314 	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
315 				true);
316 
317 	return 0;
318 
319 fail2:
320 	efx_remove_interrupts(efx);
321 fail1:
322 	efx->type->remove(efx);
323 	return rc;
324 }
325 
326 static void efx_remove_nic(struct efx_nic *efx)
327 {
328 	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
329 
330 	efx_remove_interrupts(efx);
331 	efx->type->remove(efx);
332 }
333 
334 /**************************************************************************
335  *
336  * NIC startup/shutdown
337  *
338  *************************************************************************/
339 
340 static int efx_probe_all(struct efx_nic *efx)
341 {
342 	int rc;
343 
344 	rc = efx_probe_nic(efx);
345 	if (rc) {
346 		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
347 		goto fail1;
348 	}
349 
350 	rc = efx_probe_port(efx);
351 	if (rc) {
352 		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
353 		goto fail2;
354 	}
355 
356 	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
357 	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
358 		rc = -EINVAL;
359 		goto fail3;
360 	}
361 
362 #ifdef CONFIG_SFC_SRIOV
363 	rc = efx->type->vswitching_probe(efx);
364 	if (rc) /* not fatal; the PF will still work fine */
365 		netif_warn(efx, probe, efx->net_dev,
366 			   "failed to setup vswitching rc=%d;"
367 			   " VFs may not function\n", rc);
368 #endif
369 
370 	rc = efx_probe_filters(efx);
371 	if (rc) {
372 		netif_err(efx, probe, efx->net_dev,
373 			  "failed to create filter tables\n");
374 		goto fail4;
375 	}
376 
377 	rc = efx_probe_channels(efx);
378 	if (rc)
379 		goto fail5;
380 
381 	return 0;
382 
383  fail5:
384 	efx_remove_filters(efx);
385  fail4:
386 #ifdef CONFIG_SFC_SRIOV
387 	efx->type->vswitching_remove(efx);
388 #endif
389  fail3:
390 	efx_remove_port(efx);
391  fail2:
392 	efx_remove_nic(efx);
393  fail1:
394 	return rc;
395 }
396 
397 static void efx_remove_all(struct efx_nic *efx)
398 {
399 	rtnl_lock();
400 	efx_xdp_setup_prog(efx, NULL);
401 	rtnl_unlock();
402 
403 	efx_remove_channels(efx);
404 	efx_remove_filters(efx);
405 #ifdef CONFIG_SFC_SRIOV
406 	efx->type->vswitching_remove(efx);
407 #endif
408 	efx_remove_port(efx);
409 	efx_remove_nic(efx);
410 }
411 
412 /**************************************************************************
413  *
414  * Interrupt moderation
415  *
416  **************************************************************************/
417 unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
418 {
419 	if (usecs == 0)
420 		return 0;
421 	if (usecs * 1000 < efx->timer_quantum_ns)
422 		return 1; /* never round down to 0 */
423 	return usecs * 1000 / efx->timer_quantum_ns;
424 }
425 
426 unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
427 {
428 	/* We must round up when converting ticks to microseconds
429 	 * because we round down when converting the other way.
430 	 */
431 	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
432 }
433 
434 /* Set interrupt moderation parameters */
435 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
436 			    unsigned int rx_usecs, bool rx_adaptive,
437 			    bool rx_may_override_tx)
438 {
439 	struct efx_channel *channel;
440 	unsigned int timer_max_us;
441 
442 	EFX_ASSERT_RESET_SERIALISED(efx);
443 
444 	timer_max_us = efx->timer_max_ns / 1000;
445 
446 	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
447 		return -EINVAL;
448 
449 	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
450 	    !rx_may_override_tx) {
451 		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
452 			  "RX and TX IRQ moderation must be equal\n");
453 		return -EINVAL;
454 	}
455 
456 	efx->irq_rx_adaptive = rx_adaptive;
457 	efx->irq_rx_moderation_us = rx_usecs;
458 	efx_for_each_channel(channel, efx) {
459 		if (efx_channel_has_rx_queue(channel))
460 			channel->irq_moderation_us = rx_usecs;
461 		else if (efx_channel_has_tx_queues(channel))
462 			channel->irq_moderation_us = tx_usecs;
463 		else if (efx_channel_is_xdp_tx(channel))
464 			channel->irq_moderation_us = tx_usecs;
465 	}
466 
467 	return 0;
468 }
469 
470 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
471 			    unsigned int *rx_usecs, bool *rx_adaptive)
472 {
473 	*rx_adaptive = efx->irq_rx_adaptive;
474 	*rx_usecs = efx->irq_rx_moderation_us;
475 
476 	/* If channels are shared between RX and TX, so is IRQ
477 	 * moderation.  Otherwise, IRQ moderation is the same for all
478 	 * TX channels and is not adaptive.
479 	 */
480 	if (efx->tx_channel_offset == 0) {
481 		*tx_usecs = *rx_usecs;
482 	} else {
483 		struct efx_channel *tx_channel;
484 
485 		tx_channel = efx->channel[efx->tx_channel_offset];
486 		*tx_usecs = tx_channel->irq_moderation_us;
487 	}
488 }
489 
490 /**************************************************************************
491  *
492  * ioctls
493  *
494  *************************************************************************/
495 
496 /* Net device ioctl
497  * Context: process, rtnl_lock() held.
498  */
499 static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
500 {
501 	struct efx_nic *efx = netdev_priv(net_dev);
502 	struct mii_ioctl_data *data = if_mii(ifr);
503 
504 	if (cmd == SIOCSHWTSTAMP)
505 		return efx_ptp_set_ts_config(efx, ifr);
506 	if (cmd == SIOCGHWTSTAMP)
507 		return efx_ptp_get_ts_config(efx, ifr);
508 
509 	/* Convert phy_id from older PRTAD/DEVAD format */
510 	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
511 	    (data->phy_id & 0xfc00) == 0x0400)
512 		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
513 
514 	return mdio_mii_ioctl(&efx->mdio, data, cmd);
515 }
516 
517 /**************************************************************************
518  *
519  * Kernel net device interface
520  *
521  *************************************************************************/
522 
523 /* Context: process, rtnl_lock() held. */
524 int efx_net_open(struct net_device *net_dev)
525 {
526 	struct efx_nic *efx = netdev_priv(net_dev);
527 	int rc;
528 
529 	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
530 		  raw_smp_processor_id());
531 
532 	rc = efx_check_disabled(efx);
533 	if (rc)
534 		return rc;
535 	if (efx->phy_mode & PHY_MODE_SPECIAL)
536 		return -EBUSY;
537 	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
538 		return -EIO;
539 
540 	/* Notify the kernel of the link state polled during driver load,
541 	 * before the monitor starts running */
542 	efx_link_status_changed(efx);
543 
544 	efx_start_all(efx);
545 	if (efx->state == STATE_DISABLED || efx->reset_pending)
546 		netif_device_detach(efx->net_dev);
547 	efx_selftest_async_start(efx);
548 	return 0;
549 }
550 
551 /* Context: process, rtnl_lock() held.
552  * Note that the kernel will ignore our return code; this method
553  * should really be a void.
554  */
555 int efx_net_stop(struct net_device *net_dev)
556 {
557 	struct efx_nic *efx = netdev_priv(net_dev);
558 
559 	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
560 		  raw_smp_processor_id());
561 
562 	/* Stop the device and flush all the channels */
563 	efx_stop_all(efx);
564 
565 	return 0;
566 }
567 
568 static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
569 {
570 	struct efx_nic *efx = netdev_priv(net_dev);
571 
572 	if (efx->type->vlan_rx_add_vid)
573 		return efx->type->vlan_rx_add_vid(efx, proto, vid);
574 	else
575 		return -EOPNOTSUPP;
576 }
577 
578 static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
579 {
580 	struct efx_nic *efx = netdev_priv(net_dev);
581 
582 	if (efx->type->vlan_rx_kill_vid)
583 		return efx->type->vlan_rx_kill_vid(efx, proto, vid);
584 	else
585 		return -EOPNOTSUPP;
586 }
587 
588 static const struct net_device_ops efx_netdev_ops = {
589 	.ndo_open		= efx_net_open,
590 	.ndo_stop		= efx_net_stop,
591 	.ndo_get_stats64	= efx_net_stats,
592 	.ndo_tx_timeout		= efx_watchdog,
593 	.ndo_start_xmit		= efx_hard_start_xmit,
594 	.ndo_validate_addr	= eth_validate_addr,
595 	.ndo_eth_ioctl		= efx_ioctl,
596 	.ndo_change_mtu		= efx_change_mtu,
597 	.ndo_set_mac_address	= efx_set_mac_address,
598 	.ndo_set_rx_mode	= efx_set_rx_mode,
599 	.ndo_set_features	= efx_set_features,
600 	.ndo_features_check	= efx_features_check,
601 	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
602 	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
603 #ifdef CONFIG_SFC_SRIOV
604 	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
605 	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
606 	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
607 	.ndo_get_vf_config	= efx_sriov_get_vf_config,
608 	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
609 #endif
610 	.ndo_get_phys_port_id   = efx_get_phys_port_id,
611 	.ndo_get_phys_port_name	= efx_get_phys_port_name,
612 	.ndo_setup_tc		= efx_setup_tc,
613 #ifdef CONFIG_RFS_ACCEL
614 	.ndo_rx_flow_steer	= efx_filter_rfs,
615 #endif
616 	.ndo_xdp_xmit		= efx_xdp_xmit,
617 	.ndo_bpf		= efx_xdp
618 };
619 
620 static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog)
621 {
622 	struct bpf_prog *old_prog;
623 
624 	if (efx->xdp_rxq_info_failed) {
625 		netif_err(efx, drv, efx->net_dev,
626 			  "Unable to bind XDP program due to previous failure of rxq_info\n");
627 		return -EINVAL;
628 	}
629 
630 	if (prog && efx->net_dev->mtu > efx_xdp_max_mtu(efx)) {
631 		netif_err(efx, drv, efx->net_dev,
632 			  "Unable to configure XDP with MTU of %d (max: %d)\n",
633 			  efx->net_dev->mtu, efx_xdp_max_mtu(efx));
634 		return -EINVAL;
635 	}
636 
637 	old_prog = rtnl_dereference(efx->xdp_prog);
638 	rcu_assign_pointer(efx->xdp_prog, prog);
639 	/* Release the reference that was originally passed by the caller. */
640 	if (old_prog)
641 		bpf_prog_put(old_prog);
642 
643 	return 0;
644 }
645 
646 /* Context: process, rtnl_lock() held. */
647 static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp)
648 {
649 	struct efx_nic *efx = netdev_priv(dev);
650 
651 	switch (xdp->command) {
652 	case XDP_SETUP_PROG:
653 		return efx_xdp_setup_prog(efx, xdp->prog);
654 	default:
655 		return -EINVAL;
656 	}
657 }
658 
659 static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
660 			u32 flags)
661 {
662 	struct efx_nic *efx = netdev_priv(dev);
663 
664 	if (!netif_running(dev))
665 		return -EINVAL;
666 
667 	return efx_xdp_tx_buffers(efx, n, xdpfs, flags & XDP_XMIT_FLUSH);
668 }
669 
670 static void efx_update_name(struct efx_nic *efx)
671 {
672 	strcpy(efx->name, efx->net_dev->name);
673 	efx_mtd_rename(efx);
674 	efx_set_channel_names(efx);
675 }
676 
677 static int efx_netdev_event(struct notifier_block *this,
678 			    unsigned long event, void *ptr)
679 {
680 	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
681 
682 	if ((net_dev->netdev_ops == &efx_netdev_ops) &&
683 	    event == NETDEV_CHANGENAME)
684 		efx_update_name(netdev_priv(net_dev));
685 
686 	return NOTIFY_DONE;
687 }
688 
689 static struct notifier_block efx_netdev_notifier = {
690 	.notifier_call = efx_netdev_event,
691 };
692 
693 static ssize_t phy_type_show(struct device *dev,
694 			     struct device_attribute *attr, char *buf)
695 {
696 	struct efx_nic *efx = dev_get_drvdata(dev);
697 	return sprintf(buf, "%d\n", efx->phy_type);
698 }
699 static DEVICE_ATTR_RO(phy_type);
700 
701 static int efx_register_netdev(struct efx_nic *efx)
702 {
703 	struct net_device *net_dev = efx->net_dev;
704 	struct efx_channel *channel;
705 	int rc;
706 
707 	net_dev->watchdog_timeo = 5 * HZ;
708 	net_dev->irq = efx->pci_dev->irq;
709 	net_dev->netdev_ops = &efx_netdev_ops;
710 	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
711 		net_dev->priv_flags |= IFF_UNICAST_FLT;
712 	net_dev->ethtool_ops = &efx_ethtool_ops;
713 	netif_set_tso_max_segs(net_dev, EFX_TSO_MAX_SEGS);
714 	net_dev->min_mtu = EFX_MIN_MTU;
715 	net_dev->max_mtu = EFX_MAX_MTU;
716 
717 	rtnl_lock();
718 
719 	/* Enable resets to be scheduled and check whether any were
720 	 * already requested.  If so, the NIC is probably hosed so we
721 	 * abort.
722 	 */
723 	efx->state = STATE_READY;
724 	smp_mb(); /* ensure we change state before checking reset_pending */
725 	if (efx->reset_pending) {
726 		pci_err(efx->pci_dev, "aborting probe due to scheduled reset\n");
727 		rc = -EIO;
728 		goto fail_locked;
729 	}
730 
731 	rc = dev_alloc_name(net_dev, net_dev->name);
732 	if (rc < 0)
733 		goto fail_locked;
734 	efx_update_name(efx);
735 
736 	/* Always start with carrier off; PHY events will detect the link */
737 	netif_carrier_off(net_dev);
738 
739 	rc = register_netdevice(net_dev);
740 	if (rc)
741 		goto fail_locked;
742 
743 	efx_for_each_channel(channel, efx) {
744 		struct efx_tx_queue *tx_queue;
745 		efx_for_each_channel_tx_queue(tx_queue, channel)
746 			efx_init_tx_queue_core_txq(tx_queue);
747 	}
748 
749 	efx_associate(efx);
750 
751 	rtnl_unlock();
752 
753 	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
754 	if (rc) {
755 		netif_err(efx, drv, efx->net_dev,
756 			  "failed to init net dev attributes\n");
757 		goto fail_registered;
758 	}
759 
760 	efx_init_mcdi_logging(efx);
761 
762 	return 0;
763 
764 fail_registered:
765 	rtnl_lock();
766 	efx_dissociate(efx);
767 	unregister_netdevice(net_dev);
768 fail_locked:
769 	efx->state = STATE_UNINIT;
770 	rtnl_unlock();
771 	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
772 	return rc;
773 }
774 
775 static void efx_unregister_netdev(struct efx_nic *efx)
776 {
777 	if (!efx->net_dev)
778 		return;
779 
780 	BUG_ON(netdev_priv(efx->net_dev) != efx);
781 
782 	if (efx_dev_registered(efx)) {
783 		strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
784 		efx_fini_mcdi_logging(efx);
785 		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
786 		unregister_netdev(efx->net_dev);
787 	}
788 }
789 
790 /**************************************************************************
791  *
792  * List of NICs we support
793  *
794  **************************************************************************/
795 
796 /* PCI device ID table */
797 static const struct pci_device_id efx_pci_table[] = {
798 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
799 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
800 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
801 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
802 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
803 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
804 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
805 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
806 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
807 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
808 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
809 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
810 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
811 	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
812 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
813 	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
814 	{0}			/* end of list */
815 };
816 
817 /**************************************************************************
818  *
819  * Data housekeeping
820  *
821  **************************************************************************/
822 
823 void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
824 {
825 	u64 n_rx_nodesc_trunc = 0;
826 	struct efx_channel *channel;
827 
828 	efx_for_each_channel(channel, efx)
829 		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
830 	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
831 	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
832 }
833 
834 /**************************************************************************
835  *
836  * PCI interface
837  *
838  **************************************************************************/
839 
840 /* Main body of final NIC shutdown code
841  * This is called only at module unload (or hotplug removal).
842  */
843 static void efx_pci_remove_main(struct efx_nic *efx)
844 {
845 	/* Flush reset_work. It can no longer be scheduled since we
846 	 * are not READY.
847 	 */
848 	BUG_ON(efx->state == STATE_READY);
849 	efx_flush_reset_workqueue(efx);
850 
851 	efx_disable_interrupts(efx);
852 	efx_clear_interrupt_affinity(efx);
853 	efx_nic_fini_interrupt(efx);
854 	efx_fini_port(efx);
855 	efx->type->fini(efx);
856 	efx_fini_napi(efx);
857 	efx_remove_all(efx);
858 }
859 
860 /* Final NIC shutdown
861  * This is called only at module unload (or hotplug removal).  A PF can call
862  * this on its VFs to ensure they are unbound first.
863  */
864 static void efx_pci_remove(struct pci_dev *pci_dev)
865 {
866 	struct efx_nic *efx;
867 
868 	efx = pci_get_drvdata(pci_dev);
869 	if (!efx)
870 		return;
871 
872 	/* Mark the NIC as fini, then stop the interface */
873 	rtnl_lock();
874 	efx_dissociate(efx);
875 	dev_close(efx->net_dev);
876 	efx_disable_interrupts(efx);
877 	efx->state = STATE_UNINIT;
878 	rtnl_unlock();
879 
880 	if (efx->type->sriov_fini)
881 		efx->type->sriov_fini(efx);
882 
883 	efx_unregister_netdev(efx);
884 
885 	efx_mtd_remove(efx);
886 
887 	efx_pci_remove_main(efx);
888 
889 	efx_fini_io(efx);
890 	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
891 
892 	efx_fini_struct(efx);
893 	free_netdev(efx->net_dev);
894 
895 	pci_disable_pcie_error_reporting(pci_dev);
896 };
897 
898 /* NIC VPD information
899  * Called during probe to display the part number of the
900  * installed NIC.
901  */
902 static void efx_probe_vpd_strings(struct efx_nic *efx)
903 {
904 	struct pci_dev *dev = efx->pci_dev;
905 	unsigned int vpd_size, kw_len;
906 	u8 *vpd_data;
907 	int start;
908 
909 	vpd_data = pci_vpd_alloc(dev, &vpd_size);
910 	if (IS_ERR(vpd_data)) {
911 		pci_warn(dev, "Unable to read VPD\n");
912 		return;
913 	}
914 
915 	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
916 					     PCI_VPD_RO_KEYWORD_PARTNO, &kw_len);
917 	if (start < 0)
918 		pci_err(dev, "Part number not found or incomplete\n");
919 	else
920 		pci_info(dev, "Part Number : %.*s\n", kw_len, vpd_data + start);
921 
922 	start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size,
923 					     PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len);
924 	if (start < 0)
925 		pci_err(dev, "Serial number not found or incomplete\n");
926 	else
927 		efx->vpd_sn = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
928 
929 	kfree(vpd_data);
930 }
931 
932 
933 /* Main body of NIC initialisation
934  * This is called at module load (or hotplug insertion, theoretically).
935  */
936 static int efx_pci_probe_main(struct efx_nic *efx)
937 {
938 	int rc;
939 
940 	/* Do start-of-day initialisation */
941 	rc = efx_probe_all(efx);
942 	if (rc)
943 		goto fail1;
944 
945 	efx_init_napi(efx);
946 
947 	down_write(&efx->filter_sem);
948 	rc = efx->type->init(efx);
949 	up_write(&efx->filter_sem);
950 	if (rc) {
951 		pci_err(efx->pci_dev, "failed to initialise NIC\n");
952 		goto fail3;
953 	}
954 
955 	rc = efx_init_port(efx);
956 	if (rc) {
957 		netif_err(efx, probe, efx->net_dev,
958 			  "failed to initialise port\n");
959 		goto fail4;
960 	}
961 
962 	rc = efx_nic_init_interrupt(efx);
963 	if (rc)
964 		goto fail5;
965 
966 	efx_set_interrupt_affinity(efx);
967 	rc = efx_enable_interrupts(efx);
968 	if (rc)
969 		goto fail6;
970 
971 	return 0;
972 
973  fail6:
974 	efx_clear_interrupt_affinity(efx);
975 	efx_nic_fini_interrupt(efx);
976  fail5:
977 	efx_fini_port(efx);
978  fail4:
979 	efx->type->fini(efx);
980  fail3:
981 	efx_fini_napi(efx);
982 	efx_remove_all(efx);
983  fail1:
984 	return rc;
985 }
986 
987 static int efx_pci_probe_post_io(struct efx_nic *efx)
988 {
989 	struct net_device *net_dev = efx->net_dev;
990 	int rc = efx_pci_probe_main(efx);
991 
992 	if (rc)
993 		return rc;
994 
995 	if (efx->type->sriov_init) {
996 		rc = efx->type->sriov_init(efx);
997 		if (rc)
998 			pci_err(efx->pci_dev, "SR-IOV can't be enabled rc %d\n",
999 				rc);
1000 	}
1001 
1002 	/* Determine netdevice features */
1003 	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
1004 			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
1005 	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1006 		net_dev->features |= NETIF_F_TSO6;
1007 	/* Check whether device supports TSO */
1008 	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
1009 		net_dev->features &= ~NETIF_F_ALL_TSO;
1010 	/* Mask for features that also apply to VLAN devices */
1011 	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
1012 				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
1013 				   NETIF_F_RXCSUM);
1014 
1015 	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;
1016 
1017 	/* Disable receiving frames with bad FCS, by default. */
1018 	net_dev->features &= ~NETIF_F_RXALL;
1019 
1020 	/* Disable VLAN filtering by default.  It may be enforced if
1021 	 * the feature is fixed (i.e. VLAN filters are required to
1022 	 * receive VLAN tagged packets due to vPort restrictions).
1023 	 */
1024 	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
1025 	net_dev->features |= efx->fixed_features;
1026 
1027 	rc = efx_register_netdev(efx);
1028 	if (!rc)
1029 		return 0;
1030 
1031 	efx_pci_remove_main(efx);
1032 	return rc;
1033 }
1034 
1035 /* NIC initialisation
1036  *
1037  * This is called at module load (or hotplug insertion,
1038  * theoretically).  It sets up PCI mappings, resets the NIC,
1039  * sets up and registers the network devices with the kernel and hooks
1040  * the interrupt service routine.  It does not prepare the device for
1041  * transmission; this is left to the first time one of the network
1042  * interfaces is brought up (i.e. efx_net_open).
1043  */
1044 static int efx_pci_probe(struct pci_dev *pci_dev,
1045 			 const struct pci_device_id *entry)
1046 {
1047 	struct net_device *net_dev;
1048 	struct efx_nic *efx;
1049 	int rc;
1050 
1051 	/* Allocate and initialise a struct net_device and struct efx_nic */
1052 	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
1053 				     EFX_MAX_RX_QUEUES);
1054 	if (!net_dev)
1055 		return -ENOMEM;
1056 	efx = netdev_priv(net_dev);
1057 	efx->type = (const struct efx_nic_type *) entry->driver_data;
1058 	efx->fixed_features |= NETIF_F_HIGHDMA;
1059 
1060 	pci_set_drvdata(pci_dev, efx);
1061 	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
1062 	rc = efx_init_struct(efx, pci_dev, net_dev);
1063 	if (rc)
1064 		goto fail1;
1065 
1066 	pci_info(pci_dev, "Solarflare NIC detected\n");
1067 
1068 	if (!efx->type->is_vf)
1069 		efx_probe_vpd_strings(efx);
1070 
1071 	/* Set up basic I/O (BAR mappings etc) */
1072 	rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
1073 			 efx->type->mem_map_size(efx));
1074 	if (rc)
1075 		goto fail2;
1076 
1077 	rc = efx_pci_probe_post_io(efx);
1078 	if (rc) {
1079 		/* On failure, retry once immediately.
1080 		 * If we aborted probe due to a scheduled reset, dismiss it.
1081 		 */
1082 		efx->reset_pending = 0;
1083 		rc = efx_pci_probe_post_io(efx);
1084 		if (rc) {
1085 			/* On another failure, retry once more
1086 			 * after a 50-305ms delay.
1087 			 */
1088 			unsigned char r;
1089 
1090 			get_random_bytes(&r, 1);
1091 			msleep((unsigned int)r + 50);
1092 			efx->reset_pending = 0;
1093 			rc = efx_pci_probe_post_io(efx);
1094 		}
1095 	}
1096 	if (rc)
1097 		goto fail3;
1098 
1099 	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
1100 
1101 	/* Try to create MTDs, but allow this to fail */
1102 	rtnl_lock();
1103 	rc = efx_mtd_probe(efx);
1104 	rtnl_unlock();
1105 	if (rc && rc != -EPERM)
1106 		netif_warn(efx, probe, efx->net_dev,
1107 			   "failed to create MTDs (%d)\n", rc);
1108 
1109 	(void)pci_enable_pcie_error_reporting(pci_dev);
1110 
1111 	if (efx->type->udp_tnl_push_ports)
1112 		efx->type->udp_tnl_push_ports(efx);
1113 
1114 	return 0;
1115 
1116  fail3:
1117 	efx_fini_io(efx);
1118  fail2:
1119 	efx_fini_struct(efx);
1120  fail1:
1121 	WARN_ON(rc > 0);
1122 	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
1123 	free_netdev(net_dev);
1124 	return rc;
1125 }
1126 
/* efx_pci_sriov_configure returns the actual number of Virtual Functions
 * enabled on success.  A failure reported by the NIC type's
 * sriov_configure hook is passed back unchanged, and -EOPNOTSUPP is
 * returned when the NIC type provides no such hook.
 */
#ifdef CONFIG_SFC_SRIOV
static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
{
	struct efx_nic *efx = pci_get_drvdata(dev);
	int rc;

	if (!efx->type->sriov_configure)
		return -EOPNOTSUPP;

	rc = efx->type->sriov_configure(efx, num_vfs);

	return rc ? rc : num_vfs;
}
#endif
1146 
1147 static int efx_pm_freeze(struct device *dev)
1148 {
1149 	struct efx_nic *efx = dev_get_drvdata(dev);
1150 
1151 	rtnl_lock();
1152 
1153 	if (efx->state != STATE_DISABLED) {
1154 		efx->state = STATE_UNINIT;
1155 
1156 		efx_device_detach_sync(efx);
1157 
1158 		efx_stop_all(efx);
1159 		efx_disable_interrupts(efx);
1160 	}
1161 
1162 	rtnl_unlock();
1163 
1164 	return 0;
1165 }
1166 
1167 static int efx_pm_thaw(struct device *dev)
1168 {
1169 	int rc;
1170 	struct efx_nic *efx = dev_get_drvdata(dev);
1171 
1172 	rtnl_lock();
1173 
1174 	if (efx->state != STATE_DISABLED) {
1175 		rc = efx_enable_interrupts(efx);
1176 		if (rc)
1177 			goto fail;
1178 
1179 		mutex_lock(&efx->mac_lock);
1180 		efx_mcdi_port_reconfigure(efx);
1181 		mutex_unlock(&efx->mac_lock);
1182 
1183 		efx_start_all(efx);
1184 
1185 		efx_device_attach_if_not_resetting(efx);
1186 
1187 		efx->state = STATE_READY;
1188 
1189 		efx->type->resume_wol(efx);
1190 	}
1191 
1192 	rtnl_unlock();
1193 
1194 	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
1195 	efx_queue_reset_work(efx);
1196 
1197 	return 0;
1198 
1199 fail:
1200 	rtnl_unlock();
1201 
1202 	return rc;
1203 }
1204 
1205 static int efx_pm_poweroff(struct device *dev)
1206 {
1207 	struct pci_dev *pci_dev = to_pci_dev(dev);
1208 	struct efx_nic *efx = pci_get_drvdata(pci_dev);
1209 
1210 	efx->type->fini(efx);
1211 
1212 	efx->reset_pending = 0;
1213 
1214 	pci_save_state(pci_dev);
1215 	return pci_set_power_state(pci_dev, PCI_D3hot);
1216 }
1217 
1218 /* Used for both resume and restore */
1219 static int efx_pm_resume(struct device *dev)
1220 {
1221 	struct pci_dev *pci_dev = to_pci_dev(dev);
1222 	struct efx_nic *efx = pci_get_drvdata(pci_dev);
1223 	int rc;
1224 
1225 	rc = pci_set_power_state(pci_dev, PCI_D0);
1226 	if (rc)
1227 		return rc;
1228 	pci_restore_state(pci_dev);
1229 	rc = pci_enable_device(pci_dev);
1230 	if (rc)
1231 		return rc;
1232 	pci_set_master(efx->pci_dev);
1233 	rc = efx->type->reset(efx, RESET_TYPE_ALL);
1234 	if (rc)
1235 		return rc;
1236 	down_write(&efx->filter_sem);
1237 	rc = efx->type->init(efx);
1238 	up_write(&efx->filter_sem);
1239 	if (rc)
1240 		return rc;
1241 	rc = efx_pm_thaw(dev);
1242 	return rc;
1243 }
1244 
/* Suspend is a freeze followed by a power-off.  If the power-off step
 * fails, the device is resumed again and the power-off error is returned.
 */
static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);

	rc = efx_pm_poweroff(dev);
	if (!rc)
		return 0;

	/* Roll back; the result of the resume itself is not reported */
	efx_pm_resume(dev);

	return rc;
}
1255 
1256 static const struct dev_pm_ops efx_pm_ops = {
1257 	.suspend	= efx_pm_suspend,
1258 	.resume		= efx_pm_resume,
1259 	.freeze		= efx_pm_freeze,
1260 	.thaw		= efx_pm_thaw,
1261 	.poweroff	= efx_pm_poweroff,
1262 	.restore	= efx_pm_resume,
1263 };
1264 
1265 static struct pci_driver efx_pci_driver = {
1266 	.name		= KBUILD_MODNAME,
1267 	.id_table	= efx_pci_table,
1268 	.probe		= efx_pci_probe,
1269 	.remove		= efx_pci_remove,
1270 	.driver.pm	= &efx_pm_ops,
1271 	.err_handler	= &efx_err_handlers,
1272 #ifdef CONFIG_SFC_SRIOV
1273 	.sriov_configure = efx_pci_sriov_configure,
1274 #endif
1275 };
1276 
1277 /**************************************************************************
1278  *
1279  * Kernel module interface
1280  *
1281  *************************************************************************/
1282 
1283 static int __init efx_init_module(void)
1284 {
1285 	int rc;
1286 
1287 	printk(KERN_INFO "Solarflare NET driver\n");
1288 
1289 	rc = register_netdevice_notifier(&efx_netdev_notifier);
1290 	if (rc)
1291 		goto err_notifier;
1292 
1293 	rc = efx_create_reset_workqueue();
1294 	if (rc)
1295 		goto err_reset;
1296 
1297 	rc = pci_register_driver(&efx_pci_driver);
1298 	if (rc < 0)
1299 		goto err_pci;
1300 
1301 	rc = pci_register_driver(&ef100_pci_driver);
1302 	if (rc < 0)
1303 		goto err_pci_ef100;
1304 
1305 	return 0;
1306 
1307  err_pci_ef100:
1308 	pci_unregister_driver(&efx_pci_driver);
1309  err_pci:
1310 	efx_destroy_reset_workqueue();
1311  err_reset:
1312 	unregister_netdevice_notifier(&efx_netdev_notifier);
1313  err_notifier:
1314 	return rc;
1315 }
1316 
1317 static void __exit efx_exit_module(void)
1318 {
1319 	printk(KERN_INFO "Solarflare NET driver unloading\n");
1320 
1321 	pci_unregister_driver(&ef100_pci_driver);
1322 	pci_unregister_driver(&efx_pci_driver);
1323 	efx_destroy_reset_workqueue();
1324 	unregister_netdevice_notifier(&efx_netdev_notifier);
1325 
1326 }
1327 
1328 module_init(efx_init_module);
1329 module_exit(efx_exit_module);
1330 
1331 MODULE_AUTHOR("Solarflare Communications and "
1332 	      "Michael Brown <mbrown@fensystems.co.uk>");
1333 MODULE_DESCRIPTION("Solarflare network driver");
1334 MODULE_LICENSE("GPL");
1335 MODULE_DEVICE_TABLE(pci, efx_pci_table);
1336