xref: /openbmc/linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c (revision c51d39010a1bccc9c1294e2d7c00005aefeb2b5c)
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 
38 #include <linux/module.h>
39 #include <linux/moduleparam.h>
40 #include <linux/init.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/debugfs.h>
46 #include <linux/ethtool.h>
47 #include <linux/mdio.h>
48 
49 #include "t4vf_common.h"
50 #include "t4vf_defs.h"
51 
52 #include "../cxgb4/t4_regs.h"
53 #include "../cxgb4/t4_msg.h"
54 
55 /*
56  * Generic information about the driver.
57  */
58 #define DRV_VERSION "2.0.0-ko"
59 #define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
60 
61 /*
62  * Module Parameters.
63  * ==================
64  */
65 
66 /*
67  * Default ethtool "message level" for adapters.
68  */
69 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
70 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
71 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
72 
73 static int dflt_msg_enable = DFLT_MSG_ENABLE;
74 
75 module_param(dflt_msg_enable, int, 0644);
76 MODULE_PARM_DESC(dflt_msg_enable,
77 		 "default adapter ethtool message level bitmap, "
78 		 "deprecated parameter");
79 
80 /*
81  * The driver uses the best interrupt scheme available on a platform in the
82  * order MSI-X then MSI.  This parameter determines which of these schemes the
83  * driver may consider as follows:
84  *
85  *     msi = 2: choose from among MSI-X and MSI
86  *     msi = 1: only consider MSI interrupts
87  *
88  * Note that unlike the Physical Function driver, this Virtual Function driver
89  * does _not_ support legacy INTx interrupts (this limitation is mandated by
90  * the PCI-E SR-IOV standard).
91  */
92 #define MSI_MSIX	2
93 #define MSI_MSI		1
94 #define MSI_DEFAULT	MSI_MSIX
95 
96 static int msi = MSI_DEFAULT;
97 
98 module_param(msi, int, 0644);
99 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
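/*
 * For example, loading the module with "modprobe cxgb4vf msi=1" restricts
 * the driver to plain MSI on a platform where MSI-X vectors are scarce;
 * the default (msi=2) lets it prefer MSI-X and fall back to MSI.
 */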
100 
101 /*
102  * Fundamental constants.
103  * ======================
104  */
105 
106 enum {
107 	MAX_TXQ_ENTRIES		= 16384,
108 	MAX_RSPQ_ENTRIES	= 16384,
109 	MAX_RX_BUFFERS		= 16384,
110 
111 	MIN_TXQ_ENTRIES		= 32,
112 	MIN_RSPQ_ENTRIES	= 128,
113 	MIN_FL_ENTRIES		= 16,
114 
115 	/*
116 	 * For purposes of manipulating the Free List size we need to
117 	 * recognize that Free Lists are actually Egress Queues (the host
118  * produces free buffers which the hardware consumes), that Egress Queue
119  * indices are in units of Egress Context Units (EQ_UNIT bytes), and that free
120 	 * list entries are 64-bit PCI DMA addresses.  And since the state of
121 	 * the Producer Index == the Consumer Index implies an EMPTY list, we
122 	 * always have at least one Egress Unit's worth of Free List entries
123 	 * unused.  See sge.c for more details ...
124 	 */
125 	EQ_UNIT = SGE_EQ_IDXSIZE,
126 	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
127 	MIN_FL_RESID = FL_PER_EQ_UNIT,
128 };
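/*
 * Worked example (assuming SGE_EQ_IDXSIZE is 64 bytes): each Egress Queue
 * Unit then holds EQ_UNIT / sizeof(__be64) = 64 / 8 = 8 Free List pointers,
 * so FL_PER_EQ_UNIT and hence MIN_FL_RESID are 8, i.e. the entries we always
 * leave unused so that Producer Index == Consumer Index unambiguously means
 * an empty list.
 */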
129 
130 /*
131  * Global driver state.
132  * ====================
133  */
134 
135 static struct dentry *cxgb4vf_debugfs_root;
136 
137 /*
138  * OS "Callback" functions.
139  * ========================
140  */
141 
142 /*
143  * The link status has changed on the indicated "port" (Virtual Interface).
144  */
145 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
146 {
147 	struct net_device *dev = adapter->port[pidx];
148 
149 	/*
150 	 * If the port is disabled or the current recorded "link up"
151 	 * status matches the new status, just return.
152 	 */
153 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
154 		return;
155 
156 	/*
157 	 * Tell the OS that the link status has changed and print a short
158 	 * informative message on the console about the event.
159 	 */
160 	if (link_ok) {
161 		const char *s;
162 		const char *fc;
163 		const struct port_info *pi = netdev_priv(dev);
164 
165 		netif_carrier_on(dev);
166 
167 		switch (pi->link_cfg.speed) {
168 		case 40000:
169 			s = "40Gbps";
170 			break;
171 
172 		case 10000:
173 			s = "10Gbps";
174 			break;
175 
176 		case 1000:
177 			s = "1000Mbps";
178 			break;
179 
180 		case 100:
181 			s = "100Mbps";
182 			break;
183 
184 		default:
185 			s = "unknown";
186 			break;
187 		}
188 
189 		switch (pi->link_cfg.fc) {
190 		case PAUSE_RX:
191 			fc = "RX";
192 			break;
193 
194 		case PAUSE_TX:
195 			fc = "TX";
196 			break;
197 
198 		case PAUSE_RX|PAUSE_TX:
199 			fc = "RX/TX";
200 			break;
201 
202 		default:
203 			fc = "no";
204 			break;
205 		}
206 
207 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
208 	} else {
209 		netif_carrier_off(dev);
210 		netdev_info(dev, "link down\n");
211 	}
212 }
213 
214 /*
215  * The port module type has changed on the indicated "port" (Virtual
216  * Interface).
217  */
218 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
219 {
220 	static const char * const mod_str[] = {
221 		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
222 	};
223 	const struct net_device *dev = adapter->port[pidx];
224 	const struct port_info *pi = netdev_priv(dev);
225 
226 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
227 		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
228 			 dev->name);
229 	else if (pi->mod_type < ARRAY_SIZE(mod_str))
230 		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
231 			 dev->name, mod_str[pi->mod_type]);
232 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
233 		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
234 			 "module inserted\n", dev->name);
235 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
236 		dev_info(adapter->pdev_dev, "%s: unknown port module inserted, "
237 			 "forcing TWINAX\n", dev->name);
238 	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
239 		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
240 			 dev->name);
241 	else
242 		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
243 			 "inserted\n", dev->name, pi->mod_type);
244 }
245 
246 /*
247  * Net device operations.
248  * ======================
249  */
250 
251 
252 
253 
254 /*
255  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
256  * Interface).
257  */
258 static int link_start(struct net_device *dev)
259 {
260 	int ret;
261 	struct port_info *pi = netdev_priv(dev);
262 
263 	/*
264 	 * We do not set address filters or promiscuity here; the stack does
265 	 * that step explicitly.  Enable VLAN acceleration.
266 	 */
267 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
268 			      true);
269 	if (ret == 0) {
270 		ret = t4vf_change_mac(pi->adapter, pi->viid,
271 				      pi->xact_addr_filt, dev->dev_addr, true);
272 		if (ret >= 0) {
273 			pi->xact_addr_filt = ret;
274 			ret = 0;
275 		}
276 	}
277 
278 	/*
279 	 * We don't need to actually "start the link" itself since the
280 	 * firmware will do that for us when the first Virtual Interface
281 	 * is enabled on a port.
282 	 */
283 	if (ret == 0)
284 		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
285 	return ret;
286 }
287 
288 /*
289  * Name the MSI-X interrupts.
290  */
291 static void name_msix_vecs(struct adapter *adapter)
292 {
293 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
294 	int pidx;
295 
296 	/*
297 	 * Firmware events.
298 	 */
299 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
300 		 "%s-FWeventq", adapter->name);
301 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
302 
303 	/*
304 	 * Ethernet queues.
305 	 */
306 	for_each_port(adapter, pidx) {
307 		struct net_device *dev = adapter->port[pidx];
308 		const struct port_info *pi = netdev_priv(dev);
309 		int qs, msi;
310 
311 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
312 			snprintf(adapter->msix_info[msi].desc, namelen,
313 				 "%s-%d", dev->name, qs);
314 			adapter->msix_info[msi].desc[namelen] = 0;
315 		}
316 	}
317 }
318 
319 /*
320  * Request all of our MSI-X resources.
321  */
322 static int request_msix_queue_irqs(struct adapter *adapter)
323 {
324 	struct sge *s = &adapter->sge;
325 	int rxq, msi, err;
326 
327 	/*
328 	 * Firmware events.
329 	 */
330 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
331 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
332 	if (err)
333 		return err;
334 
335 	/*
336 	 * Ethernet queues.
337 	 */
338 	msi = MSIX_IQFLINT;
339 	for_each_ethrxq(s, rxq) {
340 		err = request_irq(adapter->msix_info[msi].vec,
341 				  t4vf_sge_intr_msix, 0,
342 				  adapter->msix_info[msi].desc,
343 				  &s->ethrxq[rxq].rspq);
344 		if (err)
345 			goto err_free_irqs;
346 		msi++;
347 	}
348 	return 0;
349 
350 err_free_irqs:
351 	while (--rxq >= 0)
352 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
353 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
354 	return err;
355 }
356 
357 /*
358  * Free our MSI-X resources.
359  */
360 static void free_msix_queue_irqs(struct adapter *adapter)
361 {
362 	struct sge *s = &adapter->sge;
363 	int rxq, msi;
364 
365 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
366 	msi = MSIX_IQFLINT;
367 	for_each_ethrxq(s, rxq)
368 		free_irq(adapter->msix_info[msi++].vec,
369 			 &s->ethrxq[rxq].rspq);
370 }
371 
372 /*
373  * Turn on NAPI and start up interrupts on a response queue.
374  */
375 static void qenable(struct sge_rspq *rspq)
376 {
377 	napi_enable(&rspq->napi);
378 
379 	/*
380 	 * 0-increment the Going To Sleep register to start the timer and
381 	 * enable interrupts.
382 	 */
383 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
384 		     CIDXINC_V(0) |
385 		     SEINTARM_V(rspq->intr_params) |
386 		     INGRESSQID_V(rspq->cntxt_id));
387 }
388 
389 /*
390  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
391  */
392 static void enable_rx(struct adapter *adapter)
393 {
394 	int rxq;
395 	struct sge *s = &adapter->sge;
396 
397 	for_each_ethrxq(s, rxq)
398 		qenable(&s->ethrxq[rxq].rspq);
399 	qenable(&s->fw_evtq);
400 
401 	/*
402 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
403 	 * its Going To Sleep register here to get it started.
404 	 */
405 	if (adapter->flags & USING_MSI)
406 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
407 			     CIDXINC_V(0) |
408 			     SEINTARM_V(s->intrq.intr_params) |
409 			     INGRESSQID_V(s->intrq.cntxt_id));
410 
411 }
412 
413 /*
414  * Wait until all NAPI handlers are descheduled.
415  */
416 static void quiesce_rx(struct adapter *adapter)
417 {
418 	struct sge *s = &adapter->sge;
419 	int rxq;
420 
421 	for_each_ethrxq(s, rxq)
422 		napi_disable(&s->ethrxq[rxq].rspq.napi);
423 	napi_disable(&s->fw_evtq.napi);
424 }
425 
426 /*
427  * Response queue handler for the firmware event queue.
428  */
429 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
430 			  const struct pkt_gl *gl)
431 {
432 	/*
433 	 * Extract response opcode and get pointer to CPL message body.
434 	 */
435 	struct adapter *adapter = rspq->adapter;
436 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
437 	void *cpl = (void *)(rsp + 1);
438 
439 	switch (opcode) {
440 	case CPL_FW6_MSG: {
441 		/*
442 		 * We've received an asynchronous message from the firmware.
443 		 */
444 		const struct cpl_fw6_msg *fw_msg = cpl;
445 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
446 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
447 		break;
448 	}
449 
450 	case CPL_FW4_MSG: {
451 		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
452 		 */
453 		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
454 		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
455 		if (opcode != CPL_SGE_EGR_UPDATE) {
456 			dev_err(adapter->pdev_dev,
457 				"unexpected FW4/CPL %#x on FW event queue\n", opcode);
458 			break;
459 		}
460 		cpl = (void *)p;
461 		/*FALLTHROUGH*/
462 	}
463 
464 	case CPL_SGE_EGR_UPDATE: {
465 		/*
466 		 * We've received an Egress Queue Status Update message.  We
467 		 * get these, if the SGE is configured to send these when the
468 		 * firmware passes certain points in processing our TX
469 		 * Ethernet Queue or if we make an explicit request for one.
470 		 * We use these updates to determine when we may need to
471 		 * restart a TX Ethernet Queue which was stopped for lack of
472 		 * free TX Queue Descriptors ...
473 		 */
474 		const struct cpl_sge_egr_update *p = cpl;
475 		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
476 		struct sge *s = &adapter->sge;
477 		struct sge_txq *tq;
478 		struct sge_eth_txq *txq;
479 		unsigned int eq_idx;
480 
481 		/*
482 		 * Perform sanity checking on the Queue ID to make sure it
483 		 * really refers to one of our TX Ethernet Egress Queues which
484 		 * is active and matches the queue's ID.  None of these error
485 		 * conditions should ever happen so we may want to either make
486 		 * them fatal and/or conditionalized under DEBUG.
487 		 */
488 		eq_idx = EQ_IDX(s, qid);
489 		if (unlikely(eq_idx >= MAX_EGRQ)) {
490 			dev_err(adapter->pdev_dev,
491 				"Egress Update QID %d out of range\n", qid);
492 			break;
493 		}
494 		tq = s->egr_map[eq_idx];
495 		if (unlikely(tq == NULL)) {
496 			dev_err(adapter->pdev_dev,
497 				"Egress Update QID %d TXQ=NULL\n", qid);
498 			break;
499 		}
500 		txq = container_of(tq, struct sge_eth_txq, q);
501 		if (unlikely(tq->abs_id != qid)) {
502 			dev_err(adapter->pdev_dev,
503 				"Egress Update QID %d refers to TXQ %d\n",
504 				qid, tq->abs_id);
505 			break;
506 		}
507 
508 		/*
509 		 * Restart a stopped TX Queue which has less than half of its
510 		 * TX ring in use ...
511 		 */
512 		txq->q.restarts++;
513 		netif_tx_wake_queue(txq->txq);
514 		break;
515 	}
516 
517 	default:
518 		dev_err(adapter->pdev_dev,
519 			"unexpected CPL %#x on FW event queue\n", opcode);
520 	}
521 
522 	return 0;
523 }
524 
525 /*
526  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
527  * to use and initializes them.  We support multiple "Queue Sets" per port if
528  * we have MSI-X, otherwise just one queue set per port.
529  */
530 static int setup_sge_queues(struct adapter *adapter)
531 {
532 	struct sge *s = &adapter->sge;
533 	int err, pidx, msix;
534 
535 	/*
536 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
537 	 * state.
538 	 */
539 	bitmap_zero(s->starving_fl, MAX_EGRQ);
540 
541 	/*
542 	 * If we're using MSI interrupt mode we need to set up a "forwarded
543 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
544 	 * of the ingress queues will be set up to forward their interrupts to
545 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
546 	 * the intrq's queue ID as the interrupt forwarding queue for the
547 	 * subsequent calls ...
548 	 */
549 	if (adapter->flags & USING_MSI) {
550 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
551 					 adapter->port[0], 0, NULL, NULL);
552 		if (err)
553 			goto err_free_queues;
554 	}
555 
556 	/*
557 	 * Allocate our ingress queue for asynchronous firmware messages.
558 	 */
559 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
560 				 MSIX_FW, NULL, fwevtq_handler);
561 	if (err)
562 		goto err_free_queues;
563 
564 	/*
565 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
566 	 * later on ... up to the point where any interface on the adapter is
567 	 * brought up at which point lots of things get nailed down
568 	 * permanently ...
569 	 */
570 	msix = MSIX_IQFLINT;
571 	for_each_port(adapter, pidx) {
572 		struct net_device *dev = adapter->port[pidx];
573 		struct port_info *pi = netdev_priv(dev);
574 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
575 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
576 		int qs;
577 
578 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
579 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
580 						 dev, msix++,
581 						 &rxq->fl, t4vf_ethrx_handler);
582 			if (err)
583 				goto err_free_queues;
584 
585 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
586 					     netdev_get_tx_queue(dev, qs),
587 					     s->fw_evtq.cntxt_id);
588 			if (err)
589 				goto err_free_queues;
590 
591 			rxq->rspq.idx = qs;
592 			memset(&rxq->stats, 0, sizeof(rxq->stats));
593 		}
594 	}
595 
596 	/*
597 	 * Create the reverse mappings for the queues.
598 	 */
599 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
600 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
601 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
602 	for_each_port(adapter, pidx) {
603 		struct net_device *dev = adapter->port[pidx];
604 		struct port_info *pi = netdev_priv(dev);
605 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
606 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
607 		int qs;
608 
609 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
610 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
611 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
612 
613 			/*
614 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
615 			 * for Free Lists but since all of the Egress Queues
616 			 * (including Free Lists) have Relative Queue IDs
617 			 * which are computed as Absolute - Base Queue ID, we
618 			 * can synthesize the Absolute Queue IDs for the Free
619 			 * Lists.  This is useful for debugging purposes when
620 			 * we want to dump Queue Contexts via the PF Driver.
621 			 */
622 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
623 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
624 		}
625 	}
626 	return 0;
627 
628 err_free_queues:
629 	t4vf_free_sge_resources(adapter);
630 	return err;
631 }
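/*
 * Illustration of the Absolute/Relative Queue ID arithmetic above, with
 * hypothetical numbers: if ethtxq[0] came back with abs_id 72 and cntxt_id
 * 8, then s->egr_base = 72 - 8 = 64, and a Free List with cntxt_id 9 is
 * synthesized abs_id 9 + 64 = 73 for its EQ_MAP() slot.
 */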
632 
633 /*
634  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
635  * queues.  We configure the RSS CPU lookup table to distribute to the number
636  * of HW receive queues, and the response queue lookup table to narrow that
637  * down to the response queues actually configured for each "port" (Virtual
638  * Interface).  We always configure the RSS mapping for all ports since the
639  * mapping table has plenty of entries.
640  */
641 static int setup_rss(struct adapter *adapter)
642 {
643 	int pidx;
644 
645 	for_each_port(adapter, pidx) {
646 		struct port_info *pi = adap2pinfo(adapter, pidx);
647 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
648 		u16 rss[MAX_PORT_QSETS];
649 		int qs, err;
650 
651 		for (qs = 0; qs < pi->nqsets; qs++)
652 			rss[qs] = rxq[qs].rspq.abs_id;
653 
654 		err = t4vf_config_rss_range(adapter, pi->viid,
655 					    0, pi->rss_size, rss, pi->nqsets);
656 		if (err)
657 			return err;
658 
659 		/*
660 		 * Perform Global RSS Mode-specific initialization.
661 		 */
662 		switch (adapter->params.rss.mode) {
663 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
664 			/*
665 			 * If Tunnel All Lookup isn't specified in the global
666 			 * RSS Configuration, then we need to specify a
667 			 * default Ingress Queue for any ingress packets which
668 			 * aren't hashed.  We'll use our first ingress queue
669 			 * ...
670 			 */
671 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
672 				union rss_vi_config config;
673 				err = t4vf_read_rss_vi_config(adapter,
674 							      pi->viid,
675 							      &config);
676 				if (err)
677 					return err;
678 				config.basicvirtual.defaultq =
679 					rxq[0].rspq.abs_id;
680 				err = t4vf_write_rss_vi_config(adapter,
681 							       pi->viid,
682 							       &config);
683 				if (err)
684 					return err;
685 			}
686 			break;
687 		}
688 	}
689 
690 	return 0;
691 }
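/*
 * For example, a port with nqsets = 4 and a hypothetical rss_size of 64
 * has its four Response Queue Absolute IDs written cyclically across all
 * 64 RSS table slots by t4vf_config_rss_range(), spreading hash buckets
 * evenly over the configured queues.
 */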
692 
693 /*
694  * Bring the adapter up.  Called whenever we go from no "ports" open to having
695  * one open.  This function performs the actions necessary to make an adapter
696  * operational, such as completing the initialization of HW modules, and
697  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
698  * this is called "cxgb_up" in the PF Driver.)
699  */
700 static int adapter_up(struct adapter *adapter)
701 {
702 	int err;
703 
704 	/*
705 	 * If this is the first time we've been called, perform basic
706 	 * adapter setup.  Once we've done this, many of our adapter
707 	 * parameters can no longer be changed ...
708 	 */
709 	if ((adapter->flags & FULL_INIT_DONE) == 0) {
710 		err = setup_sge_queues(adapter);
711 		if (err)
712 			return err;
713 		err = setup_rss(adapter);
714 		if (err) {
715 			t4vf_free_sge_resources(adapter);
716 			return err;
717 		}
718 
719 		if (adapter->flags & USING_MSIX)
720 			name_msix_vecs(adapter);
721 		adapter->flags |= FULL_INIT_DONE;
722 	}
723 
724 	/*
725 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
726 	 */
727 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
728 	if (adapter->flags & USING_MSIX)
729 		err = request_msix_queue_irqs(adapter);
730 	else
731 		err = request_irq(adapter->pdev->irq,
732 				  t4vf_intr_handler(adapter), 0,
733 				  adapter->name, adapter);
734 	if (err) {
735 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
736 			err);
737 		return err;
738 	}
739 
740 	/*
741 	 * Enable NAPI ingress processing and return success.
742 	 */
743 	enable_rx(adapter);
744 	t4vf_sge_start(adapter);
745 
746 	/* Initialize hash MAC address list */
747 	INIT_LIST_HEAD(&adapter->mac_hlist);
748 	return 0;
749 }
750 
751 /*
752  * Bring the adapter down.  Called whenever the last "port" (Virtual
753  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
754  * Driver.)
755  */
756 static void adapter_down(struct adapter *adapter)
757 {
758 	/*
759 	 * Free interrupt resources.
760 	 */
761 	if (adapter->flags & USING_MSIX)
762 		free_msix_queue_irqs(adapter);
763 	else
764 		free_irq(adapter->pdev->irq, adapter);
765 
766 	/*
767 	 * Wait for NAPI handlers to finish.
768 	 */
769 	quiesce_rx(adapter);
770 }
771 
772 /*
773  * Start up a net device.
774  */
775 static int cxgb4vf_open(struct net_device *dev)
776 {
777 	int err;
778 	struct port_info *pi = netdev_priv(dev);
779 	struct adapter *adapter = pi->adapter;
780 
781 	/*
782 	 * If this is the first interface that we're opening on the "adapter",
783 	 * bring the "adapter" up now.
784 	 */
785 	if (adapter->open_device_map == 0) {
786 		err = adapter_up(adapter);
787 		if (err)
788 			return err;
789 	}
790 
791 	/*
792 	 * Note that this interface is up and start everything up ...
793 	 */
794 	err = link_start(dev);
795 	if (err)
796 		goto err_unwind;
797 
798 	netif_tx_start_all_queues(dev);
799 	set_bit(pi->port_id, &adapter->open_device_map);
800 	return 0;
801 
802 err_unwind:
803 	if (adapter->open_device_map == 0)
804 		adapter_down(adapter);
805 	return err;
806 }
807 
808 /*
809  * Shut down a net device.  This routine is called "cxgb_close" in the PF
810  * Driver ...
811  */
812 static int cxgb4vf_stop(struct net_device *dev)
813 {
814 	struct port_info *pi = netdev_priv(dev);
815 	struct adapter *adapter = pi->adapter;
816 
817 	netif_tx_stop_all_queues(dev);
818 	netif_carrier_off(dev);
819 	t4vf_enable_vi(adapter, pi->viid, false, false);
820 	pi->link_cfg.link_ok = 0;
821 
822 	clear_bit(pi->port_id, &adapter->open_device_map);
823 	if (adapter->open_device_map == 0)
824 		adapter_down(adapter);
825 	return 0;
826 }
827 
828 /*
829  * Translate our basic statistics into the standard "ifconfig" statistics.
830  */
831 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
832 {
833 	struct t4vf_port_stats stats;
834 	struct port_info *pi = netdev2pinfo(dev);
835 	struct adapter *adapter = pi->adapter;
836 	struct net_device_stats *ns = &dev->stats;
837 	int err;
838 
839 	spin_lock(&adapter->stats_lock);
840 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
841 	spin_unlock(&adapter->stats_lock);
842 
843 	memset(ns, 0, sizeof(*ns));
844 	if (err)
845 		return ns;
846 
847 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
848 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
849 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
850 			  stats.tx_ucast_frames + stats.tx_offload_frames);
851 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
852 			stats.rx_ucast_bytes);
853 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
854 			  stats.rx_ucast_frames);
855 	ns->multicast = stats.rx_mcast_frames;
856 	ns->tx_errors = stats.tx_drop_frames;
857 	ns->rx_errors = stats.rx_err_frames;
858 
859 	return ns;
860 }
861 
862 static inline int cxgb4vf_set_addr_hash(struct port_info *pi)
863 {
864 	struct adapter *adapter = pi->adapter;
865 	u64 vec = 0;
866 	bool ucast = false;
867 	struct hash_mac_addr *entry;
868 
869 	/* Calculate the hash vector for the updated list and program it */
870 	list_for_each_entry(entry, &adapter->mac_hlist, list) {
871 		ucast |= is_unicast_ether_addr(entry->addr);
872 		vec |= (1ULL << hash_mac_addr(entry->addr));
873 	}
874 	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
875 }
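/*
 * Note that since vec is a 64-bit vector built via 1ULL << hash_mac_addr(),
 * hash_mac_addr() necessarily folds each 48-bit MAC address down to a 6-bit
 * bucket index; addresses which collide in a bucket simply share a bit and
 * are filtered imperfectly.
 */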
876 
877 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
878 {
879 	struct port_info *pi = netdev_priv(netdev);
880 	struct adapter *adapter = pi->adapter;
881 	int ret;
882 	u64 mhash = 0;
883 	u64 uhash = 0;
884 	bool free = false;
885 	bool ucast = is_unicast_ether_addr(mac_addr);
886 	const u8 *maclist[1] = {mac_addr};
887 	struct hash_mac_addr *new_entry;
888 
889 	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
890 				  NULL, ucast ? &uhash : &mhash, false);
891 	if (ret < 0)
892 		goto out;
893 	/* If the returned hash is non-zero, add the address to the hash
894 	 * address list so that at the end we can recalculate the hash for
895 	 * the whole list and program it.
896 	 */
897 	if (uhash || mhash) {
898 		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
899 		if (!new_entry)
900 			return -ENOMEM;
901 		ether_addr_copy(new_entry->addr, mac_addr);
902 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
903 		ret = cxgb4vf_set_addr_hash(pi);
904 	}
905 out:
906 	return ret < 0 ? ret : 0;
907 }
908 
909 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
910 {
911 	struct port_info *pi = netdev_priv(netdev);
912 	struct adapter *adapter = pi->adapter;
913 	int ret;
914 	const u8 *maclist[1] = {mac_addr};
915 	struct hash_mac_addr *entry, *tmp;
916 
917 	/* If the MAC address to be removed is in the hash addr
918 	 * list, delete it from the list and update hash vector
919 	 */
920 	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
921 		if (ether_addr_equal(entry->addr, mac_addr)) {
922 			list_del(&entry->list);
923 			kfree(entry);
924 			return cxgb4vf_set_addr_hash(pi);
925 		}
926 	}
927 
928 	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
929 	return ret < 0 ? -EINVAL : 0;
930 }
931 
932 /*
933  * Set RX properties of a port, such as promiscuity, address filters, and MTU.
934  * If @mtu is -1 it is left unchanged.
935  */
936 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
937 {
938 	struct port_info *pi = netdev_priv(dev);
939 
940 	__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
941 	__dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
942 	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
943 			       (dev->flags & IFF_PROMISC) != 0,
944 			       (dev->flags & IFF_ALLMULTI) != 0,
945 			       1, -1, sleep_ok);
946 }
947 
948 /*
949  * Set the current receive modes on the device.
950  */
951 static void cxgb4vf_set_rxmode(struct net_device *dev)
952 {
953 	/* unfortunately we can't return errors to the stack */
954 	set_rxmode(dev, -1, false);
955 }
956 
957 /*
958  * Find the entry in the interrupt holdoff timer value array which comes
959  * closest to the specified interrupt holdoff value.
960  */
961 static int closest_timer(const struct sge *s, int us)
962 {
963 	int i, timer_idx = 0, min_delta = INT_MAX;
964 
965 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
966 		int delta = us - s->timer_val[i];
967 		if (delta < 0)
968 			delta = -delta;
969 		if (delta < min_delta) {
970 			min_delta = delta;
971 			timer_idx = i;
972 		}
973 	}
974 	return timer_idx;
975 }
976 
977 static int closest_thres(const struct sge *s, int thres)
978 {
979 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
980 
981 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
982 		delta = thres - s->counter_val[i];
983 		if (delta < 0)
984 			delta = -delta;
985 		if (delta < min_delta) {
986 			min_delta = delta;
987 			pktcnt_idx = i;
988 		}
989 	}
990 	return pktcnt_idx;
991 }
992 
993 /*
994  * Return a queue's interrupt hold-off time in us.  0 means no timer.
995  */
996 static unsigned int qtimer_val(const struct adapter *adapter,
997 			       const struct sge_rspq *rspq)
998 {
999 	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1000 
1001 	return timer_idx < SGE_NTIMERS
1002 		? adapter->sge.timer_val[timer_idx]
1003 		: 0;
1004 }
1005 
1006 /**
1007  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1008  *	@adapter: the adapter
1009  *	@rspq: the RX response queue
1010  *	@us: the hold-off time in us, or 0 to disable timer
1011  *	@cnt: the hold-off packet count, or 0 to disable counter
1012  *
1013  *	Sets an RX response queue's interrupt hold-off time and packet count.
1014  *	At least one of the two needs to be enabled for the queue to generate
1015  *	interrupts.
1016  */
1017 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1018 			       unsigned int us, unsigned int cnt)
1019 {
1020 	unsigned int timer_idx;
1021 
1022 	/*
1023 	 * If both the interrupt holdoff timer and count are specified as
1024 	 * zero, default to a holdoff count of 1 ...
1025 	 */
1026 	if ((us | cnt) == 0)
1027 		cnt = 1;
1028 
1029 	/*
1030 	 * If an interrupt holdoff count has been specified, then find the
1031 	 * closest configured holdoff count and use that.  If the response
1032 	 * queue has already been created, then update its queue context
1033 	 * parameters ...
1034 	 */
1035 	if (cnt) {
1036 		int err;
1037 		u32 v, pktcnt_idx;
1038 
1039 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1040 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1041 			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1042 			    FW_PARAMS_PARAM_X_V(
1043 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1044 			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1045 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1046 			if (err)
1047 				return err;
1048 		}
1049 		rspq->pktcnt_idx = pktcnt_idx;
1050 	}
1051 
1052 	/*
1053 	 * Compute the closest holdoff timer index from the supplied holdoff
1054 	 * timer value.
1055 	 */
1056 	timer_idx = (us == 0
1057 		     ? SGE_TIMER_RSTRT_CNTR
1058 		     : closest_timer(&adapter->sge, us));
1059 
1060 	/*
1061 	 * Update the response queue's interrupt coalescing parameters and
1062 	 * return success.
1063 	 */
1064 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1065 			     QINTR_CNT_EN_V(cnt > 0));
1066 	return 0;
1067 }
1068 
1069 /*
1070  * Return a version number to identify the type of adapter.  The scheme is:
1071  * - bits 0..9: chip version
1072  * - bits 10..15: chip revision
1073  */
1074 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1075 {
1076 	/*
1077 	 * Chip version 4, revision 0x3f (cxgb4vf).
1078 	 */
1079 	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1080 }
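/*
 * For a T4 part (chip version 4) this evaluates to 4 | (0x3f << 10) =
 * 0xfc04, which shows up as the "version" of an ethtool register dump.
 */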
1081 
1082 /*
1083  * Execute the specified ioctl command.
1084  */
1085 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1086 {
1087 	int ret = 0;
1088 
1089 	switch (cmd) {
1090 	    /*
1091 	     * The VF Driver doesn't have access to any of the other
1092 	     * common Ethernet device ioctl()'s (like reading/writing
1093 	     * PHY registers, etc.).
1094 	     */
1095 
1096 	default:
1097 		ret = -EOPNOTSUPP;
1098 		break;
1099 	}
1100 	return ret;
1101 }
1102 
1103 /*
1104  * Change the device's MTU.
1105  */
1106 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1107 {
1108 	int ret;
1109 	struct port_info *pi = netdev_priv(dev);
1110 
1111 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1112 			      -1, -1, -1, -1, true);
1113 	if (!ret)
1114 		dev->mtu = new_mtu;
1115 	return ret;
1116 }
1117 
1118 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1119 	netdev_features_t features)
1120 {
1121 	/*
1122 	 * Since there is no support for separate RX/TX VLAN acceleration
1123 	 * enable/disable, make sure the TX flag is always in the same state as RX.
1124 	 */
1125 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1126 		features |= NETIF_F_HW_VLAN_CTAG_TX;
1127 	else
1128 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1129 
1130 	return features;
1131 }
1132 
1133 static int cxgb4vf_set_features(struct net_device *dev,
1134 	netdev_features_t features)
1135 {
1136 	struct port_info *pi = netdev_priv(dev);
1137 	netdev_features_t changed = dev->features ^ features;
1138 
1139 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1140 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1141 				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1142 
1143 	return 0;
1144 }
1145 
1146 /*
1147  * Change the device's MAC address.
1148  */
1149 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1150 {
1151 	int ret;
1152 	struct sockaddr *addr = _addr;
1153 	struct port_info *pi = netdev_priv(dev);
1154 
1155 	if (!is_valid_ether_addr(addr->sa_data))
1156 		return -EADDRNOTAVAIL;
1157 
1158 	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1159 			      addr->sa_data, true);
1160 	if (ret < 0)
1161 		return ret;
1162 
1163 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1164 	pi->xact_addr_filt = ret;
1165 	return 0;
1166 }
1167 
1168 #ifdef CONFIG_NET_POLL_CONTROLLER
1169 /*
1170  * Poll all of our receive queues.  This is called outside of normal interrupt
1171  * context.
1172  */
1173 static void cxgb4vf_poll_controller(struct net_device *dev)
1174 {
1175 	struct port_info *pi = netdev_priv(dev);
1176 	struct adapter *adapter = pi->adapter;
1177 
1178 	if (adapter->flags & USING_MSIX) {
1179 		struct sge_eth_rxq *rxq;
1180 		int nqsets;
1181 
1182 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1183 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1184 			t4vf_sge_intr_msix(0, &rxq->rspq);
1185 			rxq++;
1186 		}
1187 	} else
1188 		t4vf_intr_handler(adapter)(0, adapter);
1189 }
1190 #endif
1191 
1192 /*
1193  * Ethtool operations.
1194  * ===================
1195  *
1196  * Note that we don't support any ethtool operations which change the physical
1197  * state of the port to which we're linked.
1198  */
1199 
1200 /**
1201  *	from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1202  *	@port_type: Firmware Port Type
1203  *	@mod_type: Firmware Module Type
1204  *
1205  *	Translate Firmware Port/Module type to Ethtool Port Type.
1206  */
1207 static int from_fw_port_mod_type(enum fw_port_type port_type,
1208 				 enum fw_port_module_type mod_type)
1209 {
1210 	if (port_type == FW_PORT_TYPE_BT_SGMII ||
1211 	    port_type == FW_PORT_TYPE_BT_XFI ||
1212 	    port_type == FW_PORT_TYPE_BT_XAUI) {
1213 		return PORT_TP;
1214 	} else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1215 		   port_type == FW_PORT_TYPE_FIBER_XAUI) {
1216 		return PORT_FIBRE;
1217 	} else if (port_type == FW_PORT_TYPE_SFP ||
1218 		   port_type == FW_PORT_TYPE_QSFP_10G ||
1219 		   port_type == FW_PORT_TYPE_QSA ||
1220 		   port_type == FW_PORT_TYPE_QSFP) {
1221 		if (mod_type == FW_PORT_MOD_TYPE_LR ||
1222 		    mod_type == FW_PORT_MOD_TYPE_SR ||
1223 		    mod_type == FW_PORT_MOD_TYPE_ER ||
1224 		    mod_type == FW_PORT_MOD_TYPE_LRM)
1225 			return PORT_FIBRE;
1226 		else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1227 			 mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1228 			return PORT_DA;
1229 		else
1230 			return PORT_OTHER;
1231 	}
1232 
1233 	return PORT_OTHER;
1234 }
1235 
1236 /**
1237  *	fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1238  *	@port_type: Firmware Port Type
1239  *	@fw_caps: Firmware Port Capabilities
1240  *	@link_mode_mask: ethtool Link Mode Mask
1241  *
1242  *	Translate a Firmware Port Capabilities specification to an ethtool
1243  *	Link Mode Mask.
1244  */
1245 static void fw_caps_to_lmm(enum fw_port_type port_type,
1246 			   unsigned int fw_caps,
1247 			   unsigned long *link_mode_mask)
1248 {
1249 	#define SET_LMM(__lmm_name) __set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name\
1250 			 ## _BIT, link_mode_mask)
1251 
1252 	#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1253 		do { \
1254 			if (fw_caps & FW_PORT_CAP_ ## __fw_name) \
1255 				SET_LMM(__lmm_name); \
1256 		} while (0)
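	/*
	 * For instance, FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full) expands to
	 * a test of FW_PORT_CAP_SPEED_10G in fw_caps and, when set, a
	 * __set_bit() of ETHTOOL_LINK_MODE_10000baseT_Full_BIT in the mask.
	 */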
1257 
1258 	switch (port_type) {
1259 	case FW_PORT_TYPE_BT_SGMII:
1260 	case FW_PORT_TYPE_BT_XFI:
1261 	case FW_PORT_TYPE_BT_XAUI:
1262 		SET_LMM(TP);
1263 		FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1264 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1265 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1266 		break;
1267 
1268 	case FW_PORT_TYPE_KX4:
1269 	case FW_PORT_TYPE_KX:
1270 		SET_LMM(Backplane);
1271 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1272 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1273 		break;
1274 
1275 	case FW_PORT_TYPE_KR:
1276 		SET_LMM(Backplane);
1277 		SET_LMM(10000baseKR_Full);
1278 		break;
1279 
1280 	case FW_PORT_TYPE_BP_AP:
1281 		SET_LMM(Backplane);
1282 		SET_LMM(10000baseR_FEC);
1283 		SET_LMM(10000baseKR_Full);
1284 		SET_LMM(1000baseKX_Full);
1285 		break;
1286 
1287 	case FW_PORT_TYPE_BP4_AP:
1288 		SET_LMM(Backplane);
1289 		SET_LMM(10000baseR_FEC);
1290 		SET_LMM(10000baseKR_Full);
1291 		SET_LMM(1000baseKX_Full);
1292 		SET_LMM(10000baseKX4_Full);
1293 		break;
1294 
1295 	case FW_PORT_TYPE_FIBER_XFI:
1296 	case FW_PORT_TYPE_FIBER_XAUI:
1297 	case FW_PORT_TYPE_SFP:
1298 	case FW_PORT_TYPE_QSFP_10G:
1299 	case FW_PORT_TYPE_QSA:
1300 		SET_LMM(FIBRE);
1301 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1302 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1303 		break;
1304 
1305 	case FW_PORT_TYPE_BP40_BA:
1306 	case FW_PORT_TYPE_QSFP:
1307 		SET_LMM(FIBRE);
1308 		SET_LMM(40000baseSR4_Full);
1309 		break;
1310 
1311 	case FW_PORT_TYPE_CR_QSFP:
1312 	case FW_PORT_TYPE_SFP28:
1313 		SET_LMM(FIBRE);
1314 		SET_LMM(25000baseCR_Full);
1315 		break;
1316 
1317 	case FW_PORT_TYPE_KR4_100G:
1318 	case FW_PORT_TYPE_CR4_QSFP:
1319 		SET_LMM(FIBRE);
1320 		SET_LMM(100000baseCR4_Full);
1321 		break;
1322 
1323 	default:
1324 		break;
1325 	}
1326 
1327 	FW_CAPS_TO_LMM(ANEG, Autoneg);
1328 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1329 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1330 
1331 	#undef FW_CAPS_TO_LMM
1332 	#undef SET_LMM
1333 }
1334 
1335 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1336 				      struct ethtool_link_ksettings
1337 							*link_ksettings)
1338 {
1339 	const struct port_info *pi = netdev_priv(dev);
1340 	struct ethtool_link_settings *base = &link_ksettings->base;
1341 
1342 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1343 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1344 	ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1345 
1346 	base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1347 
1348 	if (pi->mdio_addr >= 0) {
1349 		base->phy_address = pi->mdio_addr;
1350 		base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1351 				      ? ETH_MDIO_SUPPORTS_C22
1352 				      : ETH_MDIO_SUPPORTS_C45);
1353 	} else {
1354 		base->phy_address = 255;
1355 		base->mdio_support = 0;
1356 	}
1357 
1358 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.supported,
1359 		       link_ksettings->link_modes.supported);
1360 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.advertising,
1361 		       link_ksettings->link_modes.advertising);
1362 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.lp_advertising,
1363 		       link_ksettings->link_modes.lp_advertising);
1364 
1365 	if (netif_carrier_ok(dev)) {
1366 		base->speed = pi->link_cfg.speed;
1367 		base->duplex = DUPLEX_FULL;
1368 	} else {
1369 		base->speed = SPEED_UNKNOWN;
1370 		base->duplex = DUPLEX_UNKNOWN;
1371 	}
1372 
1373 	base->autoneg = pi->link_cfg.autoneg;
1374 	if (pi->link_cfg.supported & FW_PORT_CAP_ANEG)
1375 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1376 						     supported, Autoneg);
1377 	if (pi->link_cfg.autoneg)
1378 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1379 						     advertising, Autoneg);
1380 
1381 	return 0;
1382 }
1383 
1384 /*
1385  * Return our driver information.
1386  */
1387 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1388 				struct ethtool_drvinfo *drvinfo)
1389 {
1390 	struct adapter *adapter = netdev2adap(dev);
1391 
1392 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1393 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1394 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1395 		sizeof(drvinfo->bus_info));
1396 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1397 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1398 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1399 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1400 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1401 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1402 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1403 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1404 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1405 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1406 }
1407 
1408 /*
1409  * Return current adapter message level.
1410  */
1411 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1412 {
1413 	return netdev2adap(dev)->msg_enable;
1414 }
1415 
1416 /*
1417  * Set current adapter message level.
1418  */
1419 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1420 {
1421 	netdev2adap(dev)->msg_enable = msglevel;
1422 }
1423 
1424 /*
1425  * Return the device's current Queue Set ring size parameters along with the
1426  * allowed maximum values.  Since ethtool doesn't understand the concept of
1427  * multi-queue devices, we just return the current values associated with the
1428  * first Queue Set.
1429  */
1430 static void cxgb4vf_get_ringparam(struct net_device *dev,
1431 				  struct ethtool_ringparam *rp)
1432 {
1433 	const struct port_info *pi = netdev_priv(dev);
1434 	const struct sge *s = &pi->adapter->sge;
1435 
1436 	rp->rx_max_pending = MAX_RX_BUFFERS;
1437 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1438 	rp->rx_jumbo_max_pending = 0;
1439 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1440 
1441 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1442 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1443 	rp->rx_jumbo_pending = 0;
1444 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1445 }
1446 
1447 /*
1448  * Set the Queue Set ring size parameters for the device.  Again, since
1449  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1450  * apply these new values across all of the Queue Sets associated with the
1451  * device -- after vetting them of course!
1452  */
1453 static int cxgb4vf_set_ringparam(struct net_device *dev,
1454 				 struct ethtool_ringparam *rp)
1455 {
1456 	const struct port_info *pi = netdev_priv(dev);
1457 	struct adapter *adapter = pi->adapter;
1458 	struct sge *s = &adapter->sge;
1459 	int qs;
1460 
1461 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1462 	    rp->rx_jumbo_pending ||
1463 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1464 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1465 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1466 	    rp->rx_pending < MIN_FL_ENTRIES ||
1467 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1468 		return -EINVAL;
1469 
1470 	if (adapter->flags & FULL_INIT_DONE)
1471 		return -EBUSY;
1472 
1473 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1474 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1475 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1476 		s->ethtxq[qs].q.size = rp->tx_pending;
1477 	}
1478 	return 0;
1479 }
1480 
1481 /*
1482  * Return the interrupt holdoff timer and count for the first Queue Set on the
1483  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1484  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1485  */
1486 static int cxgb4vf_get_coalesce(struct net_device *dev,
1487 				struct ethtool_coalesce *coalesce)
1488 {
1489 	const struct port_info *pi = netdev_priv(dev);
1490 	const struct adapter *adapter = pi->adapter;
1491 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1492 
1493 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1494 	coalesce->rx_max_coalesced_frames =
1495 		((rspq->intr_params & QINTR_CNT_EN_F)
1496 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1497 		 : 0);
1498 	return 0;
1499 }
1500 
1501 /*
1502  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1503  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1504  * the interrupt holdoff timer on any of the device's Queue Sets.
1505  */
1506 static int cxgb4vf_set_coalesce(struct net_device *dev,
1507 				struct ethtool_coalesce *coalesce)
1508 {
1509 	const struct port_info *pi = netdev_priv(dev);
1510 	struct adapter *adapter = pi->adapter;
1511 
1512 	return set_rxq_intr_params(adapter,
1513 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1514 				   coalesce->rx_coalesce_usecs,
1515 				   coalesce->rx_max_coalesced_frames);
1516 }
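/*
 * From user space this is typically reached via ethtool, e.g. the
 * (hypothetical) "ethtool -C eth0 rx-usecs 5 rx-frames 8", whose two
 * values become the us/cnt pair handed to set_rxq_intr_params() above.
 */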
1517 
1518 /*
1519  * Report current port link pause parameter settings.
1520  */
1521 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1522 				   struct ethtool_pauseparam *pauseparam)
1523 {
1524 	struct port_info *pi = netdev_priv(dev);
1525 
1526 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1527 	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1528 	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1529 }
1530 
1531 /*
1532  * Identify the port by blinking the port's LED.
1533  */
1534 static int cxgb4vf_phys_id(struct net_device *dev,
1535 			   enum ethtool_phys_id_state state)
1536 {
1537 	unsigned int val;
1538 	struct port_info *pi = netdev_priv(dev);
1539 
1540 	if (state == ETHTOOL_ID_ACTIVE)
1541 		val = 0xffff;
1542 	else if (state == ETHTOOL_ID_INACTIVE)
1543 		val = 0;
1544 	else
1545 		return -EINVAL;
1546 
1547 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1548 }
1549 
1550 /*
1551  * Port stats maintained per queue of the port.
1552  */
1553 struct queue_port_stats {
1554 	u64 tso;
1555 	u64 tx_csum;
1556 	u64 rx_csum;
1557 	u64 vlan_ex;
1558 	u64 vlan_ins;
1559 	u64 lro_pkts;
1560 	u64 lro_merged;
1561 };
1562 
1563 /*
1564  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1565  * these need to match the order of statistics returned by
1566  * t4vf_get_port_stats().
1567  */
1568 static const char stats_strings[][ETH_GSTRING_LEN] = {
1569 	/*
1570 	 * These must match the layout of the t4vf_port_stats structure.
1571 	 */
1572 	"TxBroadcastBytes  ",
1573 	"TxBroadcastFrames ",
1574 	"TxMulticastBytes  ",
1575 	"TxMulticastFrames ",
1576 	"TxUnicastBytes    ",
1577 	"TxUnicastFrames   ",
1578 	"TxDroppedFrames   ",
1579 	"TxOffloadBytes    ",
1580 	"TxOffloadFrames   ",
1581 	"RxBroadcastBytes  ",
1582 	"RxBroadcastFrames ",
1583 	"RxMulticastBytes  ",
1584 	"RxMulticastFrames ",
1585 	"RxUnicastBytes    ",
1586 	"RxUnicastFrames   ",
1587 	"RxErrorFrames     ",
1588 
1589 	/*
1590 	 * These are accumulated per-queue statistics and must match the
1591 	 * order of the fields in the queue_port_stats structure.
1592 	 */
1593 	"TSO               ",
1594 	"TxCsumOffload     ",
1595 	"RxCsumGood        ",
1596 	"VLANextractions   ",
1597 	"VLANinsertions    ",
1598 	"GROPackets        ",
1599 	"GROMerged         ",
1600 };
1601 
1602 /*
1603  * Return the number of statistics in the specified statistics set.
1604  */
1605 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1606 {
1607 	switch (sset) {
1608 	case ETH_SS_STATS:
1609 		return ARRAY_SIZE(stats_strings);
1610 	default:
1611 		return -EOPNOTSUPP;
1612 	}
1613 	/*NOTREACHED*/
1614 }
1615 
1616 /*
1617  * Return the strings for the specified statistics set.
1618  */
1619 static void cxgb4vf_get_strings(struct net_device *dev,
1620 				u32 sset,
1621 				u8 *data)
1622 {
1623 	switch (sset) {
1624 	case ETH_SS_STATS:
1625 		memcpy(data, stats_strings, sizeof(stats_strings));
1626 		break;
1627 	}
1628 }
1629 
1630 /*
1631  * Small utility routine to accumulate queue statistics across the queues of
1632  * a "port".
1633  */
1634 static void collect_sge_port_stats(const struct adapter *adapter,
1635 				   const struct port_info *pi,
1636 				   struct queue_port_stats *stats)
1637 {
1638 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1639 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1640 	int qs;
1641 
1642 	memset(stats, 0, sizeof(*stats));
1643 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1644 		stats->tso += txq->tso;
1645 		stats->tx_csum += txq->tx_cso;
1646 		stats->rx_csum += rxq->stats.rx_cso;
1647 		stats->vlan_ex += rxq->stats.vlan_ex;
1648 		stats->vlan_ins += txq->vlan_ins;
1649 		stats->lro_pkts += rxq->stats.lro_pkts;
1650 		stats->lro_merged += rxq->stats.lro_merged;
1651 	}
1652 }
1653 
1654 /*
1655  * Return the ETH_SS_STATS statistics set.
1656  */
1657 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1658 				      struct ethtool_stats *stats,
1659 				      u64 *data)
1660 {
1661 	struct port_info *pi = netdev2pinfo(dev);
1662 	struct adapter *adapter = pi->adapter;
1663 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1664 				      (struct t4vf_port_stats *)data);
1665 	if (err)
1666 		memset(data, 0, sizeof(struct t4vf_port_stats));
1667 
1668 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1669 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1670 }
1671 
1672 /*
1673  * Return the size of our register map.
1674  */
1675 static int cxgb4vf_get_regs_len(struct net_device *dev)
1676 {
1677 	return T4VF_REGMAP_SIZE;
1678 }
1679 
1680 /*
1681  * Dump a block of registers, start to end inclusive, into a buffer.
1682  */
1683 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1684 			   unsigned int start, unsigned int end)
1685 {
1686 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1687 
1688 	for ( ; start <= end; start += sizeof(u32)) {
1689 		/*
1690 		 * Avoid reading the Mailbox Control register since that
1691 		 * can trigger a Mailbox Ownership Arbitration cycle and
1692 		 * interfere with communication with the firmware.
1693 		 */
1694 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1695 			*bp++ = 0xffff;
1696 		else
1697 			*bp++ = t4_read_reg(adapter, start);
1698 	}
1699 }
1700 
1701 /*
1702  * Copy our entire register map into the provided buffer.
1703  */
1704 static void cxgb4vf_get_regs(struct net_device *dev,
1705 			     struct ethtool_regs *regs,
1706 			     void *regbuf)
1707 {
1708 	struct adapter *adapter = netdev2adap(dev);
1709 
1710 	regs->version = mk_adap_vers(adapter);
1711 
1712 	/*
1713 	 * Fill in register buffer with our register map.
1714 	 */
1715 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1716 
1717 	reg_block_dump(adapter, regbuf,
1718 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1719 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1720 	reg_block_dump(adapter, regbuf,
1721 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1722 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1723 
1724 	/* T5 adds new registers in the PL Register map.
1725 	 */
1726 	reg_block_dump(adapter, regbuf,
1727 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1728 		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1729 		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1730 	reg_block_dump(adapter, regbuf,
1731 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1732 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1733 
1734 	reg_block_dump(adapter, regbuf,
1735 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1736 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1737 }
1738 
1739 /*
1740  * Report current Wake On LAN settings.
1741  */
1742 static void cxgb4vf_get_wol(struct net_device *dev,
1743 			    struct ethtool_wolinfo *wol)
1744 {
1745 	wol->supported = 0;
1746 	wol->wolopts = 0;
1747 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1748 }
1749 
1750 /*
1751  * TCP Segmentation Offload flags which we support.
1752  */
1753 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1754 
1755 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1756 	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
1757 	.get_drvinfo		= cxgb4vf_get_drvinfo,
1758 	.get_msglevel		= cxgb4vf_get_msglevel,
1759 	.set_msglevel		= cxgb4vf_set_msglevel,
1760 	.get_ringparam		= cxgb4vf_get_ringparam,
1761 	.set_ringparam		= cxgb4vf_set_ringparam,
1762 	.get_coalesce		= cxgb4vf_get_coalesce,
1763 	.set_coalesce		= cxgb4vf_set_coalesce,
1764 	.get_pauseparam		= cxgb4vf_get_pauseparam,
1765 	.get_link		= ethtool_op_get_link,
1766 	.get_strings		= cxgb4vf_get_strings,
1767 	.set_phys_id		= cxgb4vf_phys_id,
1768 	.get_sset_count		= cxgb4vf_get_sset_count,
1769 	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1770 	.get_regs_len		= cxgb4vf_get_regs_len,
1771 	.get_regs		= cxgb4vf_get_regs,
1772 	.get_wol		= cxgb4vf_get_wol,
1773 };
1774 
1775 /*
1776  * /sys/kernel/debug/cxgb4vf support code and data.
1777  * ================================================
1778  */
1779 
1780 /*
1781  * Show Firmware Mailbox Command/Reply Log
1782  *
1783  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1784  * it's possible that we can catch things during a log update and therefore
1785  * see partially corrupted log entries.  But it's probably Good Enough(tm).
1786  * If we ever decide that we want to make sure that we're dumping a coherent
1787  * log, we'd need to perform locking in the mailbox logging and in
1788  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1789  * like we do for the Firmware Device Log.  But as stated above, meh ...
1790  */
1791 static int mboxlog_show(struct seq_file *seq, void *v)
1792 {
1793 	struct adapter *adapter = seq->private;
1794 	struct mbox_cmd_log *log = adapter->mbox_log;
1795 	struct mbox_cmd *entry;
1796 	int entry_idx, i;
1797 
1798 	if (v == SEQ_START_TOKEN) {
1799 		seq_printf(seq,
1800 			   "%10s  %15s  %5s  %5s  %s\n",
1801 			   "Seq#", "Tstamp", "Atime", "Etime",
1802 			   "Command/Reply");
1803 		return 0;
1804 	}
1805 
1806 	entry_idx = log->cursor + ((uintptr_t)v - 2);
1807 	if (entry_idx >= log->size)
1808 		entry_idx -= log->size;
1809 	entry = mbox_cmd_log_entry(log, entry_idx);
1810 
1811 	/* skip over unused entries */
1812 	if (entry->timestamp == 0)
1813 		return 0;
1814 
1815 	seq_printf(seq, "%10u  %15llu  %5d  %5d",
1816 		   entry->seqno, entry->timestamp,
1817 		   entry->access, entry->execute);
1818 	for (i = 0; i < MBOX_LEN / 8; i++) {
1819 		u64 flit = entry->cmd[i];
1820 		u32 hi = (u32)(flit >> 32);
1821 		u32 lo = (u32)flit;
1822 
1823 		seq_printf(seq, "  %08x %08x", hi, lo);
1824 	}
1825 	seq_puts(seq, "\n");
1826 	return 0;
1827 }
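/*
 * Worked example of the position-to-entry mapping above (illustrative
 * values only): suppose log->size = 7 and log->cursor = 3.  Sequence
 * position pos = 1 is encoded by mboxlog_get_idx() below as
 * v = (void *)2, so
 *
 *	entry_idx = log->cursor + (v - 2) = 3 + 0 = 3
 *
 * which is the oldest entry in the ring; pos = 5 gives
 * entry_idx = 3 + 4 = 7, which wraps back around to 0.  Each entry's
 * command is then dumped as MBOX_LEN/8 64-bit "flits", each printed
 * as its high and low 32-bit halves.
 */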
1828 
1829 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1830 {
1831 	struct adapter *adapter = seq->private;
1832 	struct mbox_cmd_log *log = adapter->mbox_log;
1833 
1834 	return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
1835 }
1836 
1837 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
1838 {
1839 	return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
1840 }
1841 
1842 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
1843 {
1844 	++*pos;
1845 	return mboxlog_get_idx(seq, *pos);
1846 }
1847 
1848 static void mboxlog_stop(struct seq_file *seq, void *v)
1849 {
1850 }
1851 
1852 static const struct seq_operations mboxlog_seq_ops = {
1853 	.start = mboxlog_start,
1854 	.next  = mboxlog_next,
1855 	.stop  = mboxlog_stop,
1856 	.show  = mboxlog_show
1857 };
1858 
1859 static int mboxlog_open(struct inode *inode, struct file *file)
1860 {
1861 	int res = seq_open(file, &mboxlog_seq_ops);
1862 
1863 	if (!res) {
1864 		struct seq_file *seq = file->private_data;
1865 
1866 		seq->private = inode->i_private;
1867 	}
1868 	return res;
1869 }
1870 
1871 static const struct file_operations mboxlog_fops = {
1872 	.owner   = THIS_MODULE,
1873 	.open    = mboxlog_open,
1874 	.read    = seq_read,
1875 	.llseek  = seq_lseek,
1876 	.release = seq_release,
1877 };
1878 
1879 /*
1880  * Show SGE Queue Set information.  We display QPL Queue Sets per line.
1881  */
1882 #define QPL	4
1883 
1884 static int sge_qinfo_show(struct seq_file *seq, void *v)
1885 {
1886 	struct adapter *adapter = seq->private;
1887 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1888 	int qs, r = (uintptr_t)v - 1;
1889 
1890 	if (r)
1891 		seq_putc(seq, '\n');
1892 
1893 	#define S3(fmt_spec, s, v) \
1894 		do {\
1895 			seq_printf(seq, "%-12s", s); \
1896 			for (qs = 0; qs < n; ++qs) \
1897 				seq_printf(seq, " %16" fmt_spec, v); \
1898 			seq_putc(seq, '\n'); \
1899 		} while (0)
1900 	#define S(s, v)		S3("s", s, v)
1901 	#define T(s, v)		S3("u", s, txq[qs].v)
1902 	#define R(s, v)		S3("u", s, rxq[qs].v)
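	/*
	 * For illustration, a row like T("TxQ ID:", q.abs_id) expands via
	 * S3() to roughly:
	 *
	 *	seq_printf(seq, "%-12s", "TxQ ID:");
	 *	for (qs = 0; qs < n; ++qs)
	 *		seq_printf(seq, " %16u", txq[qs].q.abs_id);
	 *	seq_putc(seq, '\n');
	 *
	 * i.e. one labelled row with up to QPL columns, one per Queue Set
	 * in the current group.
	 */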
1903 
1904 	if (r < eth_entries) {
1905 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1906 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1907 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1908 
1909 		S("QType:", "Ethernet");
1910 		S("Interface:",
1911 		  (rxq[qs].rspq.netdev
1912 		   ? rxq[qs].rspq.netdev->name
1913 		   : "N/A"));
1914 		S3("d", "Port:",
1915 		   (rxq[qs].rspq.netdev
1916 		    ? ((struct port_info *)
1917 		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1918 		    : -1));
1919 		T("TxQ ID:", q.abs_id);
1920 		T("TxQ size:", q.size);
1921 		T("TxQ inuse:", q.in_use);
1922 		T("TxQ PIdx:", q.pidx);
1923 		T("TxQ CIdx:", q.cidx);
1924 		R("RspQ ID:", rspq.abs_id);
1925 		R("RspQ size:", rspq.size);
1926 		R("RspQE size:", rspq.iqe_len);
1927 		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1928 		S3("u", "Intr pktcnt:",
1929 		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1930 		R("RspQ CIdx:", rspq.cidx);
1931 		R("RspQ Gen:", rspq.gen);
1932 		R("FL ID:", fl.abs_id);
1933 		R("FL size:", fl.size - MIN_FL_RESID);
1934 		R("FL avail:", fl.avail);
1935 		R("FL PIdx:", fl.pidx);
1936 		R("FL CIdx:", fl.cidx);
1937 		return 0;
1938 	}
1939 
1940 	r -= eth_entries;
1941 	if (r == 0) {
1942 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1943 
1944 		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1945 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1946 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1947 			   qtimer_val(adapter, evtq));
1948 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1949 			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1950 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1951 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1952 	} else if (r == 1) {
1953 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1954 
1955 		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1956 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1957 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1958 			   qtimer_val(adapter, intrq));
1959 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1960 			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1961 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1962 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1963 	}
1964 
1965 	#undef R
1966 	#undef T
1967 	#undef S
1968 	#undef S3
1969 
1970 	return 0;
1971 }
1972 
1973 /*
1974  * Return the number of "entries" in our "file".  We group the multi-Queue
1975  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1976  *
1977  *     Ethernet RX/TX Queue Sets
1978  *     Firmware Event Queue
1979  *     Forwarded Interrupt Queue (if in MSI mode)
1980  */
1981 static int sge_queue_entries(const struct adapter *adapter)
1982 {
1983 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1984 		((adapter->flags & USING_MSI) != 0);
1985 }
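/*
 * For example (illustrative numbers): with 10 Ethernet Queue Sets,
 * QPL = 4 and MSI Interrupt Mode this works out to
 *
 *	DIV_ROUND_UP(10, 4) + 1 + 1 = 3 + 1 + 1 = 5
 *
 * entries: three groups of Ethernet Queue Sets (4 + 4 + 2 columns),
 * plus the Firmware Event Queue and the Forwarded Interrupt Queue.
 */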
1986 
1987 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1988 {
1989 	int entries = sge_queue_entries(seq->private);
1990 
1991 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1992 }
1993 
1994 static void sge_queue_stop(struct seq_file *seq, void *v)
1995 {
1996 }
1997 
1998 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1999 {
2000 	int entries = sge_queue_entries(seq->private);
2001 
2002 	++*pos;
2003 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2004 }
2005 
2006 static const struct seq_operations sge_qinfo_seq_ops = {
2007 	.start = sge_queue_start,
2008 	.next  = sge_queue_next,
2009 	.stop  = sge_queue_stop,
2010 	.show  = sge_qinfo_show
2011 };
2012 
2013 static int sge_qinfo_open(struct inode *inode, struct file *file)
2014 {
2015 	int res = seq_open(file, &sge_qinfo_seq_ops);
2016 
2017 	if (!res) {
2018 		struct seq_file *seq = file->private_data;
2019 		seq->private = inode->i_private;
2020 	}
2021 	return res;
2022 }
2023 
2024 static const struct file_operations sge_qinfo_debugfs_fops = {
2025 	.owner   = THIS_MODULE,
2026 	.open    = sge_qinfo_open,
2027 	.read    = seq_read,
2028 	.llseek  = seq_lseek,
2029 	.release = seq_release,
2030 };
2031 
2032 /*
2033  * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2034  */
2035 #define QPL	4
2036 
2037 static int sge_qstats_show(struct seq_file *seq, void *v)
2038 {
2039 	struct adapter *adapter = seq->private;
2040 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
2041 	int qs, r = (uintptr_t)v - 1;
2042 
2043 	if (r)
2044 		seq_putc(seq, '\n');
2045 
2046 	#define S3(fmt, s, v) \
2047 		do { \
2048 			seq_printf(seq, "%-16s", s); \
2049 			for (qs = 0; qs < n; ++qs) \
2050 				seq_printf(seq, " %8" fmt, v); \
2051 			seq_putc(seq, '\n'); \
2052 		} while (0)
2053 	#define S(s, v)		S3("s", s, v)
2054 
2055 	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
2056 	#define T(s, v)		T3("lu", s, v)
2057 
2058 	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
2059 	#define R(s, v)		R3("lu", s, v)
2060 
2061 	if (r < eth_entries) {
2062 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
2063 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
2064 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
2065 
2066 		S("QType:", "Ethernet");
2067 		S("Interface:",
2068 		  (rxq[qs].rspq.netdev
2069 		   ? rxq[qs].rspq.netdev->name
2070 		   : "N/A"));
2071 		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
2072 		R("RxPackets:", stats.pkts);
2073 		R("RxCSO:", stats.rx_cso);
2074 		R("VLANxtract:", stats.vlan_ex);
2075 		R("LROmerged:", stats.lro_merged);
2076 		R("LROpackets:", stats.lro_pkts);
2077 		R("RxDrops:", stats.rx_drops);
2078 		T("TSO:", tso);
2079 		T("TxCSO:", tx_cso);
2080 		T("VLANins:", vlan_ins);
2081 		T("TxQFull:", q.stops);
2082 		T("TxQRestarts:", q.restarts);
2083 		T("TxMapErr:", mapping_err);
2084 		R("FLAllocErr:", fl.alloc_failed);
2085 		R("FLLrgAlcErr:", fl.large_alloc_failed);
2086 		R("FLStarving:", fl.starving);
2087 		return 0;
2088 	}
2089 
2090 	r -= eth_entries;
2091 	if (r == 0) {
2092 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
2093 
2094 		seq_printf(seq, "%-16s %8s\n", "QType:", "FW event queue");
2095 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2096 			   evtq->unhandled_irqs);
2097 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
2098 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
2099 	} else if (r == 1) {
2100 		const struct sge_rspq *intrq = &adapter->sge.intrq;
2101 
2102 		seq_printf(seq, "%-16s %8s\n", "QType:", "Interrupt Queue");
2103 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
2104 			   intrq->unhandled_irqs);
2105 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
2106 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
2107 	}
2108 
2109 	#undef R
2110 	#undef T
2111 	#undef S
2112 	#undef R3
2113 	#undef T3
2114 	#undef S3
2115 
2116 	return 0;
2117 }
2118 
2119 /*
2120  * Return the number of "entries" in our "file".  We group the multi-Queue
2121  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2122  *
2123  *     Ethernet RX/TX Queue Sets
2124  *     Firmware Event Queue
2125  *     Forwarded Interrupt Queue (if in MSI mode)
2126  */
2127 static int sge_qstats_entries(const struct adapter *adapter)
2128 {
2129 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2130 		((adapter->flags & USING_MSI) != 0);
2131 }
2132 
2133 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2134 {
2135 	int entries = sge_qstats_entries(seq->private);
2136 
2137 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2138 }
2139 
2140 static void sge_qstats_stop(struct seq_file *seq, void *v)
2141 {
2142 }
2143 
2144 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2145 {
2146 	int entries = sge_qstats_entries(seq->private);
2147 
2148 	(*pos)++;
2149 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2150 }
2151 
2152 static const struct seq_operations sge_qstats_seq_ops = {
2153 	.start = sge_qstats_start,
2154 	.next  = sge_qstats_next,
2155 	.stop  = sge_qstats_stop,
2156 	.show  = sge_qstats_show
2157 };
2158 
2159 static int sge_qstats_open(struct inode *inode, struct file *file)
2160 {
2161 	int res = seq_open(file, &sge_qstats_seq_ops);
2162 
2163 	if (res == 0) {
2164 		struct seq_file *seq = file->private_data;
2165 		seq->private = inode->i_private;
2166 	}
2167 	return res;
2168 }
2169 
2170 static const struct file_operations sge_qstats_proc_fops = {
2171 	.owner   = THIS_MODULE,
2172 	.open    = sge_qstats_open,
2173 	.read    = seq_read,
2174 	.llseek  = seq_lseek,
2175 	.release = seq_release,
2176 };
2177 
2178 /*
2179  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2180  */
2181 static int resources_show(struct seq_file *seq, void *v)
2182 {
2183 	struct adapter *adapter = seq->private;
2184 	struct vf_resources *vfres = &adapter->params.vfres;
2185 
2186 	#define S(desc, fmt, var) \
2187 		seq_printf(seq, "%-60s " fmt "\n", \
2188 			   desc " (" #var "):", vfres->var)
2189 
2190 	S("Virtual Interfaces", "%d", nvi);
2191 	S("Egress Queues", "%d", neq);
2192 	S("Ethernet Control", "%d", nethctrl);
2193 	S("Ingress Queues w/ Free Lists/Interrupts", "%d", niqflint);
2194 	S("Ingress Queues", "%d", niq);
2195 	S("Traffic Class", "%d", tc);
2196 	S("Port Access Rights Mask", "%#x", pmask);
2197 	S("MAC Address Filters", "%d", nexactf);
2198 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2199 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2200 
2201 	#undef S
2202 
2203 	return 0;
2204 }
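/*
 * The S() macro above leans on stringification to append the struct
 * member name to each description; e.g. S("Virtual Interfaces", "%d", nvi)
 * expands to
 *
 *	seq_printf(seq, "%-60s %d\n",
 *		   "Virtual Interfaces (nvi):", vfres->nvi);
 *
 * after string-literal concatenation.
 */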
2205 
2206 static int resources_open(struct inode *inode, struct file *file)
2207 {
2208 	return single_open(file, resources_show, inode->i_private);
2209 }
2210 
2211 static const struct file_operations resources_proc_fops = {
2212 	.owner   = THIS_MODULE,
2213 	.open    = resources_open,
2214 	.read    = seq_read,
2215 	.llseek  = seq_lseek,
2216 	.release = single_release,
2217 };
2218 
2219 /*
2220  * Show Virtual Interfaces.
2221  */
2222 static int interfaces_show(struct seq_file *seq, void *v)
2223 {
2224 	if (v == SEQ_START_TOKEN) {
2225 		seq_puts(seq, "Interface  Port   VIID\n");
2226 	} else {
2227 		struct adapter *adapter = seq->private;
2228 		int pidx = (uintptr_t)v - 2;
2229 		struct net_device *dev = adapter->port[pidx];
2230 		struct port_info *pi = netdev_priv(dev);
2231 
2232 		seq_printf(seq, "%9s  %4d  %#5x\n",
2233 			   dev->name, pi->port_id, pi->viid);
2234 	}
2235 	return 0;
2236 }
2237 
2238 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2239 {
2240 	return pos <= adapter->params.nports
2241 		? (void *)(uintptr_t)(pos + 1)
2242 		: NULL;
2243 }
2244 
2245 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2246 {
2247 	return *pos
2248 		? interfaces_get_idx(seq->private, *pos)
2249 		: SEQ_START_TOKEN;
2250 }
2251 
2252 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2253 {
2254 	(*pos)++;
2255 	return interfaces_get_idx(seq->private, *pos);
2256 }
2257 
2258 static void interfaces_stop(struct seq_file *seq, void *v)
2259 {
2260 }
2261 
2262 static const struct seq_operations interfaces_seq_ops = {
2263 	.start = interfaces_start,
2264 	.next  = interfaces_next,
2265 	.stop  = interfaces_stop,
2266 	.show  = interfaces_show
2267 };
2268 
2269 static int interfaces_open(struct inode *inode, struct file *file)
2270 {
2271 	int res = seq_open(file, &interfaces_seq_ops);
2272 
2273 	if (res == 0) {
2274 		struct seq_file *seq = file->private_data;
2275 		seq->private = inode->i_private;
2276 	}
2277 	return res;
2278 }
2279 
2280 static const struct file_operations interfaces_proc_fops = {
2281 	.owner   = THIS_MODULE,
2282 	.open    = interfaces_open,
2283 	.read    = seq_read,
2284 	.llseek  = seq_lseek,
2285 	.release = seq_release,
2286 };
2287 
2288 /*
2289  * /sys/kernel/debug/cxgb4vf/ files list.
2290  */
2291 struct cxgb4vf_debugfs_entry {
2292 	const char *name;		/* name of debugfs node */
2293 	umode_t mode;			/* file system mode */
2294 	const struct file_operations *fops;
2295 };
2296 
2297 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2298 	{ "mboxlog",    S_IRUGO, &mboxlog_fops },
2299 	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2300 	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2301 	{ "resources",  S_IRUGO, &resources_proc_fops },
2302 	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2303 };
2304 
2305 /*
2306  * Module and device initialization and cleanup code.
2307  * ==================================================
2308  */
2309 
2310 /*
2311  * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2312  * directory (debugfs_root) has already been set up.
2313  */
2314 static int setup_debugfs(struct adapter *adapter)
2315 {
2316 	int i;
2317 
2318 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2319 
2320 	/*
2321 	 * Debugfs support is best effort.
2322 	 */
2323 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2324 		(void)debugfs_create_file(debugfs_files[i].name,
2325 				  debugfs_files[i].mode,
2326 				  adapter->debugfs_root,
2327 				  (void *)adapter,
2328 				  debugfs_files[i].fops);
2329 
2330 	return 0;
2331 }
2332 
2333 /*
2334  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2335  * it to our caller to tear down the directory (debugfs_root).
2336  */
2337 static void cleanup_debugfs(struct adapter *adapter)
2338 {
2339 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2340 
2341 	/*
2342 	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2343 	 * individual entries because a call will be made to
2344 	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2345 	 * persistent state.
2346 	 */
2347 	/* nothing to do */
2348 }
2349 
2350 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2351  * knowing our Virtual Function Resources and may be called a second time if
2352  * we fall back from MSI-X to MSI Interrupt Mode.
2353  */
2354 static void size_nports_qsets(struct adapter *adapter)
2355 {
2356 	struct vf_resources *vfres = &adapter->params.vfres;
2357 	unsigned int ethqsets, pmask_nports;
2358 
2359 	/* The number of "ports" which we support is equal to the number of
2360 	 * Virtual Interfaces with which we've been provisioned.
2361 	 */
2362 	adapter->params.nports = vfres->nvi;
2363 	if (adapter->params.nports > MAX_NPORTS) {
2364 		dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2365 			 " allowed virtual interfaces\n", MAX_NPORTS,
2366 			 adapter->params.nports);
2367 		adapter->params.nports = MAX_NPORTS;
2368 	}
2369 
2370 	/* We may have been provisioned with more VIs than the number of
2371 	 * ports we're allowed to access (our Port Access Rights Mask).
2372 	 * This is obviously a configuration conflict but we don't want to
2373 	 * crash the kernel or anything silly just because of that.
2374 	 */
2375 	pmask_nports = hweight32(adapter->params.vfres.pmask);
2376 	if (pmask_nports < adapter->params.nports) {
2377 		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2378 			 " virtual interfaces; limited by Port Access Rights"
2379 			 " mask %#x\n", pmask_nports, adapter->params.nports,
2380 			 adapter->params.vfres.pmask);
2381 		adapter->params.nports = pmask_nports;
2382 	}
2383 
2384 	/* We need to reserve an Ingress Queue for the Asynchronous Firmware
2385 	 * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2386 	 * reserve an Ingress Queue for a Forwarded Interrupts.
2387 	 * reserve an Ingress Queue for Forwarded Interrupts.
2388 	 * The rest of the FL/Intr-capable ingress queues will be matched up
2389 	 * one-for-one with Ethernet/Control egress queues in order to form
2390 	 * "Queue Sets" which will be aportioned between the "ports".  For
2391 	 * "Queue Sets" which will be apportioned between the "ports".  For
2392 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2393 	 * Ethernet Queue.
2394 	 *
2395 	 * Note that even if we're currently configured to use MSI-X
2396 	 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2397 	 * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2398 	 * happens we'll need to adjust things later.
2399 	 */
2400 	ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2401 	if (vfres->nethctrl != ethqsets)
2402 		ethqsets = min(vfres->nethctrl, ethqsets);
2403 	if (vfres->neq < ethqsets*2)
2404 		ethqsets = vfres->neq/2;
2405 	if (ethqsets > MAX_ETH_QSETS)
2406 		ethqsets = MAX_ETH_QSETS;
2407 	adapter->sge.max_ethqsets = ethqsets;
2408 
2409 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2410 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2411 			 " virtual interfaces (too few Queue Sets)\n",
2412 			 adapter->sge.max_ethqsets, adapter->params.nports);
2413 		adapter->params.nports = adapter->sge.max_ethqsets;
2414 	}
2415 }
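/*
 * A worked example of the Queue Set sizing above (illustrative
 * provisioning values only): with niqflint = 16, nethctrl = 16 and
 * neq = 26 in MSI-X mode,
 *
 *	ethqsets = 16 - 1 - 0 = 15;	(FW Event Queue reserved)
 *	ethqsets = min(16, 15) = 15;	(Ethernet Control limit)
 *	ethqsets = 26 / 2 = 13;		(two Egress Contexts per set)
 *
 * so at most 13 Queue Sets get spread across the ports, subject to
 * the MAX_ETH_QSETS cap.
 */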
2416 
2417 /*
2418  * Perform early "adapter" initialization.  This is where we discover what
2419  * adapter parameters we're going to be using and initialize basic adapter
2420  * hardware support.
2421  */
2422 static int adap_init0(struct adapter *adapter)
2423 {
2424 	struct sge_params *sge_params = &adapter->params.sge;
2425 	struct sge *s = &adapter->sge;
2426 	int err;
2427 	u32 param, val = 0;
2428 
2429 	/*
2430 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2431 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2432 	 * issue an FLR because of a self- deadlock on the device semaphore.
2433 	 * issue an FLR because of a self-deadlock on the device semaphore.
2434 	 * cases where they're needed -- for instance, some versions of KVM
2435 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2436 	 * use the firmware based reset in order to reset any per function
2437 	 * state.
2438 	 */
2439 	err = t4vf_fw_reset(adapter);
2440 	if (err < 0) {
2441 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2442 		return err;
2443 	}
2444 
2445 	/*
2446 	 * Grab basic operational parameters.  These will predominantly have
2447 	 * been set up by the Physical Function Driver or will be hard coded
2448 	 * into the adapter.  We just have to live with them ...  Note that
2449 	 * we _must_ get our VPD parameters before our SGE parameters because
2450 	 * we need to know the adapter's core clock from the VPD in order to
2451 	 * properly decode the SGE Timer Values.
2452 	 */
2453 	err = t4vf_get_dev_params(adapter);
2454 	if (err) {
2455 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2456 			" device parameters: err=%d\n", err);
2457 		return err;
2458 	}
2459 	err = t4vf_get_vpd_params(adapter);
2460 	if (err) {
2461 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2462 			" VPD parameters: err=%d\n", err);
2463 		return err;
2464 	}
2465 	err = t4vf_get_sge_params(adapter);
2466 	if (err) {
2467 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2468 			" SGE parameters: err=%d\n", err);
2469 		return err;
2470 	}
2471 	err = t4vf_get_rss_glb_config(adapter);
2472 	if (err) {
2473 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2474 			" RSS parameters: err=%d\n", err);
2475 		return err;
2476 	}
2477 	if (adapter->params.rss.mode !=
2478 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2479 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2480 			" mode %d\n", adapter->params.rss.mode);
2481 		return -EINVAL;
2482 	}
2483 	err = t4vf_sge_init(adapter);
2484 	if (err) {
2485 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2486 			" err=%d\n", err);
2487 		return err;
2488 	}
2489 
2490 	/* If we're running on newer firmware, let it know that we're
2491 	 * prepared to deal with encapsulated CPL messages.  Older
2492 	 * firmware won't understand this and we'll just get
2493 	 * unencapsulated messages ...
2494 	 */
2495 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2496 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2497 	val = 1;
2498 	(void) t4vf_set_params(adapter, 1, &param, &val);
2499 
2500 	/*
2501 	 * Retrieve our RX interrupt holdoff timer values and counter
2502 	 * threshold values from the SGE parameters.
2503 	 */
2504 	s->timer_val[0] = core_ticks_to_us(adapter,
2505 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2506 	s->timer_val[1] = core_ticks_to_us(adapter,
2507 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2508 	s->timer_val[2] = core_ticks_to_us(adapter,
2509 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2510 	s->timer_val[3] = core_ticks_to_us(adapter,
2511 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2512 	s->timer_val[4] = core_ticks_to_us(adapter,
2513 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2514 	s->timer_val[5] = core_ticks_to_us(adapter,
2515 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2516 
2517 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2518 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2519 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2520 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2521 
2522 	/*
2523 	 * Grab our Virtual Interface resource allocation, extract the
2524 	 * features that we're interested in and do a bit of sanity testing on
2525 	 * what we discover.
2526 	 */
2527 	err = t4vf_get_vfres(adapter);
2528 	if (err) {
2529 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2530 			" resources: err=%d\n", err);
2531 		return err;
2532 	}
2533 
2534 	/* Check for various parameter sanity issues */
2535 	if (adapter->params.vfres.pmask == 0) {
2536 		dev_err(adapter->pdev_dev, "no port access configured\n"
2537 		dev_err(adapter->pdev_dev, "no port access configured/"
2538 		return -EINVAL;
2539 	}
2540 	if (adapter->params.vfres.nvi == 0) {
2541 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2542 			"usable!\n");
2543 		return -EINVAL;
2544 	}
2545 
2546 	/* Initialize nports and max_ethqsets now that we have our Virtual
2547 	 * Function Resources.
2548 	 */
2549 	size_nports_qsets(adapter);
2550 
2551 	return 0;
2552 }
2553 
2554 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2555 			     u8 pkt_cnt_idx, unsigned int size,
2556 			     unsigned int iqe_size)
2557 {
2558 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2559 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2560 			      QINTR_CNT_EN_F : 0));
2561 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2562 			    ? pkt_cnt_idx
2563 			    : 0);
2564 	rspq->iqe_len = iqe_size;
2565 	rspq->size = size;
2566 }
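/*
 * For instance, the Ethernet Response Queues set up by cfg_queues()
 * below use init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size): holdoff
 * timer index 0 (the shortest timer), packet-count threshold index 0
 * (valid, so QINTR_CNT_EN_F gets set in intr_params), and 1024
 * entries of iqe_size bytes each.
 */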
2567 
2568 /*
2569  * Perform default configuration of DMA queues depending on the number and
2570  * type of ports we found and the number of available CPUs.  Most settings can
2571  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2572  * being brought up for the first time.
2573  */
2574 static void cfg_queues(struct adapter *adapter)
2575 {
2576 	struct sge *s = &adapter->sge;
2577 	int q10g, n10g, qidx, pidx, qs;
2578 	size_t iqe_size;
2579 
2580 	/*
2581 	 * We should not be called till we know how many Queue Sets we can
2582 	 * support.  In particular, this means that we need to know what kind
2583 	 * of interrupts we'll be using ...
2584 	 */
2585 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2586 
2587 	/*
2588 	 * Count the number of 10GbE Virtual Interfaces that we have.
2589 	 */
2590 	n10g = 0;
2591 	for_each_port(adapter, pidx)
2592 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2593 
2594 	/*
2595 	 * We default to 1 queue per non-10G port and up to as many queues
2596 	 * as there are CPU cores per 10G port.
2597 	 */
2598 	if (n10g == 0)
2599 		q10g = 0;
2600 	else {
2601 		int n1g = (adapter->params.nports - n10g);
2602 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2603 		if (q10g > num_online_cpus())
2604 			q10g = num_online_cpus();
2605 	}
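	/*
	 * E.g. (illustrative): with two ports, one of them 10G, and
	 * max_ethqsets = 16, n1g = 1 and q10g = (16 - 1) / 1 = 15,
	 * further clamped to num_online_cpus().
	 */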
2606 
2607 	/*
2608 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2609 	 * The layout will be established in setup_sge_queues() when the
2610 	 * adapter is brought up for the first time.
2611 	 */
2612 	qidx = 0;
2613 	for_each_port(adapter, pidx) {
2614 		struct port_info *pi = adap2pinfo(adapter, pidx);
2615 
2616 		pi->first_qset = qidx;
2617 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2618 		qidx += pi->nqsets;
2619 	}
2620 	s->ethqsets = qidx;
2621 
2622 	/*
2623 	 * The Ingress Queue Entry Size for our various Response Queues needs
2624 	 * to be big enough to accommodate the largest message we can receive
2625 	 * from the chip/firmware; which is 64 bytes ...
2626 	 */
2627 	iqe_size = 64;
2628 
2629 	/*
2630 	 * Set up default Queue Set parameters ...  Start off with the
2631 	 * shortest interrupt holdoff timer.
2632 	 */
2633 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2634 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2635 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2636 
2637 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2638 		rxq->fl.size = 72;
2639 		txq->q.size = 1024;
2640 	}
2641 
2642 	/*
2643 	 * The firmware event queue is used for link state changes and
2644 	 * notifications of TX DMA completions.
2645 	 */
2646 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2647 
2648 	/*
2649 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2650 	 * mode.  In this mode all interrupts associated with RX queues will
2651 	 * be forwarded to a single queue which we'll associate with our MSI
2652 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2653 	 * queue will indicate which ingress queue needs servicing ...  This
2654 	 * queue needs to be large enough to accommodate all of the ingress
2655 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2656 	 * from equalling the CIDX if every ingress queue has an outstanding
2657 	 * interrupt).  The queue doesn't need to be any larger because no
2658 	 * ingress queue will ever have more than one outstanding interrupt at
2659 	 * any time ...
2660 	 */
2661 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2662 		  iqe_size);
2663 }
2664 
2665 /*
2666  * Reduce the number of Ethernet queues across all ports to at most n.
2667  * n provides at least one queue per port.
2668  */
2669 static void reduce_ethqs(struct adapter *adapter, int n)
2670 {
2671 	int i;
2672 	struct port_info *pi;
2673 
2674 	/*
2675 	 * While we have too many active Ethernet Queue Sets, iterate across the
2676 	 * "ports" and reduce their individual Queue Set allocations.
2677 	 */
2678 	BUG_ON(n < adapter->params.nports);
2679 	while (n < adapter->sge.ethqsets)
2680 		for_each_port(adapter, i) {
2681 			pi = adap2pinfo(adapter, i);
2682 			if (pi->nqsets > 1) {
2683 				pi->nqsets--;
2684 				adapter->sge.ethqsets--;
2685 				if (adapter->sge.ethqsets <= n)
2686 					break;
2687 			}
2688 		}
2689 
2690 	/*
2691 	 * Reassign the starting Queue Sets for each of the "ports" ...
2692 	 */
2693 	n = 0;
2694 	for_each_port(adapter, i) {
2695 		pi = adap2pinfo(adapter, i);
2696 		pi->first_qset = n;
2697 		n += pi->nqsets;
2698 	}
2699 }
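/*
 * E.g. (illustrative): two ports with nqsets = {8, 8} reduced to
 * n = 10 end up as {5, 5}: each pass of the loop above takes one
 * Queue Set from each port in turn until sge.ethqsets reaches n,
 * after which first_qset is recomputed as {0, 5}.
 */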
2700 
2701 /*
2702  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2703  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2704  * need.  Minimally we need one for every Virtual Interface plus those needed
2705  * for our "extras".  Note that this process may lower the maximum number of
2706  * allowed Queue Sets ...
2707  */
2708 static int enable_msix(struct adapter *adapter)
2709 {
2710 	int i, want, need, nqsets;
2711 	struct msix_entry entries[MSIX_ENTRIES];
2712 	struct sge *s = &adapter->sge;
2713 
2714 	for (i = 0; i < MSIX_ENTRIES; ++i)
2715 		entries[i].entry = i;
2716 
2717 	/*
2718 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2719 	 * plus those needed for our "extras" (for example, the firmware
2720 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2721 	 * Interface plus those needed for our "extras".  So now we get to see
2722 	 * if the song is right ...
2723 	 */
2724 	want = s->max_ethqsets + MSIX_EXTRAS;
2725 	need = adapter->params.nports + MSIX_EXTRAS;
2726 
2727 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2728 	if (want < 0)
2729 		return want;
2730 
2731 	nqsets = want - MSIX_EXTRAS;
2732 	if (nqsets < s->max_ethqsets) {
2733 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2734 			 " for %d Queue Sets\n", nqsets);
2735 		s->max_ethqsets = nqsets;
2736 		if (nqsets < s->ethqsets)
2737 			reduce_ethqs(adapter, nqsets);
2738 	}
2739 	for (i = 0; i < want; ++i)
2740 		adapter->msix_info[i].vec = entries[i].vector;
2741 
2742 	return 0;
2743 }
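/*
 * E.g. (illustrative, and assuming MSIX_EXTRAS = 1 for the firmware
 * event queue): with max_ethqsets = 13 and nports = 2 we ask
 * pci_enable_msix_range() for between need = 3 and want = 14 vectors.
 * If only 9 are granted, nqsets = 8, so max_ethqsets is trimmed and
 * reduce_ethqs() rebalances the per-port allocations to fit.
 */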
2744 
2745 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2746 	.ndo_open		= cxgb4vf_open,
2747 	.ndo_stop		= cxgb4vf_stop,
2748 	.ndo_start_xmit		= t4vf_eth_xmit,
2749 	.ndo_get_stats		= cxgb4vf_get_stats,
2750 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2751 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2752 	.ndo_validate_addr	= eth_validate_addr,
2753 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2754 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2755 	.ndo_fix_features	= cxgb4vf_fix_features,
2756 	.ndo_set_features	= cxgb4vf_set_features,
2757 #ifdef CONFIG_NET_POLL_CONTROLLER
2758 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2759 #endif
2760 };
2761 
2762 /*
2763  * "Probe" a device: initialize a device and construct all kernel and driver
2764  * state needed to manage the device.  This routine is called "init_one" in
2765  * the PF Driver ...
2766  */
2767 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2768 			     const struct pci_device_id *ent)
2769 {
2770 	int pci_using_dac;
2771 	int err, pidx;
2772 	unsigned int pmask;
2773 	struct adapter *adapter;
2774 	struct port_info *pi;
2775 	struct net_device *netdev;
2776 	unsigned int pf;
2777 
2778 	/*
2779 	 * Print our driver banner the first time we're called to initialize a
2780 	 * device.
2781 	 */
2782 	pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION);
2783 
2784 	/*
2785 	 * Initialize generic PCI device state.
2786 	 */
2787 	err = pci_enable_device(pdev);
2788 	if (err) {
2789 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2790 		return err;
2791 	}
2792 
2793 	/*
2794 	 * Reserve PCI resources for the device.  If we can't get them some
2795 	 * other driver may have already claimed the device ...
2796 	 */
2797 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2798 	if (err) {
2799 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2800 		goto err_disable_device;
2801 	}
2802 
2803 	/*
2804 	 * Set up our DMA mask: try for 64-bit address masking first and
2805 	 * fall back to 32-bit if we can't get 64 bits ...
2806 	 */
2807 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2808 	if (err == 0) {
2809 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2810 		if (err) {
2811 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2812 				" coherent allocations\n");
2813 			goto err_release_regions;
2814 		}
2815 		pci_using_dac = 1;
2816 	} else {
2817 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2818 		if (err != 0) {
2819 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2820 			goto err_release_regions;
2821 		}
2822 		pci_using_dac = 0;
2823 	}
2824 
2825 	/*
2826 	 * Enable bus mastering for the device ...
2827 	 */
2828 	pci_set_master(pdev);
2829 
2830 	/*
2831 	 * Allocate our adapter data structure and attach it to the device.
2832 	 */
2833 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2834 	if (!adapter) {
2835 		err = -ENOMEM;
2836 		goto err_release_regions;
2837 	}
2838 	pci_set_drvdata(pdev, adapter);
2839 	adapter->pdev = pdev;
2840 	adapter->pdev_dev = &pdev->dev;
2841 
2842 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2843 				    (sizeof(struct mbox_cmd) *
2844 				     T4VF_OS_LOG_MBOX_CMDS),
2845 				    GFP_KERNEL);
2846 	if (!adapter->mbox_log) {
2847 		err = -ENOMEM;
2848 		goto err_free_adapter;
2849 	}
2850 	adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
2851 
2852 	/*
2853 	 * Initialize SMP data synchronization resources.
2854 	 */
2855 	spin_lock_init(&adapter->stats_lock);
2856 	spin_lock_init(&adapter->mbox_lock);
2857 	INIT_LIST_HEAD(&adapter->mlist.list);
2858 
2859 	/*
2860 	 * Map our I/O registers in BAR0.
2861 	 */
2862 	adapter->regs = pci_ioremap_bar(pdev, 0);
2863 	if (!adapter->regs) {
2864 		dev_err(&pdev->dev, "cannot map device registers\n");
2865 		err = -ENOMEM;
2866 		goto err_free_adapter;
2867 	}
2868 
2869 	/* Wait for the device to become ready before proceeding ...
2870 	 */
2871 	err = t4vf_prep_adapter(adapter);
2872 	if (err) {
2873 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2874 			" err=%d\n", err);
2875 		goto err_unmap_bar0;
2876 	}
2877 
2878 	/* For T5 and later we want to use the new BAR-based User Doorbells,
2879 	 * so we need to map BAR2 here ...
2880 	 */
2881 	if (!is_t4(adapter->params.chip)) {
2882 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
2883 					   pci_resource_len(pdev, 2));
2884 		if (!adapter->bar2) {
2885 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
2886 			err = -ENOMEM;
2887 			goto err_unmap_bar0;
2888 		}
2889 	}
2890 	/*
2891 	 * Initialize adapter level features.
2892 	 */
2893 	adapter->name = pci_name(pdev);
2894 	adapter->msg_enable = dflt_msg_enable;
2895 	err = adap_init0(adapter);
2896 	if (err)
2897 		goto err_unmap_bar;
2898 
2899 	/*
2900 	 * Allocate our "adapter ports" and stitch everything together.
2901 	 */
2902 	pmask = adapter->params.vfres.pmask;
2903 	pf = t4vf_get_pf_from_vf(adapter);
2904 	for_each_port(adapter, pidx) {
2905 		int port_id, viid;
2906 		u8 mac[ETH_ALEN];
2907 		unsigned int naddr = 1;
2908 
2909 		/*
2910 		 * We simplistically allocate our virtual interfaces
2911 		 * sequentially across the port numbers to which we have
2912 		 * access rights.  This should be configurable in some manner
2913 		 * ...
2914 		 */
2915 		if (pmask == 0)
2916 			break;
2917 		port_id = ffs(pmask) - 1;
2918 		pmask &= ~(1 << port_id);
2919 		viid = t4vf_alloc_vi(adapter, port_id);
2920 		if (viid < 0) {
2921 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2922 				" err=%d\n", port_id, viid);
2923 			err = viid;
2924 			goto err_free_dev;
2925 		}
2926 
2927 		/*
2928 		 * Allocate our network device and stitch things together.
2929 		 */
2930 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2931 					   MAX_PORT_QSETS);
2932 		if (netdev == NULL) {
2933 			t4vf_free_vi(adapter, viid);
2934 			err = -ENOMEM;
2935 			goto err_free_dev;
2936 		}
2937 		adapter->port[pidx] = netdev;
2938 		SET_NETDEV_DEV(netdev, &pdev->dev);
2939 		pi = netdev_priv(netdev);
2940 		pi->adapter = adapter;
2941 		pi->pidx = pidx;
2942 		pi->port_id = port_id;
2943 		pi->viid = viid;
2944 
2945 		/*
2946 		 * Initialize the starting state of our "port" and register
2947 		 * it.
2948 		 */
2949 		pi->xact_addr_filt = -1;
2950 		netif_carrier_off(netdev);
2951 		netdev->irq = pdev->irq;
2952 
2953 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2954 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2955 			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM;
2956 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2957 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2958 			NETIF_F_HIGHDMA;
2959 		netdev->features = netdev->hw_features |
2960 				   NETIF_F_HW_VLAN_CTAG_TX;
2961 		if (pci_using_dac)
2962 			netdev->features |= NETIF_F_HIGHDMA;
2963 
2964 		netdev->priv_flags |= IFF_UNICAST_FLT;
2965 		netdev->min_mtu = 81;
2966 		netdev->max_mtu = ETH_MAX_MTU;
2967 
2968 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2969 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
2970 
2971 		/*
2972 		 * Initialize the hardware/software state for the port.
2973 		 */
2974 		err = t4vf_port_init(adapter, pidx);
2975 		if (err) {
2976 			dev_err(&pdev->dev, "cannot initialize port %d\n",
2977 				pidx);
2978 			goto err_free_dev;
2979 		}
2980 
2981 		err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
2982 		if (err) {
2983 			dev_err(&pdev->dev,
2984 				"unable to determine MAC ACL address, "
2985 				"continuing anyway (status %d)\n", err);
2986 		} else if (naddr && adapter->params.vfres.nvi == 1) {
2987 			struct sockaddr addr;
2988 
2989 			ether_addr_copy(addr.sa_data, mac);
2990 			err = cxgb4vf_set_mac_addr(netdev, &addr);
2991 			if (err) {
2992 				dev_err(&pdev->dev,
2993 					"unable to set MAC address %pM\n",
2994 					mac);
2995 				goto err_free_dev;
2996 			}
2997 			dev_info(&pdev->dev,
2998 				 "Using assigned MAC ACL: %pM\n", mac);
2999 		}
3000 	}
3001 
3002 	/* See what interrupts we'll be using.  If we've been configured to
3003 	 * use MSI-X interrupts, try to enable them but fall back to using
3004 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3005 	 * get MSI interrupts we bail with the error.
3006 	 */
3007 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3008 		adapter->flags |= USING_MSIX;
3009 	else {
3010 		if (msi == MSI_MSIX) {
3011 			dev_info(adapter->pdev_dev,
3012 				 "Unable to use MSI-X Interrupts; falling "
3013 				 "back to MSI Interrupts\n");
3014 
3015 			/* We're going to need a Forwarded Interrupt Queue so
3016 			 * that may cut into how many Queue Sets we can
3017 			 * support.
3018 			 */
3019 			msi = MSI_MSI;
3020 			size_nports_qsets(adapter);
3021 		}
3022 		err = pci_enable_msi(pdev);
3023 		if (err) {
3024 			dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3025 				" err=%d\n", err);
3026 			goto err_free_dev;
3027 		}
3028 		adapter->flags |= USING_MSI;
3029 	}
3030 
3031 	/* Now that we know how many "ports" we have and what interrupt
3032 	 * mechanism we're going to use, we can configure our queue resources.
3033 	 */
3034 	cfg_queues(adapter);
3035 
3036 	/*
3037 	 * The "card" is now ready to go.  If any errors occur during device
3038 	 * registration we do not fail the whole "card" but rather proceed
3039 	 * only with the ports we manage to register successfully.  However we
3040 	 * must register at least one net device.
3041 	 */
3042 	for_each_port(adapter, pidx) {
3043 		struct port_info *pi = netdev_priv(adapter->port[pidx]);
3044 		netdev = adapter->port[pidx];
3045 		if (netdev == NULL)
3046 			continue;
3047 
3048 		netif_set_real_num_tx_queues(netdev, pi->nqsets);
3049 		netif_set_real_num_rx_queues(netdev, pi->nqsets);
3050 
3051 		err = register_netdev(netdev);
3052 		if (err) {
3053 			dev_warn(&pdev->dev, "cannot register net device %s,"
3054 				 " skipping\n", netdev->name);
3055 			continue;
3056 		}
3057 
3058 		set_bit(pidx, &adapter->registered_device_map);
3059 	}
3060 	if (adapter->registered_device_map == 0) {
3061 		dev_err(&pdev->dev, "could not register any net devices\n");
3062 		goto err_disable_interrupts;
3063 	}
3064 
3065 	/*
3066 	 * Set up our debugfs entries.
3067 	 */
3068 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3069 		adapter->debugfs_root =
3070 			debugfs_create_dir(pci_name(pdev),
3071 					   cxgb4vf_debugfs_root);
3072 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
3073 			dev_warn(&pdev->dev, "could not create debugfs"
3074 				 " directory");
3075 		else
3076 			setup_debugfs(adapter);
3077 	}
3078 
3079 	/*
3080 	 * Print a short notice on the existence and configuration of the new
3081 	 * VF network device ...
3082 	 */
3083 	for_each_port(adapter, pidx) {
3084 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3085 			 adapter->port[pidx]->name,
3086 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
3087 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
3088 	}
3089 
3090 	/*
3091 	 * Return success!
3092 	 */
3093 	return 0;
3094 
3095 	/*
3096 	 * Error recovery and exit code.  Unwind state that's been created
3097 	 * so far and return the error.
3098 	 */
3099 err_disable_interrupts:
3100 	if (adapter->flags & USING_MSIX) {
3101 		pci_disable_msix(adapter->pdev);
3102 		adapter->flags &= ~USING_MSIX;
3103 	} else if (adapter->flags & USING_MSI) {
3104 		pci_disable_msi(adapter->pdev);
3105 		adapter->flags &= ~USING_MSI;
3106 	}
3107 
3108 err_free_dev:
3109 	for_each_port(adapter, pidx) {
3110 		netdev = adapter->port[pidx];
3111 		if (netdev == NULL)
3112 			continue;
3113 		pi = netdev_priv(netdev);
3114 		t4vf_free_vi(adapter, pi->viid);
3115 		if (test_bit(pidx, &adapter->registered_device_map))
3116 			unregister_netdev(netdev);
3117 		free_netdev(netdev);
3118 	}
3119 
3120 err_unmap_bar:
3121 	if (!is_t4(adapter->params.chip))
3122 		iounmap(adapter->bar2);
3123 
3124 err_unmap_bar0:
3125 	iounmap(adapter->regs);
3126 
3127 err_free_adapter:
3128 	kfree(adapter->mbox_log);
3129 	kfree(adapter);
3130 
3131 err_release_regions:
3132 	pci_release_regions(pdev);
3133 	pci_clear_master(pdev);
3134 
3135 err_disable_device:
3136 	pci_disable_device(pdev);
3137 
3138 	return err;
3139 }
3140 
3141 /*
3142  * "Remove" a device: tear down all kernel and driver state created in the
3143  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3144  * that this is called "remove_one" in the PF Driver.)
3145  */
3146 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3147 {
3148 	struct adapter *adapter = pci_get_drvdata(pdev);
3149 
3150 	/*
3151 	 * Tear down driver state associated with device.
3152 	 */
3153 	if (adapter) {
3154 		int pidx;
3155 
3156 		/*
3157 		 * Stop all of our activity.  Unregister network port,
3158 		 * disable interrupts, etc.
3159 		 */
3160 		for_each_port(adapter, pidx)
3161 			if (test_bit(pidx, &adapter->registered_device_map))
3162 				unregister_netdev(adapter->port[pidx]);
3163 		t4vf_sge_stop(adapter);
3164 		if (adapter->flags & USING_MSIX) {
3165 			pci_disable_msix(adapter->pdev);
3166 			adapter->flags &= ~USING_MSIX;
3167 		} else if (adapter->flags & USING_MSI) {
3168 			pci_disable_msi(adapter->pdev);
3169 			adapter->flags &= ~USING_MSI;
3170 		}
3171 
3172 		/*
3173 		 * Tear down our debugfs entries.
3174 		 */
3175 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3176 			cleanup_debugfs(adapter);
3177 			debugfs_remove_recursive(adapter->debugfs_root);
3178 		}
3179 
3180 		/*
3181 		 * Free all of the various resources which we've acquired ...
3182 		 */
3183 		t4vf_free_sge_resources(adapter);
3184 		for_each_port(adapter, pidx) {
3185 			struct net_device *netdev = adapter->port[pidx];
3186 			struct port_info *pi;
3187 
3188 			if (netdev == NULL)
3189 				continue;
3190 
3191 			pi = netdev_priv(netdev);
3192 			t4vf_free_vi(adapter, pi->viid);
3193 			free_netdev(netdev);
3194 		}
3195 		iounmap(adapter->regs);
3196 		if (!is_t4(adapter->params.chip))
3197 			iounmap(adapter->bar2);
3198 		kfree(adapter->mbox_log);
3199 		kfree(adapter);
3200 	}
3201 
3202 	/*
3203 	 * Disable the device and release its PCI resources.
3204 	 */
3205 	pci_disable_device(pdev);
3206 	pci_clear_master(pdev);
3207 	pci_release_regions(pdev);
3208 }
3209 
3210 /*
3211  * "Shutdown" the device: quiesce it, stopping Ingress Packet and Interrupt
3212  * delivery.
3213  */
3214 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3215 {
3216 	struct adapter *adapter;
3217 	int pidx;
3218 
3219 	adapter = pci_get_drvdata(pdev);
3220 	if (!adapter)
3221 		return;
3222 
3223 	/* Disable all Virtual Interfaces.  This will shut down the
3224 	 * delivery of all ingress packets into the chip for these
3225 	 * Virtual Interfaces.
3226 	 */
3227 	for_each_port(adapter, pidx)
3228 		if (test_bit(pidx, &adapter->registered_device_map))
3229 			unregister_netdev(adapter->port[pidx]);
3230 
3231 	/* Stop all SGE activity and then disable our interrupt mechanism
3232 	 * (MSI-X or MSI), preventing any further Interrupt delivery.
3233 	 */
3234 	t4vf_sge_stop(adapter);
3235 	if (adapter->flags & USING_MSIX) {
3236 		pci_disable_msix(adapter->pdev);
3237 		adapter->flags &= ~USING_MSIX;
3238 	} else if (adapter->flags & USING_MSI) {
3239 		pci_disable_msi(adapter->pdev);
3240 		adapter->flags &= ~USING_MSI;
3241 	}
3242 
3243 	/*
3244 	 * Free up all Queues.  This will prevent further DMA and
3245 	 * Interrupts, allowing various internal pathways to drain.
3246 	 */
3247 	t4vf_free_sge_resources(adapter);
3248 	pci_set_drvdata(pdev, NULL);
3249 }
3250 
3251 /* Macros needed to support the PCI Device ID Table ...
3252  */
3253 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
3254 	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
3255 #define CH_PCI_DEVICE_ID_FUNCTION	0x8
3256 
3257 #define CH_PCI_ID_TABLE_ENTRY(devid) \
3258 		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }
3259 
3260 #define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }
3261 
3262 #include "../cxgb4/t4_pci_id_tbl.h"
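/*
 * With the macros above, that include expands to roughly (a sketch,
 * not the literal preprocessor output):
 *
 *	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
 *		{ PCI_VDEVICE(CHELSIO, devid), 0 },
 *		...
 *		{ 0, }
 *	};
 *
 * with one entry per supported chip, and CH_PCI_DEVICE_ID_FUNCTION
 * (0x8, the SR-IOV Virtual Function) folded into each base device ID
 * so that only the VF instances of the devices are matched.
 */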
3263 
3264 MODULE_DESCRIPTION(DRV_DESC);
3265 MODULE_AUTHOR("Chelsio Communications");
3266 MODULE_LICENSE("Dual BSD/GPL");
3267 MODULE_VERSION(DRV_VERSION);
3268 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3269 
3270 static struct pci_driver cxgb4vf_driver = {
3271 	.name		= KBUILD_MODNAME,
3272 	.id_table	= cxgb4vf_pci_tbl,
3273 	.probe		= cxgb4vf_pci_probe,
3274 	.remove		= cxgb4vf_pci_remove,
3275 	.shutdown	= cxgb4vf_pci_shutdown,
3276 };
3277 
3278 /*
3279  * Initialize global driver state.
3280  */
3281 static int __init cxgb4vf_module_init(void)
3282 {
3283 	int ret;
3284 
3285 	/*
3286 	 * Vet our module parameters.
3287 	 */
3288 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3289 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3290 			msi, MSI_MSIX, MSI_MSI);
3291 		return -EINVAL;
3292 	}
3293 
3294 	/* Debugfs support is optional, just warn if this fails */
3295 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3296 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3297 		pr_warn("could not create debugfs entry, continuing\n");
3298 
3299 	ret = pci_register_driver(&cxgb4vf_driver);
3300 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
3301 		debugfs_remove(cxgb4vf_debugfs_root);
3302 	return ret;
3303 }
3304 
3305 /*
3306  * Tear down global driver state.
3307  */
3308 static void __exit cxgb4vf_module_exit(void)
3309 {
3310 	pci_unregister_driver(&cxgb4vf_driver);
3311 	debugfs_remove(cxgb4vf_debugfs_root);
3312 }
3313 
3314 module_init(cxgb4vf_module_init);
3315 module_exit(cxgb4vf_module_exit);
3316