xref: /openbmc/linux/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c (revision a89aa749ece9c6fee7932163472d2ee0efd6ddd3)
1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
37 
38 #include <linux/module.h>
39 #include <linux/moduleparam.h>
40 #include <linux/init.h>
41 #include <linux/pci.h>
42 #include <linux/dma-mapping.h>
43 #include <linux/netdevice.h>
44 #include <linux/etherdevice.h>
45 #include <linux/debugfs.h>
46 #include <linux/ethtool.h>
47 #include <linux/mdio.h>
48 
49 #include "t4vf_common.h"
50 #include "t4vf_defs.h"
51 
52 #include "../cxgb4/t4_regs.h"
53 #include "../cxgb4/t4_msg.h"
54 
/*
 * Generic information about the driver.
 *
 * DRV_DESC is the human-readable, one-line description of this driver.
 * NOTE(review): its users are not visible in this part of the file;
 * presumably it feeds MODULE_DESCRIPTION() and/or a probe banner -- confirm.
 */
#define DRV_DESC "Chelsio T4/T5/T6 Virtual Function (VF) Network Driver"
59 
60 /*
61  * Module Parameters.
62  * ==================
63  */
64 
/*
 * Default ethtool "message level" for adapters: a bitmask of the NETIF_MSG_*
 * categories (driver, probe, link, timer, interface down/up, and RX/TX
 * errors) that are enabled unless changed later.
 */
#define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
71 
/*
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  This parameter determines which of these schemes the
 * driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX	2	/* MSI-X preferred, MSI as the alternative */
#define MSI_MSI		1	/* MSI only */
#define MSI_DEFAULT	MSI_MSIX

/* Current value of the "msi" module parameter (see the table above). */
static int msi = MSI_DEFAULT;

/* Mode 0644: readable by everyone, writable by root through sysfs. */
module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
92 
93 /*
94  * Fundamental constants.
95  * ======================
96  */
97 
enum {
	/* Upper bounds on the configurable queue sizes. */
	MAX_TXQ_ENTRIES		= 16384,
	MAX_RSPQ_ENTRIES	= 16384,
	MAX_RX_BUFFERS		= 16384,

	/* Lower bounds on the configurable queue sizes. */
	MIN_TXQ_ENTRIES		= 32,
	MIN_RSPQ_ENTRIES	= 128,
	MIN_FL_ENTRIES		= 16,

	/*
	 * For purposes of manipulating the Free List size we need to
	 * recognize that Free Lists are actually Egress Queues (the host
	 * produces free buffers which the hardware consumes), Egress Queues
	 * indices are all in units of Egress Context Units bytes, and free
	 * list entries are 64-bit PCI DMA addresses.  And since the state of
	 * the Producer Index == the Consumer Index implies an EMPTY list, we
	 * always have at least one Egress Unit's worth of Free List entries
	 * unused.  See sge.c for more details ...
	 */
	EQ_UNIT = SGE_EQ_IDXSIZE,
	/* Number of 64-bit Free List entries that fit in one Egress Unit. */
	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
	/* Free List entries that always stay unused (one Egress Unit). */
	MIN_FL_RESID = FL_PER_EQ_UNIT,
};
121 
122 /*
123  * Global driver state.
124  * ====================
125  */
126 
/*
 * NOTE(review): judging by its name this is the driver-wide debugfs root
 * directory; where it is created and removed is not visible here -- confirm.
 */
static struct dentry *cxgb4vf_debugfs_root;
128 
129 /*
130  * OS "Callback" functions.
131  * ========================
132  */
133 
134 /*
135  * The link status has changed on the indicated "port" (Virtual Interface).
136  */
137 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
138 {
139 	struct net_device *dev = adapter->port[pidx];
140 
141 	/*
142 	 * If the port is disabled or the current recorded "link up"
143 	 * status matches the new status, just return.
144 	 */
145 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
146 		return;
147 
148 	/*
149 	 * Tell the OS that the link status has changed and print a short
150 	 * informative message on the console about the event.
151 	 */
152 	if (link_ok) {
153 		const char *s;
154 		const char *fc;
155 		const struct port_info *pi = netdev_priv(dev);
156 
157 		netif_carrier_on(dev);
158 
159 		switch (pi->link_cfg.speed) {
160 		case 100:
161 			s = "100Mbps";
162 			break;
163 		case 1000:
164 			s = "1Gbps";
165 			break;
166 		case 10000:
167 			s = "10Gbps";
168 			break;
169 		case 25000:
170 			s = "25Gbps";
171 			break;
172 		case 40000:
173 			s = "40Gbps";
174 			break;
175 		case 100000:
176 			s = "100Gbps";
177 			break;
178 
179 		default:
180 			s = "unknown";
181 			break;
182 		}
183 
184 		switch ((int)pi->link_cfg.fc) {
185 		case PAUSE_RX:
186 			fc = "RX";
187 			break;
188 
189 		case PAUSE_TX:
190 			fc = "TX";
191 			break;
192 
193 		case PAUSE_RX | PAUSE_TX:
194 			fc = "RX/TX";
195 			break;
196 
197 		default:
198 			fc = "no";
199 			break;
200 		}
201 
202 		netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc);
203 	} else {
204 		netif_carrier_off(dev);
205 		netdev_info(dev, "link down\n");
206 	}
207 }
208 
209 /*
210  * THe port module type has changed on the indicated "port" (Virtual
211  * Interface).
212  */
213 void t4vf_os_portmod_changed(struct adapter *adapter, int pidx)
214 {
215 	static const char * const mod_str[] = {
216 		NULL, "LR", "SR", "ER", "passive DA", "active DA", "LRM"
217 	};
218 	const struct net_device *dev = adapter->port[pidx];
219 	const struct port_info *pi = netdev_priv(dev);
220 
221 	if (pi->mod_type == FW_PORT_MOD_TYPE_NONE)
222 		dev_info(adapter->pdev_dev, "%s: port module unplugged\n",
223 			 dev->name);
224 	else if (pi->mod_type < ARRAY_SIZE(mod_str))
225 		dev_info(adapter->pdev_dev, "%s: %s port module inserted\n",
226 			 dev->name, mod_str[pi->mod_type]);
227 	else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED)
228 		dev_info(adapter->pdev_dev, "%s: unsupported optical port "
229 			 "module inserted\n", dev->name);
230 	else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN)
231 		dev_info(adapter->pdev_dev, "%s: unknown port module inserted,"
232 			 "forcing TWINAX\n", dev->name);
233 	else if (pi->mod_type == FW_PORT_MOD_TYPE_ERROR)
234 		dev_info(adapter->pdev_dev, "%s: transceiver module error\n",
235 			 dev->name);
236 	else
237 		dev_info(adapter->pdev_dev, "%s: unknown module type %d "
238 			 "inserted\n", dev->name, pi->mod_type);
239 }
240 
241 static int cxgb4vf_set_addr_hash(struct port_info *pi)
242 {
243 	struct adapter *adapter = pi->adapter;
244 	u64 vec = 0;
245 	bool ucast = false;
246 	struct hash_mac_addr *entry;
247 
248 	/* Calculate the hash vector for the updated list and program it */
249 	list_for_each_entry(entry, &adapter->mac_hlist, list) {
250 		ucast |= is_unicast_ether_addr(entry->addr);
251 		vec |= (1ULL << hash_mac_addr(entry->addr));
252 	}
253 	return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
254 }
255 
256 /**
257  *	cxgb4vf_change_mac - Update match filter for a MAC address.
258  *	@pi: the port_info
259  *	@viid: the VI id
260  *	@tcam_idx: TCAM index of existing filter for old value of MAC address,
261  *		   or -1
262  *	@addr: the new MAC address value
263  *	@persist: whether a new MAC allocation should be persistent
264  *	@add_smt: if true also add the address to the HW SMT
265  *
266  *	Modifies an MPS filter and sets it to the new MAC address if
267  *	@tcam_idx >= 0, or adds the MAC address to a new filter if
268  *	@tcam_idx < 0. In the latter case the address is added persistently
269  *	if @persist is %true.
270  *	Addresses are programmed to hash region, if tcam runs out of entries.
271  *
272  */
273 static int cxgb4vf_change_mac(struct port_info *pi, unsigned int viid,
274 			      int *tcam_idx, const u8 *addr, bool persistent)
275 {
276 	struct hash_mac_addr *new_entry, *entry;
277 	struct adapter *adapter = pi->adapter;
278 	int ret;
279 
280 	ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent);
281 	/* We ran out of TCAM entries. try programming hash region. */
282 	if (ret == -ENOMEM) {
283 		/* If the MAC address to be updated is in the hash addr
284 		 * list, update it from the list
285 		 */
286 		list_for_each_entry(entry, &adapter->mac_hlist, list) {
287 			if (entry->iface_mac) {
288 				ether_addr_copy(entry->addr, addr);
289 				goto set_hash;
290 			}
291 		}
292 		new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
293 		if (!new_entry)
294 			return -ENOMEM;
295 		ether_addr_copy(new_entry->addr, addr);
296 		new_entry->iface_mac = true;
297 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
298 set_hash:
299 		ret = cxgb4vf_set_addr_hash(pi);
300 	} else if (ret >= 0) {
301 		*tcam_idx = ret;
302 		ret = 0;
303 	}
304 
305 	return ret;
306 }
307 
308 /*
309  * Net device operations.
310  * ======================
311  */
312 
313 
314 
315 
316 /*
317  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
318  * Interface).
319  */
320 static int link_start(struct net_device *dev)
321 {
322 	int ret;
323 	struct port_info *pi = netdev_priv(dev);
324 
325 	/*
326 	 * We do not set address filters and promiscuity here, the stack does
327 	 * that step explicitly. Enable vlan accel.
328 	 */
329 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
330 			      true);
331 	if (ret == 0)
332 		ret = cxgb4vf_change_mac(pi, pi->viid,
333 					 &pi->xact_addr_filt,
334 					 dev->dev_addr, true);
335 
336 	/*
337 	 * We don't need to actually "start the link" itself since the
338 	 * firmware will do that for us when the first Virtual Interface
339 	 * is enabled on a port.
340 	 */
341 	if (ret == 0)
342 		ret = t4vf_enable_pi(pi->adapter, pi, true, true);
343 
344 	return ret;
345 }
346 
347 /*
348  * Name the MSI-X interrupts.
349  */
350 static void name_msix_vecs(struct adapter *adapter)
351 {
352 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
353 	int pidx;
354 
355 	/*
356 	 * Firmware events.
357 	 */
358 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
359 		 "%s-FWeventq", adapter->name);
360 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
361 
362 	/*
363 	 * Ethernet queues.
364 	 */
365 	for_each_port(adapter, pidx) {
366 		struct net_device *dev = adapter->port[pidx];
367 		const struct port_info *pi = netdev_priv(dev);
368 		int qs, msi;
369 
370 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
371 			snprintf(adapter->msix_info[msi].desc, namelen,
372 				 "%s-%d", dev->name, qs);
373 			adapter->msix_info[msi].desc[namelen] = 0;
374 		}
375 	}
376 }
377 
378 /*
379  * Request all of our MSI-X resources.
380  */
381 static int request_msix_queue_irqs(struct adapter *adapter)
382 {
383 	struct sge *s = &adapter->sge;
384 	int rxq, msi, err;
385 
386 	/*
387 	 * Firmware events.
388 	 */
389 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
390 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
391 	if (err)
392 		return err;
393 
394 	/*
395 	 * Ethernet queues.
396 	 */
397 	msi = MSIX_IQFLINT;
398 	for_each_ethrxq(s, rxq) {
399 		err = request_irq(adapter->msix_info[msi].vec,
400 				  t4vf_sge_intr_msix, 0,
401 				  adapter->msix_info[msi].desc,
402 				  &s->ethrxq[rxq].rspq);
403 		if (err)
404 			goto err_free_irqs;
405 		msi++;
406 	}
407 	return 0;
408 
409 err_free_irqs:
410 	while (--rxq >= 0)
411 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
412 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
413 	return err;
414 }
415 
416 /*
417  * Free our MSI-X resources.
418  */
419 static void free_msix_queue_irqs(struct adapter *adapter)
420 {
421 	struct sge *s = &adapter->sge;
422 	int rxq, msi;
423 
424 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
425 	msi = MSIX_IQFLINT;
426 	for_each_ethrxq(s, rxq)
427 		free_irq(adapter->msix_info[msi++].vec,
428 			 &s->ethrxq[rxq].rspq);
429 }
430 
431 /*
432  * Turn on NAPI and start up interrupts on a response queue.
433  */
434 static void qenable(struct sge_rspq *rspq)
435 {
436 	napi_enable(&rspq->napi);
437 
438 	/*
439 	 * 0-increment the Going To Sleep register to start the timer and
440 	 * enable interrupts.
441 	 */
442 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
443 		     CIDXINC_V(0) |
444 		     SEINTARM_V(rspq->intr_params) |
445 		     INGRESSQID_V(rspq->cntxt_id));
446 }
447 
448 /*
449  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
450  */
451 static void enable_rx(struct adapter *adapter)
452 {
453 	int rxq;
454 	struct sge *s = &adapter->sge;
455 
456 	for_each_ethrxq(s, rxq)
457 		qenable(&s->ethrxq[rxq].rspq);
458 	qenable(&s->fw_evtq);
459 
460 	/*
461 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
462 	 * its Going To Sleep register here to get it started.
463 	 */
464 	if (adapter->flags & CXGB4VF_USING_MSI)
465 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
466 			     CIDXINC_V(0) |
467 			     SEINTARM_V(s->intrq.intr_params) |
468 			     INGRESSQID_V(s->intrq.cntxt_id));
469 
470 }
471 
472 /*
473  * Wait until all NAPI handlers are descheduled.
474  */
475 static void quiesce_rx(struct adapter *adapter)
476 {
477 	struct sge *s = &adapter->sge;
478 	int rxq;
479 
480 	for_each_ethrxq(s, rxq)
481 		napi_disable(&s->ethrxq[rxq].rspq.napi);
482 	napi_disable(&s->fw_evtq.napi);
483 }
484 
485 /*
486  * Response queue handler for the firmware event queue.
487  */
488 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
489 			  const struct pkt_gl *gl)
490 {
491 	/*
492 	 * Extract response opcode and get pointer to CPL message body.
493 	 */
494 	struct adapter *adapter = rspq->adapter;
495 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
496 	void *cpl = (void *)(rsp + 1);
497 
498 	switch (opcode) {
499 	case CPL_FW6_MSG: {
500 		/*
501 		 * We've received an asynchronous message from the firmware.
502 		 */
503 		const struct cpl_fw6_msg *fw_msg = cpl;
504 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
505 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
506 		break;
507 	}
508 
509 	case CPL_FW4_MSG: {
510 		/* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
511 		 */
512 		const struct cpl_sge_egr_update *p = (void *)(rsp + 3);
513 		opcode = CPL_OPCODE_G(ntohl(p->opcode_qid));
514 		if (opcode != CPL_SGE_EGR_UPDATE) {
515 			dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
516 				, opcode);
517 			break;
518 		}
519 		cpl = (void *)p;
520 	}
521 		/* Fall through */
522 
523 	case CPL_SGE_EGR_UPDATE: {
524 		/*
525 		 * We've received an Egress Queue Status Update message.  We
526 		 * get these, if the SGE is configured to send these when the
527 		 * firmware passes certain points in processing our TX
528 		 * Ethernet Queue or if we make an explicit request for one.
529 		 * We use these updates to determine when we may need to
530 		 * restart a TX Ethernet Queue which was stopped for lack of
531 		 * free TX Queue Descriptors ...
532 		 */
533 		const struct cpl_sge_egr_update *p = cpl;
534 		unsigned int qid = EGR_QID_G(be32_to_cpu(p->opcode_qid));
535 		struct sge *s = &adapter->sge;
536 		struct sge_txq *tq;
537 		struct sge_eth_txq *txq;
538 		unsigned int eq_idx;
539 
540 		/*
541 		 * Perform sanity checking on the Queue ID to make sure it
542 		 * really refers to one of our TX Ethernet Egress Queues which
543 		 * is active and matches the queue's ID.  None of these error
544 		 * conditions should ever happen so we may want to either make
545 		 * them fatal and/or conditionalized under DEBUG.
546 		 */
547 		eq_idx = EQ_IDX(s, qid);
548 		if (unlikely(eq_idx >= MAX_EGRQ)) {
549 			dev_err(adapter->pdev_dev,
550 				"Egress Update QID %d out of range\n", qid);
551 			break;
552 		}
553 		tq = s->egr_map[eq_idx];
554 		if (unlikely(tq == NULL)) {
555 			dev_err(adapter->pdev_dev,
556 				"Egress Update QID %d TXQ=NULL\n", qid);
557 			break;
558 		}
559 		txq = container_of(tq, struct sge_eth_txq, q);
560 		if (unlikely(tq->abs_id != qid)) {
561 			dev_err(adapter->pdev_dev,
562 				"Egress Update QID %d refers to TXQ %d\n",
563 				qid, tq->abs_id);
564 			break;
565 		}
566 
567 		/*
568 		 * Restart a stopped TX Queue which has less than half of its
569 		 * TX ring in use ...
570 		 */
571 		txq->q.restarts++;
572 		netif_tx_wake_queue(txq->txq);
573 		break;
574 	}
575 
576 	default:
577 		dev_err(adapter->pdev_dev,
578 			"unexpected CPL %#x on FW event queue\n", opcode);
579 	}
580 
581 	return 0;
582 }
583 
584 /*
585  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
586  * to use and initializes them.  We support multiple "Queue Sets" per port if
587  * we have MSI-X, otherwise just one queue set per port.
588  */
589 static int setup_sge_queues(struct adapter *adapter)
590 {
591 	struct sge *s = &adapter->sge;
592 	int err, pidx, msix;
593 
594 	/*
595 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
596 	 * state.
597 	 */
598 	bitmap_zero(s->starving_fl, MAX_EGRQ);
599 
600 	/*
601 	 * If we're using MSI interrupt mode we need to set up a "forwarded
602 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
603 	 * of the ingress queues will be set up to forward their interrupts to
604 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
605 	 * the intrq's queue ID as the interrupt forwarding queue for the
606 	 * subsequent calls ...
607 	 */
608 	if (adapter->flags & CXGB4VF_USING_MSI) {
609 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
610 					 adapter->port[0], 0, NULL, NULL);
611 		if (err)
612 			goto err_free_queues;
613 	}
614 
615 	/*
616 	 * Allocate our ingress queue for asynchronous firmware messages.
617 	 */
618 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
619 				 MSIX_FW, NULL, fwevtq_handler);
620 	if (err)
621 		goto err_free_queues;
622 
623 	/*
624 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
625 	 * later on ... up to the point where any interface on the adapter is
626 	 * brought up at which point lots of things get nailed down
627 	 * permanently ...
628 	 */
629 	msix = MSIX_IQFLINT;
630 	for_each_port(adapter, pidx) {
631 		struct net_device *dev = adapter->port[pidx];
632 		struct port_info *pi = netdev_priv(dev);
633 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
634 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
635 		int qs;
636 
637 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
638 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
639 						 dev, msix++,
640 						 &rxq->fl, t4vf_ethrx_handler);
641 			if (err)
642 				goto err_free_queues;
643 
644 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
645 					     netdev_get_tx_queue(dev, qs),
646 					     s->fw_evtq.cntxt_id);
647 			if (err)
648 				goto err_free_queues;
649 
650 			rxq->rspq.idx = qs;
651 			memset(&rxq->stats, 0, sizeof(rxq->stats));
652 		}
653 	}
654 
655 	/*
656 	 * Create the reverse mappings for the queues.
657 	 */
658 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
659 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
660 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
661 	for_each_port(adapter, pidx) {
662 		struct net_device *dev = adapter->port[pidx];
663 		struct port_info *pi = netdev_priv(dev);
664 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
665 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
666 		int qs;
667 
668 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
669 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
670 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
671 
672 			/*
673 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
674 			 * for Free Lists but since all of the Egress Queues
675 			 * (including Free Lists) have Relative Queue IDs
676 			 * which are computed as Absolute - Base Queue ID, we
677 			 * can synthesize the Absolute Queue IDs for the Free
678 			 * Lists.  This is useful for debugging purposes when
679 			 * we want to dump Queue Contexts via the PF Driver.
680 			 */
681 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
682 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
683 		}
684 	}
685 	return 0;
686 
687 err_free_queues:
688 	t4vf_free_sge_resources(adapter);
689 	return err;
690 }
691 
692 /*
693  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
694  * queues.  We configure the RSS CPU lookup table to distribute to the number
695  * of HW receive queues, and the response queue lookup table to narrow that
696  * down to the response queues actually configured for each "port" (Virtual
697  * Interface).  We always configure the RSS mapping for all ports since the
698  * mapping table has plenty of entries.
699  */
700 static int setup_rss(struct adapter *adapter)
701 {
702 	int pidx;
703 
704 	for_each_port(adapter, pidx) {
705 		struct port_info *pi = adap2pinfo(adapter, pidx);
706 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
707 		u16 rss[MAX_PORT_QSETS];
708 		int qs, err;
709 
710 		for (qs = 0; qs < pi->nqsets; qs++)
711 			rss[qs] = rxq[qs].rspq.abs_id;
712 
713 		err = t4vf_config_rss_range(adapter, pi->viid,
714 					    0, pi->rss_size, rss, pi->nqsets);
715 		if (err)
716 			return err;
717 
718 		/*
719 		 * Perform Global RSS Mode-specific initialization.
720 		 */
721 		switch (adapter->params.rss.mode) {
722 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
723 			/*
724 			 * If Tunnel All Lookup isn't specified in the global
725 			 * RSS Configuration, then we need to specify a
726 			 * default Ingress Queue for any ingress packets which
727 			 * aren't hashed.  We'll use our first ingress queue
728 			 * ...
729 			 */
730 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
731 				union rss_vi_config config;
732 				err = t4vf_read_rss_vi_config(adapter,
733 							      pi->viid,
734 							      &config);
735 				if (err)
736 					return err;
737 				config.basicvirtual.defaultq =
738 					rxq[0].rspq.abs_id;
739 				err = t4vf_write_rss_vi_config(adapter,
740 							       pi->viid,
741 							       &config);
742 				if (err)
743 					return err;
744 			}
745 			break;
746 		}
747 	}
748 
749 	return 0;
750 }
751 
752 /*
753  * Bring the adapter up.  Called whenever we go from no "ports" open to having
754  * one open.  This function performs the actions necessary to make an adapter
755  * operational, such as completing the initialization of HW modules, and
756  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
757  * this is called "cxgb_up" in the PF Driver.)
758  */
759 static int adapter_up(struct adapter *adapter)
760 {
761 	int err;
762 
763 	/*
764 	 * If this is the first time we've been called, perform basic
765 	 * adapter setup.  Once we've done this, many of our adapter
766 	 * parameters can no longer be changed ...
767 	 */
768 	if ((adapter->flags & CXGB4VF_FULL_INIT_DONE) == 0) {
769 		err = setup_sge_queues(adapter);
770 		if (err)
771 			return err;
772 		err = setup_rss(adapter);
773 		if (err) {
774 			t4vf_free_sge_resources(adapter);
775 			return err;
776 		}
777 
778 		if (adapter->flags & CXGB4VF_USING_MSIX)
779 			name_msix_vecs(adapter);
780 
781 		adapter->flags |= CXGB4VF_FULL_INIT_DONE;
782 	}
783 
784 	/*
785 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
786 	 */
787 	BUG_ON((adapter->flags &
788 	       (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
789 	if (adapter->flags & CXGB4VF_USING_MSIX)
790 		err = request_msix_queue_irqs(adapter);
791 	else
792 		err = request_irq(adapter->pdev->irq,
793 				  t4vf_intr_handler(adapter), 0,
794 				  adapter->name, adapter);
795 	if (err) {
796 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
797 			err);
798 		return err;
799 	}
800 
801 	/*
802 	 * Enable NAPI ingress processing and return success.
803 	 */
804 	enable_rx(adapter);
805 	t4vf_sge_start(adapter);
806 
807 	return 0;
808 }
809 
810 /*
811  * Bring the adapter down.  Called whenever the last "port" (Virtual
812  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
813  * Driver.)
814  */
815 static void adapter_down(struct adapter *adapter)
816 {
817 	/*
818 	 * Free interrupt resources.
819 	 */
820 	if (adapter->flags & CXGB4VF_USING_MSIX)
821 		free_msix_queue_irqs(adapter);
822 	else
823 		free_irq(adapter->pdev->irq, adapter);
824 
825 	/*
826 	 * Wait for NAPI handlers to finish.
827 	 */
828 	quiesce_rx(adapter);
829 }
830 
831 /*
832  * Start up a net device.
833  */
834 static int cxgb4vf_open(struct net_device *dev)
835 {
836 	int err;
837 	struct port_info *pi = netdev_priv(dev);
838 	struct adapter *adapter = pi->adapter;
839 
840 	/*
841 	 * If we don't have a connection to the firmware there's nothing we
842 	 * can do.
843 	 */
844 	if (!(adapter->flags & CXGB4VF_FW_OK))
845 		return -ENXIO;
846 
847 	/*
848 	 * If this is the first interface that we're opening on the "adapter",
849 	 * bring the "adapter" up now.
850 	 */
851 	if (adapter->open_device_map == 0) {
852 		err = adapter_up(adapter);
853 		if (err)
854 			return err;
855 	}
856 
857 	/* It's possible that the basic port information could have
858 	 * changed since we first read it.
859 	 */
860 	err = t4vf_update_port_info(pi);
861 	if (err < 0)
862 		return err;
863 
864 	/*
865 	 * Note that this interface is up and start everything up ...
866 	 */
867 	err = link_start(dev);
868 	if (err)
869 		goto err_unwind;
870 
871 	pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
872 
873 	netif_tx_start_all_queues(dev);
874 	set_bit(pi->port_id, &adapter->open_device_map);
875 	return 0;
876 
877 err_unwind:
878 	if (adapter->open_device_map == 0)
879 		adapter_down(adapter);
880 	return err;
881 }
882 
883 /*
884  * Shut down a net device.  This routine is called "cxgb_close" in the PF
885  * Driver ...
886  */
887 static int cxgb4vf_stop(struct net_device *dev)
888 {
889 	struct port_info *pi = netdev_priv(dev);
890 	struct adapter *adapter = pi->adapter;
891 
892 	netif_tx_stop_all_queues(dev);
893 	netif_carrier_off(dev);
894 	t4vf_enable_pi(adapter, pi, false, false);
895 
896 	clear_bit(pi->port_id, &adapter->open_device_map);
897 	if (adapter->open_device_map == 0)
898 		adapter_down(adapter);
899 	return 0;
900 }
901 
902 /*
903  * Translate our basic statistics into the standard "ifconfig" statistics.
904  */
905 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
906 {
907 	struct t4vf_port_stats stats;
908 	struct port_info *pi = netdev2pinfo(dev);
909 	struct adapter *adapter = pi->adapter;
910 	struct net_device_stats *ns = &dev->stats;
911 	int err;
912 
913 	spin_lock(&adapter->stats_lock);
914 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
915 	spin_unlock(&adapter->stats_lock);
916 
917 	memset(ns, 0, sizeof(*ns));
918 	if (err)
919 		return ns;
920 
921 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
922 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
923 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
924 			  stats.tx_ucast_frames + stats.tx_offload_frames);
925 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
926 			stats.rx_ucast_bytes);
927 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
928 			  stats.rx_ucast_frames);
929 	ns->multicast = stats.rx_mcast_frames;
930 	ns->tx_errors = stats.tx_drop_frames;
931 	ns->rx_errors = stats.rx_err_frames;
932 
933 	return ns;
934 }
935 
936 static int cxgb4vf_mac_sync(struct net_device *netdev, const u8 *mac_addr)
937 {
938 	struct port_info *pi = netdev_priv(netdev);
939 	struct adapter *adapter = pi->adapter;
940 	int ret;
941 	u64 mhash = 0;
942 	u64 uhash = 0;
943 	bool free = false;
944 	bool ucast = is_unicast_ether_addr(mac_addr);
945 	const u8 *maclist[1] = {mac_addr};
946 	struct hash_mac_addr *new_entry;
947 
948 	ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
949 				  NULL, ucast ? &uhash : &mhash, false);
950 	if (ret < 0)
951 		goto out;
952 	/* if hash != 0, then add the addr to hash addr list
953 	 * so on the end we will calculate the hash for the
954 	 * list and program it
955 	 */
956 	if (uhash || mhash) {
957 		new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC);
958 		if (!new_entry)
959 			return -ENOMEM;
960 		ether_addr_copy(new_entry->addr, mac_addr);
961 		list_add_tail(&new_entry->list, &adapter->mac_hlist);
962 		ret = cxgb4vf_set_addr_hash(pi);
963 	}
964 out:
965 	return ret < 0 ? ret : 0;
966 }
967 
968 static int cxgb4vf_mac_unsync(struct net_device *netdev, const u8 *mac_addr)
969 {
970 	struct port_info *pi = netdev_priv(netdev);
971 	struct adapter *adapter = pi->adapter;
972 	int ret;
973 	const u8 *maclist[1] = {mac_addr};
974 	struct hash_mac_addr *entry, *tmp;
975 
976 	/* If the MAC address to be removed is in the hash addr
977 	 * list, delete it from the list and update hash vector
978 	 */
979 	list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) {
980 		if (ether_addr_equal(entry->addr, mac_addr)) {
981 			list_del(&entry->list);
982 			kfree(entry);
983 			return cxgb4vf_set_addr_hash(pi);
984 		}
985 	}
986 
987 	ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false);
988 	return ret < 0 ? -EINVAL : 0;
989 }
990 
991 /*
992  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
993  * If @mtu is -1 it is left unchanged.
994  */
995 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
996 {
997 	struct port_info *pi = netdev_priv(dev);
998 
999 	__dev_uc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
1000 	__dev_mc_sync(dev, cxgb4vf_mac_sync, cxgb4vf_mac_unsync);
1001 	return t4vf_set_rxmode(pi->adapter, pi->viid, -1,
1002 			       (dev->flags & IFF_PROMISC) != 0,
1003 			       (dev->flags & IFF_ALLMULTI) != 0,
1004 			       1, -1, sleep_ok);
1005 }
1006 
1007 /*
1008  * Set the current receive modes on the device.
1009  */
1010 static void cxgb4vf_set_rxmode(struct net_device *dev)
1011 {
1012 	/* unfortunately we can't return errors to the stack */
1013 	set_rxmode(dev, -1, false);
1014 }
1015 
1016 /*
1017  * Find the entry in the interrupt holdoff timer value array which comes
1018  * closest to the specified interrupt holdoff value.
1019  */
1020 static int closest_timer(const struct sge *s, int us)
1021 {
1022 	int i, timer_idx = 0, min_delta = INT_MAX;
1023 
1024 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
1025 		int delta = us - s->timer_val[i];
1026 		if (delta < 0)
1027 			delta = -delta;
1028 		if (delta < min_delta) {
1029 			min_delta = delta;
1030 			timer_idx = i;
1031 		}
1032 	}
1033 	return timer_idx;
1034 }
1035 
1036 static int closest_thres(const struct sge *s, int thres)
1037 {
1038 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
1039 
1040 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
1041 		delta = thres - s->counter_val[i];
1042 		if (delta < 0)
1043 			delta = -delta;
1044 		if (delta < min_delta) {
1045 			min_delta = delta;
1046 			pktcnt_idx = i;
1047 		}
1048 	}
1049 	return pktcnt_idx;
1050 }
1051 
1052 /*
1053  * Return a queue's interrupt hold-off time in us.  0 means no timer.
1054  */
1055 static unsigned int qtimer_val(const struct adapter *adapter,
1056 			       const struct sge_rspq *rspq)
1057 {
1058 	unsigned int timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
1059 
1060 	return timer_idx < SGE_NTIMERS
1061 		? adapter->sge.timer_val[timer_idx]
1062 		: 0;
1063 }
1064 
1065 /**
1066  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
1067  *	@adapter: the adapter
1068  *	@rspq: the RX response queue
1069  *	@us: the hold-off time in us, or 0 to disable timer
1070  *	@cnt: the hold-off packet count, or 0 to disable counter
1071  *
1072  *	Sets an RX response queue's interrupt hold-off time and packet count.
1073  *	At least one of the two needs to be enabled for the queue to generate
1074  *	interrupts.
1075  */
1076 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
1077 			       unsigned int us, unsigned int cnt)
1078 {
1079 	unsigned int timer_idx;
1080 
1081 	/*
1082 	 * If both the interrupt holdoff timer and count are specified as
1083 	 * zero, default to a holdoff count of 1 ...
1084 	 */
1085 	if ((us | cnt) == 0)
1086 		cnt = 1;
1087 
1088 	/*
1089 	 * If an interrupt holdoff count has been specified, then find the
1090 	 * closest configured holdoff count and use that.  If the response
1091 	 * queue has already been created, then update its queue context
1092 	 * parameters ...
1093 	 */
1094 	if (cnt) {
1095 		int err;
1096 		u32 v, pktcnt_idx;
1097 
1098 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1099 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1100 			v = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
1101 			    FW_PARAMS_PARAM_X_V(
1102 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1103 			    FW_PARAMS_PARAM_YZ_V(rspq->cntxt_id);
1104 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1105 			if (err)
1106 				return err;
1107 		}
1108 		rspq->pktcnt_idx = pktcnt_idx;
1109 	}
1110 
1111 	/*
1112 	 * Compute the closest holdoff timer index from the supplied holdoff
1113 	 * timer value.
1114 	 */
1115 	timer_idx = (us == 0
1116 		     ? SGE_TIMER_RSTRT_CNTR
1117 		     : closest_timer(&adapter->sge, us));
1118 
1119 	/*
1120 	 * Update the response queue's interrupt coalescing parameters and
1121 	 * return success.
1122 	 */
1123 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
1124 			     QINTR_CNT_EN_V(cnt > 0));
1125 	return 0;
1126 }
1127 
1128 /*
1129  * Return a version number to identify the type of adapter.  The scheme is:
1130  * - bits 0..9: chip version
1131  * - bits 10..15: chip revision
1132  */
1133 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1134 {
1135 	/*
1136 	 * Chip version 4, revision 0x3f (cxgb4vf).
1137 	 */
1138 	return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
1139 }
1140 
/*
 * Execute the specified ioctl command.
 *
 * No command is handled here: the VF Driver doesn't have access to any of
 * the common Ethernet device ioctl()'s (like reading/writing PHY
 * registers, etc.), so every request is answered with -EOPNOTSUPP.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	return -EOPNOTSUPP;
}
1161 
1162 /*
1163  * Change the device's MTU.
1164  */
1165 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1166 {
1167 	int ret;
1168 	struct port_info *pi = netdev_priv(dev);
1169 
1170 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1171 			      -1, -1, -1, -1, true);
1172 	if (!ret)
1173 		dev->mtu = new_mtu;
1174 	return ret;
1175 }
1176 
1177 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1178 	netdev_features_t features)
1179 {
1180 	/*
1181 	 * Since there is no support for separate rx/tx vlan accel
1182 	 * enable/disable make sure tx flag is always in same state as rx.
1183 	 */
1184 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
1185 		features |= NETIF_F_HW_VLAN_CTAG_TX;
1186 	else
1187 		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
1188 
1189 	return features;
1190 }
1191 
1192 static int cxgb4vf_set_features(struct net_device *dev,
1193 	netdev_features_t features)
1194 {
1195 	struct port_info *pi = netdev_priv(dev);
1196 	netdev_features_t changed = dev->features ^ features;
1197 
1198 	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
1199 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1200 				features & NETIF_F_HW_VLAN_CTAG_TX, 0);
1201 
1202 	return 0;
1203 }
1204 
1205 /*
1206  * Change the devices MAC address.
1207  */
1208 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1209 {
1210 	int ret;
1211 	struct sockaddr *addr = _addr;
1212 	struct port_info *pi = netdev_priv(dev);
1213 
1214 	if (!is_valid_ether_addr(addr->sa_data))
1215 		return -EADDRNOTAVAIL;
1216 
1217 	ret = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
1218 				 addr->sa_data, true);
1219 	if (ret < 0)
1220 		return ret;
1221 
1222 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1223 	return 0;
1224 }
1225 
1226 #ifdef CONFIG_NET_POLL_CONTROLLER
1227 /*
1228  * Poll all of our receive queues.  This is called outside of normal interrupt
1229  * context.
1230  */
1231 static void cxgb4vf_poll_controller(struct net_device *dev)
1232 {
1233 	struct port_info *pi = netdev_priv(dev);
1234 	struct adapter *adapter = pi->adapter;
1235 
1236 	if (adapter->flags & CXGB4VF_USING_MSIX) {
1237 		struct sge_eth_rxq *rxq;
1238 		int nqsets;
1239 
1240 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1241 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1242 			t4vf_sge_intr_msix(0, &rxq->rspq);
1243 			rxq++;
1244 		}
1245 	} else
1246 		t4vf_intr_handler(adapter)(0, adapter);
1247 }
1248 #endif
1249 
1250 /*
1251  * Ethtool operations.
1252  * ===================
1253  *
1254  * Note that we don't support any ethtool operations which change the physical
1255  * state of the port to which we're linked.
1256  */
1257 
1258 /**
1259  *	from_fw_port_mod_type - translate Firmware Port/Module type to Ethtool
1260  *	@port_type: Firmware Port Type
1261  *	@mod_type: Firmware Module Type
1262  *
1263  *	Translate Firmware Port/Module type to Ethtool Port Type.
1264  */
1265 static int from_fw_port_mod_type(enum fw_port_type port_type,
1266 				 enum fw_port_module_type mod_type)
1267 {
1268 	if (port_type == FW_PORT_TYPE_BT_SGMII ||
1269 	    port_type == FW_PORT_TYPE_BT_XFI ||
1270 	    port_type == FW_PORT_TYPE_BT_XAUI) {
1271 		return PORT_TP;
1272 	} else if (port_type == FW_PORT_TYPE_FIBER_XFI ||
1273 		   port_type == FW_PORT_TYPE_FIBER_XAUI) {
1274 		return PORT_FIBRE;
1275 	} else if (port_type == FW_PORT_TYPE_SFP ||
1276 		   port_type == FW_PORT_TYPE_QSFP_10G ||
1277 		   port_type == FW_PORT_TYPE_QSA ||
1278 		   port_type == FW_PORT_TYPE_QSFP ||
1279 		   port_type == FW_PORT_TYPE_CR4_QSFP ||
1280 		   port_type == FW_PORT_TYPE_CR_QSFP ||
1281 		   port_type == FW_PORT_TYPE_CR2_QSFP ||
1282 		   port_type == FW_PORT_TYPE_SFP28) {
1283 		if (mod_type == FW_PORT_MOD_TYPE_LR ||
1284 		    mod_type == FW_PORT_MOD_TYPE_SR ||
1285 		    mod_type == FW_PORT_MOD_TYPE_ER ||
1286 		    mod_type == FW_PORT_MOD_TYPE_LRM)
1287 			return PORT_FIBRE;
1288 		else if (mod_type == FW_PORT_MOD_TYPE_TWINAX_PASSIVE ||
1289 			 mod_type == FW_PORT_MOD_TYPE_TWINAX_ACTIVE)
1290 			return PORT_DA;
1291 		else
1292 			return PORT_OTHER;
1293 	} else if (port_type == FW_PORT_TYPE_KR4_100G ||
1294 		   port_type == FW_PORT_TYPE_KR_SFP28 ||
1295 		   port_type == FW_PORT_TYPE_KR_XLAUI) {
1296 		return PORT_NONE;
1297 	}
1298 
1299 	return PORT_OTHER;
1300 }
1301 
1302 /**
1303  *	fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask
1304  *	@port_type: Firmware Port Type
1305  *	@fw_caps: Firmware Port Capabilities
1306  *	@link_mode_mask: ethtool Link Mode Mask
1307  *
1308  *	Translate a Firmware Port Capabilities specification to an ethtool
1309  *	Link Mode Mask.
1310  */
1311 static void fw_caps_to_lmm(enum fw_port_type port_type,
1312 			   unsigned int fw_caps,
1313 			   unsigned long *link_mode_mask)
1314 {
1315 	#define SET_LMM(__lmm_name) \
1316 		__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
1317 			  link_mode_mask)
1318 
1319 	#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \
1320 		do { \
1321 			if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
1322 				SET_LMM(__lmm_name); \
1323 		} while (0)
1324 
1325 	switch (port_type) {
1326 	case FW_PORT_TYPE_BT_SGMII:
1327 	case FW_PORT_TYPE_BT_XFI:
1328 	case FW_PORT_TYPE_BT_XAUI:
1329 		SET_LMM(TP);
1330 		FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
1331 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1332 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1333 		break;
1334 
1335 	case FW_PORT_TYPE_KX4:
1336 	case FW_PORT_TYPE_KX:
1337 		SET_LMM(Backplane);
1338 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1339 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1340 		break;
1341 
1342 	case FW_PORT_TYPE_KR:
1343 		SET_LMM(Backplane);
1344 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1345 		break;
1346 
1347 	case FW_PORT_TYPE_BP_AP:
1348 		SET_LMM(Backplane);
1349 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1350 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1351 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1352 		break;
1353 
1354 	case FW_PORT_TYPE_BP4_AP:
1355 		SET_LMM(Backplane);
1356 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1357 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
1358 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1359 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full);
1360 		break;
1361 
1362 	case FW_PORT_TYPE_FIBER_XFI:
1363 	case FW_PORT_TYPE_FIBER_XAUI:
1364 	case FW_PORT_TYPE_SFP:
1365 	case FW_PORT_TYPE_QSFP_10G:
1366 	case FW_PORT_TYPE_QSA:
1367 		SET_LMM(FIBRE);
1368 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1369 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1370 		break;
1371 
1372 	case FW_PORT_TYPE_BP40_BA:
1373 	case FW_PORT_TYPE_QSFP:
1374 		SET_LMM(FIBRE);
1375 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1376 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1377 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1378 		break;
1379 
1380 	case FW_PORT_TYPE_CR_QSFP:
1381 	case FW_PORT_TYPE_SFP28:
1382 		SET_LMM(FIBRE);
1383 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1384 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
1385 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1386 		break;
1387 
1388 	case FW_PORT_TYPE_KR_SFP28:
1389 		SET_LMM(Backplane);
1390 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
1391 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1392 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full);
1393 		break;
1394 
1395 	case FW_PORT_TYPE_KR_XLAUI:
1396 		SET_LMM(Backplane);
1397 		FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
1398 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1399 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full);
1400 		break;
1401 
1402 	case FW_PORT_TYPE_CR2_QSFP:
1403 		SET_LMM(FIBRE);
1404 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full);
1405 		break;
1406 
1407 	case FW_PORT_TYPE_KR4_100G:
1408 	case FW_PORT_TYPE_CR4_QSFP:
1409 		SET_LMM(FIBRE);
1410 		FW_CAPS_TO_LMM(SPEED_1G,  1000baseT_Full);
1411 		FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
1412 		FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
1413 		FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
1414 		FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
1415 		FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full);
1416 		break;
1417 
1418 	default:
1419 		break;
1420 	}
1421 
1422 	if (fw_caps & FW_PORT_CAP32_FEC_V(FW_PORT_CAP32_FEC_M)) {
1423 		FW_CAPS_TO_LMM(FEC_RS, FEC_RS);
1424 		FW_CAPS_TO_LMM(FEC_BASER_RS, FEC_BASER);
1425 	} else {
1426 		SET_LMM(FEC_NONE);
1427 	}
1428 
1429 	FW_CAPS_TO_LMM(ANEG, Autoneg);
1430 	FW_CAPS_TO_LMM(802_3_PAUSE, Pause);
1431 	FW_CAPS_TO_LMM(802_3_ASM_DIR, Asym_Pause);
1432 
1433 	#undef FW_CAPS_TO_LMM
1434 	#undef SET_LMM
1435 }
1436 
1437 static int cxgb4vf_get_link_ksettings(struct net_device *dev,
1438 				  struct ethtool_link_ksettings *link_ksettings)
1439 {
1440 	struct port_info *pi = netdev_priv(dev);
1441 	struct ethtool_link_settings *base = &link_ksettings->base;
1442 
1443 	/* For the nonce, the Firmware doesn't send up Port State changes
1444 	 * when the Virtual Interface attached to the Port is down.  So
1445 	 * if it's down, let's grab any changes.
1446 	 */
1447 	if (!netif_running(dev))
1448 		(void)t4vf_update_port_info(pi);
1449 
1450 	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
1451 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
1452 	ethtool_link_ksettings_zero_link_mode(link_ksettings, lp_advertising);
1453 
1454 	base->port = from_fw_port_mod_type(pi->port_type, pi->mod_type);
1455 
1456 	if (pi->mdio_addr >= 0) {
1457 		base->phy_address = pi->mdio_addr;
1458 		base->mdio_support = (pi->port_type == FW_PORT_TYPE_BT_SGMII
1459 				      ? ETH_MDIO_SUPPORTS_C22
1460 				      : ETH_MDIO_SUPPORTS_C45);
1461 	} else {
1462 		base->phy_address = 255;
1463 		base->mdio_support = 0;
1464 	}
1465 
1466 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.pcaps,
1467 		       link_ksettings->link_modes.supported);
1468 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.acaps,
1469 		       link_ksettings->link_modes.advertising);
1470 	fw_caps_to_lmm(pi->port_type, pi->link_cfg.lpacaps,
1471 		       link_ksettings->link_modes.lp_advertising);
1472 
1473 	if (netif_carrier_ok(dev)) {
1474 		base->speed = pi->link_cfg.speed;
1475 		base->duplex = DUPLEX_FULL;
1476 	} else {
1477 		base->speed = SPEED_UNKNOWN;
1478 		base->duplex = DUPLEX_UNKNOWN;
1479 	}
1480 
1481 	base->autoneg = pi->link_cfg.autoneg;
1482 	if (pi->link_cfg.pcaps & FW_PORT_CAP32_ANEG)
1483 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1484 						     supported, Autoneg);
1485 	if (pi->link_cfg.autoneg)
1486 		ethtool_link_ksettings_add_link_mode(link_ksettings,
1487 						     advertising, Autoneg);
1488 
1489 	return 0;
1490 }
1491 
1492 /* Translate the Firmware FEC value into the ethtool value. */
1493 static inline unsigned int fwcap_to_eth_fec(unsigned int fw_fec)
1494 {
1495 	unsigned int eth_fec = 0;
1496 
1497 	if (fw_fec & FW_PORT_CAP32_FEC_RS)
1498 		eth_fec |= ETHTOOL_FEC_RS;
1499 	if (fw_fec & FW_PORT_CAP32_FEC_BASER_RS)
1500 		eth_fec |= ETHTOOL_FEC_BASER;
1501 
1502 	/* if nothing is set, then FEC is off */
1503 	if (!eth_fec)
1504 		eth_fec = ETHTOOL_FEC_OFF;
1505 
1506 	return eth_fec;
1507 }
1508 
1509 /* Translate Common Code FEC value into ethtool value. */
1510 static inline unsigned int cc_to_eth_fec(unsigned int cc_fec)
1511 {
1512 	unsigned int eth_fec = 0;
1513 
1514 	if (cc_fec & FEC_AUTO)
1515 		eth_fec |= ETHTOOL_FEC_AUTO;
1516 	if (cc_fec & FEC_RS)
1517 		eth_fec |= ETHTOOL_FEC_RS;
1518 	if (cc_fec & FEC_BASER_RS)
1519 		eth_fec |= ETHTOOL_FEC_BASER;
1520 
1521 	/* if nothing is set, then FEC is off */
1522 	if (!eth_fec)
1523 		eth_fec = ETHTOOL_FEC_OFF;
1524 
1525 	return eth_fec;
1526 }
1527 
1528 static int cxgb4vf_get_fecparam(struct net_device *dev,
1529 				struct ethtool_fecparam *fec)
1530 {
1531 	const struct port_info *pi = netdev_priv(dev);
1532 	const struct link_config *lc = &pi->link_cfg;
1533 
1534 	/* Translate the Firmware FEC Support into the ethtool value.  We
1535 	 * always support IEEE 802.3 "automatic" selection of Link FEC type if
1536 	 * any FEC is supported.
1537 	 */
1538 	fec->fec = fwcap_to_eth_fec(lc->pcaps);
1539 	if (fec->fec != ETHTOOL_FEC_OFF)
1540 		fec->fec |= ETHTOOL_FEC_AUTO;
1541 
1542 	/* Translate the current internal FEC parameters into the
1543 	 * ethtool values.
1544 	 */
1545 	fec->active_fec = cc_to_eth_fec(lc->fec);
1546 	return 0;
1547 }
1548 
1549 /*
1550  * Return our driver information.
1551  */
1552 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1553 				struct ethtool_drvinfo *drvinfo)
1554 {
1555 	struct adapter *adapter = netdev2adap(dev);
1556 
1557 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1558 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1559 		sizeof(drvinfo->bus_info));
1560 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1561 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1562 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.fwrev),
1563 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.fwrev),
1564 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.fwrev),
1565 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.fwrev),
1566 		 FW_HDR_FW_VER_MAJOR_G(adapter->params.dev.tprev),
1567 		 FW_HDR_FW_VER_MINOR_G(adapter->params.dev.tprev),
1568 		 FW_HDR_FW_VER_MICRO_G(adapter->params.dev.tprev),
1569 		 FW_HDR_FW_VER_BUILD_G(adapter->params.dev.tprev));
1570 }
1571 
1572 /*
1573  * Return current adapter message level.
1574  */
1575 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1576 {
1577 	return netdev2adap(dev)->msg_enable;
1578 }
1579 
1580 /*
1581  * Set current adapter message level.
1582  */
1583 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1584 {
1585 	netdev2adap(dev)->msg_enable = msglevel;
1586 }
1587 
1588 /*
1589  * Return the device's current Queue Set ring size parameters along with the
1590  * allowed maximum values.  Since ethtool doesn't understand the concept of
1591  * multi-queue devices, we just return the current values associated with the
1592  * first Queue Set.
1593  */
1594 static void cxgb4vf_get_ringparam(struct net_device *dev,
1595 				  struct ethtool_ringparam *rp)
1596 {
1597 	const struct port_info *pi = netdev_priv(dev);
1598 	const struct sge *s = &pi->adapter->sge;
1599 
1600 	rp->rx_max_pending = MAX_RX_BUFFERS;
1601 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1602 	rp->rx_jumbo_max_pending = 0;
1603 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1604 
1605 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1606 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1607 	rp->rx_jumbo_pending = 0;
1608 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1609 }
1610 
1611 /*
1612  * Set the Queue Set ring size parameters for the device.  Again, since
1613  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1614  * apply these new values across all of the Queue Sets associated with the
1615  * device -- after vetting them of course!
1616  */
1617 static int cxgb4vf_set_ringparam(struct net_device *dev,
1618 				 struct ethtool_ringparam *rp)
1619 {
1620 	const struct port_info *pi = netdev_priv(dev);
1621 	struct adapter *adapter = pi->adapter;
1622 	struct sge *s = &adapter->sge;
1623 	int qs;
1624 
1625 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1626 	    rp->rx_jumbo_pending ||
1627 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1628 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1629 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1630 	    rp->rx_pending < MIN_FL_ENTRIES ||
1631 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1632 		return -EINVAL;
1633 
1634 	if (adapter->flags & CXGB4VF_FULL_INIT_DONE)
1635 		return -EBUSY;
1636 
1637 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1638 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1639 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1640 		s->ethtxq[qs].q.size = rp->tx_pending;
1641 	}
1642 	return 0;
1643 }
1644 
1645 /*
1646  * Return the interrupt holdoff timer and count for the first Queue Set on the
1647  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1648  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1649  */
1650 static int cxgb4vf_get_coalesce(struct net_device *dev,
1651 				struct ethtool_coalesce *coalesce)
1652 {
1653 	const struct port_info *pi = netdev_priv(dev);
1654 	const struct adapter *adapter = pi->adapter;
1655 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1656 
1657 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1658 	coalesce->rx_max_coalesced_frames =
1659 		((rspq->intr_params & QINTR_CNT_EN_F)
1660 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1661 		 : 0);
1662 	return 0;
1663 }
1664 
1665 /*
1666  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1667  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1668  * the interrupt holdoff timer on any of the device's Queue Sets.
1669  */
1670 static int cxgb4vf_set_coalesce(struct net_device *dev,
1671 				struct ethtool_coalesce *coalesce)
1672 {
1673 	const struct port_info *pi = netdev_priv(dev);
1674 	struct adapter *adapter = pi->adapter;
1675 
1676 	return set_rxq_intr_params(adapter,
1677 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1678 				   coalesce->rx_coalesce_usecs,
1679 				   coalesce->rx_max_coalesced_frames);
1680 }
1681 
1682 /*
1683  * Report current port link pause parameter settings.
1684  */
1685 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1686 				   struct ethtool_pauseparam *pauseparam)
1687 {
1688 	struct port_info *pi = netdev_priv(dev);
1689 
1690 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1691 	pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
1692 	pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
1693 }
1694 
1695 /*
1696  * Identify the port by blinking the port's LED.
1697  */
1698 static int cxgb4vf_phys_id(struct net_device *dev,
1699 			   enum ethtool_phys_id_state state)
1700 {
1701 	unsigned int val;
1702 	struct port_info *pi = netdev_priv(dev);
1703 
1704 	if (state == ETHTOOL_ID_ACTIVE)
1705 		val = 0xffff;
1706 	else if (state == ETHTOOL_ID_INACTIVE)
1707 		val = 0;
1708 	else
1709 		return -EINVAL;
1710 
1711 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1712 }
1713 
/*
 * Port stats maintained per queue of the port.
 *
 * collect_sge_port_stats() fills this in by summing the per-queue counters
 * over all Queue Sets of a port.  The structure is overlaid on the u64
 * array returned by "ethtool -S", so the field order has to stay in step
 * with the second half of stats_strings[].
 */
struct queue_port_stats {
	u64 tso;		/* sum of the TX queues' tso counters */
	u64 tx_csum;		/* sum of the TX queues' tx_cso counters */
	u64 rx_csum;		/* sum of the RX queues' rx_cso counters */
	u64 vlan_ex;		/* sum of the RX queues' vlan_ex counters */
	u64 vlan_ins;		/* sum of the TX queues' vlan_ins counters */
	u64 lro_pkts;		/* sum of the RX queues' lro_pkts counters */
	u64 lro_merged;		/* sum of the RX queues' lro_merged counters */
};
1726 
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 *
 * cxgb4vf_get_ethtool_stats() writes a struct t4vf_port_stats followed by
 * a struct queue_port_stats into the output array, so the table consists
 * of two parts in that same order.
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes  ",
	"TxBroadcastFrames ",
	"TxMulticastBytes  ",
	"TxMulticastFrames ",
	"TxUnicastBytes    ",
	"TxUnicastFrames   ",
	"TxDroppedFrames   ",
	"TxOffloadBytes    ",
	"TxOffloadFrames   ",
	"RxBroadcastBytes  ",
	"RxBroadcastFrames ",
	"RxMulticastBytes  ",
	"RxMulticastFrames ",
	"RxUnicastBytes    ",
	"RxUnicastFrames   ",
	"RxErrorFrames     ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.  The last
	 * two entries label the lro_pkts and lro_merged fields.
	 */
	"TSO               ",
	"TxCsumOffload     ",
	"RxCsumGood        ",
	"VLANextractions   ",
	"VLANinsertions    ",
	"GROPackets        ",
	"GROMerged         ",
};
1765 
1766 /*
1767  * Return the number of statistics in the specified statistics set.
1768  */
1769 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1770 {
1771 	switch (sset) {
1772 	case ETH_SS_STATS:
1773 		return ARRAY_SIZE(stats_strings);
1774 	default:
1775 		return -EOPNOTSUPP;
1776 	}
1777 	/*NOTREACHED*/
1778 }
1779 
1780 /*
1781  * Return the strings for the specified statistics set.
1782  */
1783 static void cxgb4vf_get_strings(struct net_device *dev,
1784 				u32 sset,
1785 				u8 *data)
1786 {
1787 	switch (sset) {
1788 	case ETH_SS_STATS:
1789 		memcpy(data, stats_strings, sizeof(stats_strings));
1790 		break;
1791 	}
1792 }
1793 
1794 /*
1795  * Small utility routine to accumulate queue statistics across the queues of
1796  * a "port".
1797  */
1798 static void collect_sge_port_stats(const struct adapter *adapter,
1799 				   const struct port_info *pi,
1800 				   struct queue_port_stats *stats)
1801 {
1802 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1803 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1804 	int qs;
1805 
1806 	memset(stats, 0, sizeof(*stats));
1807 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1808 		stats->tso += txq->tso;
1809 		stats->tx_csum += txq->tx_cso;
1810 		stats->rx_csum += rxq->stats.rx_cso;
1811 		stats->vlan_ex += rxq->stats.vlan_ex;
1812 		stats->vlan_ins += txq->vlan_ins;
1813 		stats->lro_pkts += rxq->stats.lro_pkts;
1814 		stats->lro_merged += rxq->stats.lro_merged;
1815 	}
1816 }
1817 
1818 /*
1819  * Return the ETH_SS_STATS statistics set.
1820  */
1821 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1822 				      struct ethtool_stats *stats,
1823 				      u64 *data)
1824 {
1825 	struct port_info *pi = netdev2pinfo(dev);
1826 	struct adapter *adapter = pi->adapter;
1827 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1828 				      (struct t4vf_port_stats *)data);
1829 	if (err)
1830 		memset(data, 0, sizeof(struct t4vf_port_stats));
1831 
1832 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1833 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1834 }
1835 
1836 /*
1837  * Return the size of our register map.
1838  */
1839 static int cxgb4vf_get_regs_len(struct net_device *dev)
1840 {
1841 	return T4VF_REGMAP_SIZE;
1842 }
1843 
1844 /*
1845  * Dump a block of registers, start to end inclusive, into a buffer.
1846  */
1847 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1848 			   unsigned int start, unsigned int end)
1849 {
1850 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1851 
1852 	for ( ; start <= end; start += sizeof(u32)) {
1853 		/*
1854 		 * Avoid reading the Mailbox Control register since that
1855 		 * can trigger a Mailbox Ownership Arbitration cycle and
1856 		 * interfere with communication with the firmware.
1857 		 */
1858 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1859 			*bp++ = 0xffff;
1860 		else
1861 			*bp++ = t4_read_reg(adapter, start);
1862 	}
1863 }
1864 
1865 /*
1866  * Copy our entire register map into the provided buffer.
1867  */
1868 static void cxgb4vf_get_regs(struct net_device *dev,
1869 			     struct ethtool_regs *regs,
1870 			     void *regbuf)
1871 {
1872 	struct adapter *adapter = netdev2adap(dev);
1873 
1874 	regs->version = mk_adap_vers(adapter);
1875 
1876 	/*
1877 	 * Fill in register buffer with our register map.
1878 	 */
1879 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1880 
1881 	reg_block_dump(adapter, regbuf,
1882 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1883 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1884 	reg_block_dump(adapter, regbuf,
1885 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1886 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1887 
1888 	/* T5 adds new registers in the PL Register map.
1889 	 */
1890 	reg_block_dump(adapter, regbuf,
1891 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1892 		       T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip)
1893 		       ? PL_VF_WHOAMI_A : PL_VF_REVISION_A));
1894 	reg_block_dump(adapter, regbuf,
1895 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1896 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1897 
1898 	reg_block_dump(adapter, regbuf,
1899 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1900 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1901 }
1902 
1903 /*
1904  * Report current Wake On LAN settings.
1905  */
1906 static void cxgb4vf_get_wol(struct net_device *dev,
1907 			    struct ethtool_wolinfo *wol)
1908 {
1909 	wol->supported = 0;
1910 	wol->wolopts = 0;
1911 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1912 }
1913 
/*
 * TCP Segmentation Offload flags which we support.
 */
#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
/*
 * Scatter/gather, IPv4/IPv6 checksum offload, TSO, GRO and high-DMA flags.
 * NOTE(review): the name suggests this is the feature set advertised for
 * VLAN devices; the user of this macro is outside this part of the file --
 * confirm.
 */
#define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
1920 
/*
 * ethtool operations table.  Apart from the message level, ring sizes, RX
 * interrupt coalescing and port identification, every operation listed here
 * is a "get" operation.  Only the RX holdoff time and RX frame count
 * coalescing parameters are declared as supported.
 */
static const struct ethtool_ops cxgb4vf_ethtool_ops = {
	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
				     ETHTOOL_COALESCE_RX_MAX_FRAMES,
	.get_link_ksettings	= cxgb4vf_get_link_ksettings,
	.get_fecparam		= cxgb4vf_get_fecparam,
	.get_drvinfo		= cxgb4vf_get_drvinfo,
	.get_msglevel		= cxgb4vf_get_msglevel,
	.set_msglevel		= cxgb4vf_set_msglevel,
	.get_ringparam		= cxgb4vf_get_ringparam,
	.set_ringparam		= cxgb4vf_set_ringparam,
	.get_coalesce		= cxgb4vf_get_coalesce,
	.set_coalesce		= cxgb4vf_set_coalesce,
	.get_pauseparam		= cxgb4vf_get_pauseparam,
	.get_link		= ethtool_op_get_link,
	.get_strings		= cxgb4vf_get_strings,
	.set_phys_id		= cxgb4vf_phys_id,
	.get_sset_count		= cxgb4vf_get_sset_count,
	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
	.get_regs_len		= cxgb4vf_get_regs_len,
	.get_regs		= cxgb4vf_get_regs,
	.get_wol		= cxgb4vf_get_wol,
};
1943 
1944 /*
1945  * /sys/kernel/debug/cxgb4vf support code and data.
1946  * ================================================
1947  */
1948 
1949 /*
1950  * Show Firmware Mailbox Command/Reply Log
1951  *
1952  * Note that we don't do any locking when dumping the Firmware Mailbox Log so
1953  * it's possible that we can catch things during a log update and therefore
1954  * see partially corrupted log entries.  But i9t's probably Good Enough(tm).
1955  * If we ever decide that we want to make sure that we're dumping a coherent
1956  * log, we'd need to perform locking in the mailbox logging and in
1957  * mboxlog_open() where we'd need to grab the entire mailbox log in one go
1958  * like we do for the Firmware Device Log.  But as stated above, meh ...
1959  */
1960 static int mboxlog_show(struct seq_file *seq, void *v)
1961 {
1962 	struct adapter *adapter = seq->private;
1963 	struct mbox_cmd_log *log = adapter->mbox_log;
1964 	struct mbox_cmd *entry;
1965 	int entry_idx, i;
1966 
1967 	if (v == SEQ_START_TOKEN) {
1968 		seq_printf(seq,
1969 			   "%10s  %15s  %5s  %5s  %s\n",
1970 			   "Seq#", "Tstamp", "Atime", "Etime",
1971 			   "Command/Reply");
1972 		return 0;
1973 	}
1974 
1975 	entry_idx = log->cursor + ((uintptr_t)v - 2);
1976 	if (entry_idx >= log->size)
1977 		entry_idx -= log->size;
1978 	entry = mbox_cmd_log_entry(log, entry_idx);
1979 
1980 	/* skip over unused entries */
1981 	if (entry->timestamp == 0)
1982 		return 0;
1983 
1984 	seq_printf(seq, "%10u  %15llu  %5d  %5d",
1985 		   entry->seqno, entry->timestamp,
1986 		   entry->access, entry->execute);
1987 	for (i = 0; i < MBOX_LEN / 8; i++) {
1988 		u64 flit = entry->cmd[i];
1989 		u32 hi = (u32)(flit >> 32);
1990 		u32 lo = (u32)flit;
1991 
1992 		seq_printf(seq, "  %08x %08x", hi, lo);
1993 	}
1994 	seq_puts(seq, "\n");
1995 	return 0;
1996 }
1997 
1998 static inline void *mboxlog_get_idx(struct seq_file *seq, loff_t pos)
1999 {
2000 	struct adapter *adapter = seq->private;
2001 	struct mbox_cmd_log *log = adapter->mbox_log;
2002 
2003 	return ((pos <= log->size) ? (void *)(uintptr_t)(pos + 1) : NULL);
2004 }
2005 
2006 static void *mboxlog_start(struct seq_file *seq, loff_t *pos)
2007 {
2008 	return *pos ? mboxlog_get_idx(seq, *pos) : SEQ_START_TOKEN;
2009 }
2010 
2011 static void *mboxlog_next(struct seq_file *seq, void *v, loff_t *pos)
2012 {
2013 	++*pos;
2014 	return mboxlog_get_idx(seq, *pos);
2015 }
2016 
/*
 * seq_file stop operation.  Intentionally empty: the start/next operations
 * above acquire nothing that would have to be released here.
 */
static void mboxlog_stop(struct seq_file *seq, void *v)
{
}
2020 
/*
 * seq_file iterator operations for the mailbox log; installed by
 * mboxlog_open() through seq_open().
 */
static const struct seq_operations mboxlog_seq_ops = {
	.start = mboxlog_start,
	.next  = mboxlog_next,
	.stop  = mboxlog_stop,
	.show  = mboxlog_show
};
2027 
2028 static int mboxlog_open(struct inode *inode, struct file *file)
2029 {
2030 	int res = seq_open(file, &mboxlog_seq_ops);
2031 
2032 	if (!res) {
2033 		struct seq_file *seq = file->private_data;
2034 
2035 		seq->private = inode->i_private;
2036 	}
2037 	return res;
2038 }
2039 
/* File operations for the "mboxlog" node; reads go through seq_file. */
static const struct file_operations mboxlog_fops = {
	.owner   = THIS_MODULE,
	.open    = mboxlog_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2047 
2048 /*
 * Show SGE Queue Set information.  We display QPL Queue Sets per line.
2050  */
/* Number of Queue Sets shown side by side in one output "entry". */
#define QPL	4

static int sge_qinfo_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	/* Number of entries needed to show all Ethernet Queue Sets. */
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	/* r is the zero-based entry index: the iterator value minus one. */
	int qs, r = (uintptr_t)v - 1;

	/* Separate every entry after the first with a blank line. */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * S3 prints a left-justified 12-column label followed by one
	 * 16-column value per Queue Set of the current entry.  It relies on
	 * the locals "seq", "qs" and "n" being in scope where it is used;
	 * the value expression is re-evaluated for each "qs".  S prints
	 * strings, T prints an unsigned TX Queue field and R prints an
	 * unsigned RX Queue field.
	 */
	#define S3(fmt_spec, s, v) \
		do {\
			seq_printf(seq, "%-12s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %16" fmt_spec, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)
	#define T(s, v)		S3("u", s, txq[qs].v)
	#define R(s, v)		S3("u", s, rxq[qs].v)

	if (r < eth_entries) {
		/* First RX/TX Queue Sets of this entry. */
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* The last entry may hold fewer than QPL Queue Sets. */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		S3("d", "Port:",
		   (rxq[qs].rspq.netdev
		    ? ((struct port_info *)
		       netdev_priv(rxq[qs].rspq.netdev))->port_id
		    : -1));
		T("TxQ ID:", q.abs_id);
		T("TxQ size:", q.size);
		T("TxQ inuse:", q.in_use);
		T("TxQ PIdx:", q.pidx);
		T("TxQ CIdx:", q.cidx);
		R("RspQ ID:", rspq.abs_id);
		R("RspQ size:", rspq.size);
		R("RspQE size:", rspq.iqe_len);
		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
		S3("u", "Intr pktcnt:",
		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
		R("RspQ CIdx:", rspq.cidx);
		R("RspQ Gen:", rspq.gen);
		R("FL ID:", fl.abs_id);
		R("FL size:", fl.size - MIN_FL_RESID);
		R("FL avail:", fl.avail);
		R("FL PIdx:", fl.pidx);
		R("FL CIdx:", fl.cidx);
		return 0;
	}

	/*
	 * Past the Ethernet entries: entry 0 is the firmware event queue and
	 * entry 1 is the interrupt queue.
	 */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, evtq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[evtq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
			   qtimer_val(adapter, intrq));
		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
			   adapter->sge.counter_val[intrq->pktcnt_idx]);
		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
	}

	/* Drop the function-local helper macros again. */
	#undef R
	#undef T
	#undef S
	#undef S3

	return 0;
}
2141 
2142 /*
2143  * Return the number of "entries" in our "file".  We group the multi-Queue
2144  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2145  *
2146  *     Ethernet RX/TX Queue Sets
2147  *     Firmware Event Queue
2148  *     Forwarded Interrupt Queue (if in MSI mode)
2149  */
2150 static int sge_queue_entries(const struct adapter *adapter)
2151 {
2152 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2153 		((adapter->flags & CXGB4VF_USING_MSI) != 0);
2154 }
2155 
2156 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
2157 {
2158 	int entries = sge_queue_entries(seq->private);
2159 
2160 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2161 }
2162 
/* seq_file "stop" method: deliberately empty, there is nothing to undo. */
static void sge_queue_stop(struct seq_file *seq, void *v)
{
}
2166 
2167 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
2168 {
2169 	int entries = sge_queue_entries(seq->private);
2170 
2171 	++*pos;
2172 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2173 }
2174 
/* seq_file iterator callbacks used to render SGE Queue Set information. */
static const struct seq_operations sge_qinfo_seq_ops = {
	.start = sge_queue_start,
	.next  = sge_queue_next,
	.stop  = sge_queue_stop,
	.show  = sge_qinfo_show
};
2181 
2182 static int sge_qinfo_open(struct inode *inode, struct file *file)
2183 {
2184 	int res = seq_open(file, &sge_qinfo_seq_ops);
2185 
2186 	if (!res) {
2187 		struct seq_file *seq = file->private_data;
2188 		seq->private = inode->i_private;
2189 	}
2190 	return res;
2191 }
2192 
/* File operations for the "sge_qinfo" node; reads go through seq_file. */
static const struct file_operations sge_qinfo_debugfs_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qinfo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2200 
2201 /*
 * Show SGE Queue Set statistics.  We display QPL Queue Sets per line.
2203  */
/* Number of Queue Sets shown side by side in one output "entry". */
#define QPL	4

static int sge_qstats_show(struct seq_file *seq, void *v)
{
	struct adapter *adapter = seq->private;
	/* Number of entries needed to show all Ethernet Queue Sets. */
	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
	/* r is the zero-based entry index: the iterator value minus one. */
	int qs, r = (uintptr_t)v - 1;

	/* Separate every entry after the first with a blank line. */
	if (r)
		seq_putc(seq, '\n');

	/*
	 * S3 prints a left-justified 16-column label followed by one
	 * 8-column value per Queue Set of the current entry.  It relies on
	 * the locals "seq", "qs" and "n" being in scope where it is used;
	 * the value expression is re-evaluated for each "qs".  S prints
	 * strings; T3/R3 print a TX/RX Queue field with an explicit format
	 * and T/R do the same using "lu".
	 */
	#define S3(fmt, s, v) \
		do { \
			seq_printf(seq, "%-16s", s); \
			for (qs = 0; qs < n; ++qs) \
				seq_printf(seq, " %8" fmt, v); \
			seq_putc(seq, '\n'); \
		} while (0)
	#define S(s, v)		S3("s", s, v)

	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
	#define T(s, v)		T3("lu", s, v)

	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
	#define R(s, v)		R3("lu", s, v)

	if (r < eth_entries) {
		/* First RX/TX Queue Sets of this entry. */
		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
		/* The last entry may hold fewer than QPL Queue Sets. */
		int n = min(QPL, adapter->sge.ethqsets - QPL * r);

		S("QType:", "Ethernet");
		S("Interface:",
		  (rxq[qs].rspq.netdev
		   ? rxq[qs].rspq.netdev->name
		   : "N/A"));
		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
		R("RxPackets:", stats.pkts);
		R("RxCSO:", stats.rx_cso);
		R("VLANxtract:", stats.vlan_ex);
		R("LROmerged:", stats.lro_merged);
		R("LROpackets:", stats.lro_pkts);
		R("RxDrops:", stats.rx_drops);
		T("TSO:", tso);
		T("TxCSO:", tx_cso);
		T("VLANins:", vlan_ins);
		T("TxQFull:", q.stops);
		T("TxQRestarts:", q.restarts);
		T("TxMapErr:", mapping_err);
		R("FLAllocErr:", fl.alloc_failed);
		R("FLLrgAlcErr:", fl.large_alloc_failed);
		R("FLStarving:", fl.starving);
		return 0;
	}

	/*
	 * Past the Ethernet entries: entry 0 is the firmware event queue and
	 * entry 1 is the interrupt queue.
	 */
	r -= eth_entries;
	if (r == 0) {
		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   evtq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
	} else if (r == 1) {
		const struct sge_rspq *intrq = &adapter->sge.intrq;

		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
			   intrq->unhandled_irqs);
		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
	}

	/* Drop the function-local helper macros again. */
	#undef R
	#undef T
	#undef S
	#undef R3
	#undef T3
	#undef S3

	return 0;
}
2287 
2288 /*
2289  * Return the number of "entries" in our "file".  We group the multi-Queue
2290  * sections with QPL Queue Sets per "entry".  The sections of the output are:
2291  *
2292  *     Ethernet RX/TX Queue Sets
2293  *     Firmware Event Queue
2294  *     Forwarded Interrupt Queue (if in MSI mode)
2295  */
2296 static int sge_qstats_entries(const struct adapter *adapter)
2297 {
2298 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
2299 		((adapter->flags & CXGB4VF_USING_MSI) != 0);
2300 }
2301 
2302 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
2303 {
2304 	int entries = sge_qstats_entries(seq->private);
2305 
2306 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2307 }
2308 
/* seq_file "stop" method: deliberately empty, there is nothing to undo. */
static void sge_qstats_stop(struct seq_file *seq, void *v)
{
}
2312 
2313 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
2314 {
2315 	int entries = sge_qstats_entries(seq->private);
2316 
2317 	(*pos)++;
2318 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
2319 }
2320 
/* seq_file iterator callbacks used to render SGE Queue Set statistics. */
static const struct seq_operations sge_qstats_seq_ops = {
	.start = sge_qstats_start,
	.next  = sge_qstats_next,
	.stop  = sge_qstats_stop,
	.show  = sge_qstats_show
};
2327 
2328 static int sge_qstats_open(struct inode *inode, struct file *file)
2329 {
2330 	int res = seq_open(file, &sge_qstats_seq_ops);
2331 
2332 	if (res == 0) {
2333 		struct seq_file *seq = file->private_data;
2334 		seq->private = inode->i_private;
2335 	}
2336 	return res;
2337 }
2338 
/* File operations for the "sge_qstats" node; reads go through seq_file. */
static const struct file_operations sge_qstats_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = sge_qstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2346 
2347 /*
2348  * Show PCI-E SR-IOV Virtual Function Resource Limits.
2349  */
2350 static int resources_show(struct seq_file *seq, void *v)
2351 {
2352 	struct adapter *adapter = seq->private;
2353 	struct vf_resources *vfres = &adapter->params.vfres;
2354 
2355 	#define S(desc, fmt, var) \
2356 		seq_printf(seq, "%-60s " fmt "\n", \
2357 			   desc " (" #var "):", vfres->var)
2358 
2359 	S("Virtual Interfaces", "%d", nvi);
2360 	S("Egress Queues", "%d", neq);
2361 	S("Ethernet Control", "%d", nethctrl);
2362 	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
2363 	S("Ingress Queues", "%d", niq);
2364 	S("Traffic Class", "%d", tc);
2365 	S("Port Access Rights Mask", "%#x", pmask);
2366 	S("MAC Address Filters", "%d", nexactf);
2367 	S("Firmware Command Read Capabilities", "%#x", r_caps);
2368 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
2369 
2370 	#undef S
2371 
2372 	return 0;
2373 }
2374 DEFINE_SHOW_ATTRIBUTE(resources);
2375 
2376 /*
2377  * Show Virtual Interfaces.
2378  */
2379 static int interfaces_show(struct seq_file *seq, void *v)
2380 {
2381 	if (v == SEQ_START_TOKEN) {
2382 		seq_puts(seq, "Interface  Port   VIID\n");
2383 	} else {
2384 		struct adapter *adapter = seq->private;
2385 		int pidx = (uintptr_t)v - 2;
2386 		struct net_device *dev = adapter->port[pidx];
2387 		struct port_info *pi = netdev_priv(dev);
2388 
2389 		seq_printf(seq, "%9s  %4d  %#5x\n",
2390 			   dev->name, pi->port_id, pi->viid);
2391 	}
2392 	return 0;
2393 }
2394 
2395 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
2396 {
2397 	return pos <= adapter->params.nports
2398 		? (void *)(uintptr_t)(pos + 1)
2399 		: NULL;
2400 }
2401 
2402 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
2403 {
2404 	return *pos
2405 		? interfaces_get_idx(seq->private, *pos)
2406 		: SEQ_START_TOKEN;
2407 }
2408 
2409 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
2410 {
2411 	(*pos)++;
2412 	return interfaces_get_idx(seq->private, *pos);
2413 }
2414 
/* seq_file "stop" method: deliberately empty, there is nothing to undo. */
static void interfaces_stop(struct seq_file *seq, void *v)
{
}
2418 
/* seq_file iterator callbacks used to render the Virtual Interface list. */
static const struct seq_operations interfaces_seq_ops = {
	.start = interfaces_start,
	.next  = interfaces_next,
	.stop  = interfaces_stop,
	.show  = interfaces_show
};
2425 
2426 static int interfaces_open(struct inode *inode, struct file *file)
2427 {
2428 	int res = seq_open(file, &interfaces_seq_ops);
2429 
2430 	if (res == 0) {
2431 		struct seq_file *seq = file->private_data;
2432 		seq->private = inode->i_private;
2433 	}
2434 	return res;
2435 }
2436 
/* File operations for the "interfaces" node; reads go through seq_file. */
static const struct file_operations interfaces_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = interfaces_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2444 
2445 /*
 * /sys/kernel/debug/cxgb4vf/ files list.
2447  */
/* Describes one debugfs file to be created by setup_debugfs(). */
struct cxgb4vf_debugfs_entry {
	const char *name;		/* name of debugfs node */
	umode_t mode;			/* file system mode */
	const struct file_operations *fops;	/* file operations for node */
};
2453 
/*
 * Table of debugfs files: node name, mode and file operations.  All of
 * the nodes are created read-only (0444).
 */
static struct cxgb4vf_debugfs_entry debugfs_files[] = {
	{ "mboxlog",    0444, &mboxlog_fops },
	{ "sge_qinfo",  0444, &sge_qinfo_debugfs_fops },
	{ "sge_qstats", 0444, &sge_qstats_proc_fops },
	{ "resources",  0444, &resources_fops },
	{ "interfaces", 0444, &interfaces_proc_fops },
};
2461 
2462 /*
2463  * Module and device initialization and cleanup code.
2464  * ==================================================
2465  */
2466 
2467 /*
 * Set up our /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2469  * directory (debugfs_root) has already been set up.
2470  */
2471 static int setup_debugfs(struct adapter *adapter)
2472 {
2473 	int i;
2474 
2475 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2476 
2477 	/*
2478 	 * Debugfs support is best effort.
2479 	 */
2480 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2481 		debugfs_create_file(debugfs_files[i].name,
2482 				    debugfs_files[i].mode,
2483 				    adapter->debugfs_root, (void *)adapter,
2484 				    debugfs_files[i].fops);
2485 
2486 	return 0;
2487 }
2488 
2489 /*
2490  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2491  * it to our caller to tear down the directory (debugfs_root).
2492  */
static void cleanup_debugfs(struct adapter *adapter)
{
	/* The debugfs directory must still be valid when we are called. */
	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Unlike our sister routine cleanup_proc(), we don't need to remove
	 * individual entries because a call will be made to
	 * debugfs_remove_recursive().  We just need to clean up any ancillary
	 * persistent state.
	 */
	/* nothing to do */
}
2505 
2506 /* Figure out how many Ports and Queue Sets we can support.  This depends on
2507  * knowing our Virtual Function Resources and may be called a second time if
2508  * we fall back from MSI-X to MSI Interrupt Mode.
2509  */
2510 static void size_nports_qsets(struct adapter *adapter)
2511 {
2512 	struct vf_resources *vfres = &adapter->params.vfres;
2513 	unsigned int ethqsets, pmask_nports;
2514 
2515 	/* The number of "ports" which we support is equal to the number of
2516 	 * Virtual Interfaces with which we've been provisioned.
2517 	 */
2518 	adapter->params.nports = vfres->nvi;
2519 	if (adapter->params.nports > MAX_NPORTS) {
2520 		dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
2521 			 " allowed virtual interfaces\n", MAX_NPORTS,
2522 			 adapter->params.nports);
2523 		adapter->params.nports = MAX_NPORTS;
2524 	}
2525 
2526 	/* We may have been provisioned with more VIs than the number of
2527 	 * ports we're allowed to access (our Port Access Rights Mask).
2528 	 * This is obviously a configuration conflict but we don't want to
2529 	 * crash the kernel or anything silly just because of that.
2530 	 */
2531 	pmask_nports = hweight32(adapter->params.vfres.pmask);
2532 	if (pmask_nports < adapter->params.nports) {
2533 		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
2534 			 " virtual interfaces; limited by Port Access Rights"
2535 			 " mask %#x\n", pmask_nports, adapter->params.nports,
2536 			 adapter->params.vfres.pmask);
2537 		adapter->params.nports = pmask_nports;
2538 	}
2539 
2540 	/* We need to reserve an Ingress Queue for the Asynchronous Firmware
2541 	 * Event Queue.  And if we're using MSI Interrupts, we'll also need to
2542 	 * reserve an Ingress Queue for a Forwarded Interrupts.
2543 	 *
2544 	 * The rest of the FL/Intr-capable ingress queues will be matched up
2545 	 * one-for-one with Ethernet/Control egress queues in order to form
2546 	 * "Queue Sets" which will be aportioned between the "ports".  For
2547 	 * each Queue Set, we'll need the ability to allocate two Egress
2548 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2549 	 * Ethernet Queue.
2550 	 *
2551 	 * Note that even if we're currently configured to use MSI-X
2552 	 * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
2553 	 * to MSI Interrupts if we can't get enough MSI-X Interrupts.  If that
2554 	 * happens we'll need to adjust things later.
2555 	 */
2556 	ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
2557 	if (vfres->nethctrl != ethqsets)
2558 		ethqsets = min(vfres->nethctrl, ethqsets);
2559 	if (vfres->neq < ethqsets*2)
2560 		ethqsets = vfres->neq/2;
2561 	if (ethqsets > MAX_ETH_QSETS)
2562 		ethqsets = MAX_ETH_QSETS;
2563 	adapter->sge.max_ethqsets = ethqsets;
2564 
2565 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2566 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2567 			 " virtual interfaces (too few Queue Sets)\n",
2568 			 adapter->sge.max_ethqsets, adapter->params.nports);
2569 		adapter->params.nports = adapter->sge.max_ethqsets;
2570 	}
2571 }
2572 
2573 /*
2574  * Perform early "adapter" initialization.  This is where we discover what
2575  * adapter parameters we're going to be using and initialize basic adapter
2576  * hardware support.
2577  */
2578 static int adap_init0(struct adapter *adapter)
2579 {
2580 	struct sge_params *sge_params = &adapter->params.sge;
2581 	struct sge *s = &adapter->sge;
2582 	int err;
2583 	u32 param, val = 0;
2584 
2585 	/*
2586 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2587 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2588 	 * issue an FLR because of a self- deadlock on the device semaphore.
2589 	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2590 	 * cases where they're needed -- for instance, some versions of KVM
2591 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2592 	 * use the firmware based reset in order to reset any per function
2593 	 * state.
2594 	 */
2595 	err = t4vf_fw_reset(adapter);
2596 	if (err < 0) {
2597 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2598 		return err;
2599 	}
2600 
2601 	/*
2602 	 * Grab basic operational parameters.  These will predominantly have
2603 	 * been set up by the Physical Function Driver or will be hard coded
2604 	 * into the adapter.  We just have to live with them ...  Note that
2605 	 * we _must_ get our VPD parameters before our SGE parameters because
2606 	 * we need to know the adapter's core clock from the VPD in order to
2607 	 * properly decode the SGE Timer Values.
2608 	 */
2609 	err = t4vf_get_dev_params(adapter);
2610 	if (err) {
2611 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2612 			" device parameters: err=%d\n", err);
2613 		return err;
2614 	}
2615 	err = t4vf_get_vpd_params(adapter);
2616 	if (err) {
2617 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2618 			" VPD parameters: err=%d\n", err);
2619 		return err;
2620 	}
2621 	err = t4vf_get_sge_params(adapter);
2622 	if (err) {
2623 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2624 			" SGE parameters: err=%d\n", err);
2625 		return err;
2626 	}
2627 	err = t4vf_get_rss_glb_config(adapter);
2628 	if (err) {
2629 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2630 			" RSS parameters: err=%d\n", err);
2631 		return err;
2632 	}
2633 	if (adapter->params.rss.mode !=
2634 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2635 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2636 			" mode %d\n", adapter->params.rss.mode);
2637 		return -EINVAL;
2638 	}
2639 	err = t4vf_sge_init(adapter);
2640 	if (err) {
2641 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2642 			" err=%d\n", err);
2643 		return err;
2644 	}
2645 
2646 	/* If we're running on newer firmware, let it know that we're
2647 	 * prepared to deal with encapsulated CPL messages.  Older
2648 	 * firmware won't understand this and we'll just get
2649 	 * unencapsulated messages ...
2650 	 */
2651 	param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
2652 		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
2653 	val = 1;
2654 	(void) t4vf_set_params(adapter, 1, &param, &val);
2655 
2656 	/*
2657 	 * Retrieve our RX interrupt holdoff timer values and counter
2658 	 * threshold values from the SGE parameters.
2659 	 */
2660 	s->timer_val[0] = core_ticks_to_us(adapter,
2661 		TIMERVALUE0_G(sge_params->sge_timer_value_0_and_1));
2662 	s->timer_val[1] = core_ticks_to_us(adapter,
2663 		TIMERVALUE1_G(sge_params->sge_timer_value_0_and_1));
2664 	s->timer_val[2] = core_ticks_to_us(adapter,
2665 		TIMERVALUE0_G(sge_params->sge_timer_value_2_and_3));
2666 	s->timer_val[3] = core_ticks_to_us(adapter,
2667 		TIMERVALUE1_G(sge_params->sge_timer_value_2_and_3));
2668 	s->timer_val[4] = core_ticks_to_us(adapter,
2669 		TIMERVALUE0_G(sge_params->sge_timer_value_4_and_5));
2670 	s->timer_val[5] = core_ticks_to_us(adapter,
2671 		TIMERVALUE1_G(sge_params->sge_timer_value_4_and_5));
2672 
2673 	s->counter_val[0] = THRESHOLD_0_G(sge_params->sge_ingress_rx_threshold);
2674 	s->counter_val[1] = THRESHOLD_1_G(sge_params->sge_ingress_rx_threshold);
2675 	s->counter_val[2] = THRESHOLD_2_G(sge_params->sge_ingress_rx_threshold);
2676 	s->counter_val[3] = THRESHOLD_3_G(sge_params->sge_ingress_rx_threshold);
2677 
2678 	/*
2679 	 * Grab our Virtual Interface resource allocation, extract the
2680 	 * features that we're interested in and do a bit of sanity testing on
2681 	 * what we discover.
2682 	 */
2683 	err = t4vf_get_vfres(adapter);
2684 	if (err) {
2685 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2686 			" resources: err=%d\n", err);
2687 		return err;
2688 	}
2689 
2690 	/* Check for various parameter sanity issues */
2691 	if (adapter->params.vfres.pmask == 0) {
2692 		dev_err(adapter->pdev_dev, "no port access configured\n"
2693 			"usable!\n");
2694 		return -EINVAL;
2695 	}
2696 	if (adapter->params.vfres.nvi == 0) {
2697 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2698 			"usable!\n");
2699 		return -EINVAL;
2700 	}
2701 
2702 	/* Initialize nports and max_ethqsets now that we have our Virtual
2703 	 * Function Resources.
2704 	 */
2705 	size_nports_qsets(adapter);
2706 
2707 	adapter->flags |= CXGB4VF_FW_OK;
2708 	return 0;
2709 }
2710 
2711 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2712 			     u8 pkt_cnt_idx, unsigned int size,
2713 			     unsigned int iqe_size)
2714 {
2715 	rspq->intr_params = (QINTR_TIMER_IDX_V(timer_idx) |
2716 			     (pkt_cnt_idx < SGE_NCOUNTERS ?
2717 			      QINTR_CNT_EN_F : 0));
2718 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2719 			    ? pkt_cnt_idx
2720 			    : 0);
2721 	rspq->iqe_len = iqe_size;
2722 	rspq->size = size;
2723 }
2724 
2725 /*
2726  * Perform default configuration of DMA queues depending on the number and
2727  * type of ports we found and the number of available CPUs.  Most settings can
2728  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2729  * being brought up for the first time.
2730  */
2731 static void cfg_queues(struct adapter *adapter)
2732 {
2733 	struct sge *s = &adapter->sge;
2734 	int q10g, n10g, qidx, pidx, qs;
2735 	size_t iqe_size;
2736 
2737 	/*
2738 	 * We should not be called till we know how many Queue Sets we can
2739 	 * support.  In particular, this means that we need to know what kind
2740 	 * of interrupts we'll be using ...
2741 	 */
2742 	BUG_ON((adapter->flags &
2743 	       (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
2744 
2745 	/*
2746 	 * Count the number of 10GbE Virtual Interfaces that we have.
2747 	 */
2748 	n10g = 0;
2749 	for_each_port(adapter, pidx)
2750 		n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2751 
2752 	/*
2753 	 * We default to 1 queue per non-10G port and up to # of cores queues
2754 	 * per 10G port.
2755 	 */
2756 	if (n10g == 0)
2757 		q10g = 0;
2758 	else {
2759 		int n1g = (adapter->params.nports - n10g);
2760 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2761 		if (q10g > num_online_cpus())
2762 			q10g = num_online_cpus();
2763 	}
2764 
2765 	/*
2766 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2767 	 * The layout will be established in setup_sge_queues() when the
2768 	 * adapter is brough up for the first time.
2769 	 */
2770 	qidx = 0;
2771 	for_each_port(adapter, pidx) {
2772 		struct port_info *pi = adap2pinfo(adapter, pidx);
2773 
2774 		pi->first_qset = qidx;
2775 		pi->nqsets = is_x_10g_port(&pi->link_cfg) ? q10g : 1;
2776 		qidx += pi->nqsets;
2777 	}
2778 	s->ethqsets = qidx;
2779 
2780 	/*
2781 	 * The Ingress Queue Entry Size for our various Response Queues needs
2782 	 * to be big enough to accommodate the largest message we can receive
2783 	 * from the chip/firmware; which is 64 bytes ...
2784 	 */
2785 	iqe_size = 64;
2786 
2787 	/*
2788 	 * Set up default Queue Set parameters ...  Start off with the
2789 	 * shortest interrupt holdoff timer.
2790 	 */
2791 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2792 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2793 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2794 
2795 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2796 		rxq->fl.size = 72;
2797 		txq->q.size = 1024;
2798 	}
2799 
2800 	/*
2801 	 * The firmware event queue is used for link state changes and
2802 	 * notifications of TX DMA completions.
2803 	 */
2804 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2805 
2806 	/*
2807 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2808 	 * mode.  In this mode all interrupts associated with RX queues will
2809 	 * be forwarded to a single queue which we'll associate with our MSI
2810 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2811 	 * queue will indicate which ingress queue needs servicing ...  This
2812 	 * queue needs to be large enough to accommodate all of the ingress
2813 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2814 	 * from equalling the CIDX if every ingress queue has an outstanding
2815 	 * interrupt).  The queue doesn't need to be any larger because no
2816 	 * ingress queue will ever have more than one outstanding interrupt at
2817 	 * any time ...
2818 	 */
2819 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2820 		  iqe_size);
2821 }
2822 
2823 /*
2824  * Reduce the number of Ethernet queues across all ports to at most n.
2825  * n provides at least one queue per port.
2826  */
2827 static void reduce_ethqs(struct adapter *adapter, int n)
2828 {
2829 	int i;
2830 	struct port_info *pi;
2831 
2832 	/*
2833 	 * While we have too many active Ether Queue Sets, interate across the
2834 	 * "ports" and reduce their individual Queue Set allocations.
2835 	 */
2836 	BUG_ON(n < adapter->params.nports);
2837 	while (n < adapter->sge.ethqsets)
2838 		for_each_port(adapter, i) {
2839 			pi = adap2pinfo(adapter, i);
2840 			if (pi->nqsets > 1) {
2841 				pi->nqsets--;
2842 				adapter->sge.ethqsets--;
2843 				if (adapter->sge.ethqsets <= n)
2844 					break;
2845 			}
2846 		}
2847 
2848 	/*
2849 	 * Reassign the starting Queue Sets for each of the "ports" ...
2850 	 */
2851 	n = 0;
2852 	for_each_port(adapter, i) {
2853 		pi = adap2pinfo(adapter, i);
2854 		pi->first_qset = n;
2855 		n += pi->nqsets;
2856 	}
2857 }
2858 
2859 /*
2860  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2861  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2862  * need.  Minimally we need one for every Virtual Interface plus those needed
2863  * for our "extras".  Note that this process may lower the maximum number of
2864  * allowed Queue Sets ...
2865  */
2866 static int enable_msix(struct adapter *adapter)
2867 {
2868 	int i, want, need, nqsets;
2869 	struct msix_entry entries[MSIX_ENTRIES];
2870 	struct sge *s = &adapter->sge;
2871 
2872 	for (i = 0; i < MSIX_ENTRIES; ++i)
2873 		entries[i].entry = i;
2874 
2875 	/*
2876 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2877 	 * plus those needed for our "extras" (for example, the firmware
2878 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2879 	 * Interface plus those needed for our "extras".  So now we get to see
2880 	 * if the song is right ...
2881 	 */
2882 	want = s->max_ethqsets + MSIX_EXTRAS;
2883 	need = adapter->params.nports + MSIX_EXTRAS;
2884 
2885 	want = pci_enable_msix_range(adapter->pdev, entries, need, want);
2886 	if (want < 0)
2887 		return want;
2888 
2889 	nqsets = want - MSIX_EXTRAS;
2890 	if (nqsets < s->max_ethqsets) {
2891 		dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2892 			 " for %d Queue Sets\n", nqsets);
2893 		s->max_ethqsets = nqsets;
2894 		if (nqsets < s->ethqsets)
2895 			reduce_ethqs(adapter, nqsets);
2896 	}
2897 	for (i = 0; i < want; ++i)
2898 		adapter->msix_info[i].vec = entries[i].vector;
2899 
2900 	return 0;
2901 }
2902 
/*
 * Network device operations for our Virtual Interfaces.  The poll
 * controller hook is only provided when CONFIG_NET_POLL_CONTROLLER is set.
 */
static const struct net_device_ops cxgb4vf_netdev_ops	= {
	.ndo_open		= cxgb4vf_open,
	.ndo_stop		= cxgb4vf_stop,
	.ndo_start_xmit		= t4vf_eth_xmit,
	.ndo_get_stats		= cxgb4vf_get_stats,
	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
	.ndo_change_mtu		= cxgb4vf_change_mtu,
	.ndo_fix_features	= cxgb4vf_fix_features,
	.ndo_set_features	= cxgb4vf_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= cxgb4vf_poll_controller,
#endif
};
2919 
2920 /*
2921  * "Probe" a device: initialize a device and construct all kernel and driver
2922  * state needed to manage the device.  This routine is called "init_one" in
2923  * the PF Driver ...
2924  */
2925 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2926 			     const struct pci_device_id *ent)
2927 {
2928 	int pci_using_dac;
2929 	int err, pidx;
2930 	unsigned int pmask;
2931 	struct adapter *adapter;
2932 	struct port_info *pi;
2933 	struct net_device *netdev;
2934 	unsigned int pf;
2935 
2936 	/*
2937 	 * Initialize generic PCI device state.
2938 	 */
2939 	err = pci_enable_device(pdev);
2940 	if (err) {
2941 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2942 		return err;
2943 	}
2944 
2945 	/*
2946 	 * Reserve PCI resources for the device.  If we can't get them some
2947 	 * other driver may have already claimed the device ...
2948 	 */
2949 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2950 	if (err) {
2951 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2952 		goto err_disable_device;
2953 	}
2954 
2955 	/*
2956 	 * Set up our DMA mask: try for 64-bit address masking first and
2957 	 * fall back to 32-bit if we can't get 64 bits ...
2958 	 */
2959 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2960 	if (err == 0) {
2961 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2962 		if (err) {
2963 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2964 				" coherent allocations\n");
2965 			goto err_release_regions;
2966 		}
2967 		pci_using_dac = 1;
2968 	} else {
2969 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2970 		if (err != 0) {
2971 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2972 			goto err_release_regions;
2973 		}
2974 		pci_using_dac = 0;
2975 	}
2976 
2977 	/*
2978 	 * Enable bus mastering for the device ...
2979 	 */
2980 	pci_set_master(pdev);
2981 
2982 	/*
2983 	 * Allocate our adapter data structure and attach it to the device.
2984 	 */
2985 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2986 	if (!adapter) {
2987 		err = -ENOMEM;
2988 		goto err_release_regions;
2989 	}
2990 	pci_set_drvdata(pdev, adapter);
2991 	adapter->pdev = pdev;
2992 	adapter->pdev_dev = &pdev->dev;
2993 
2994 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
2995 				    (sizeof(struct mbox_cmd) *
2996 				     T4VF_OS_LOG_MBOX_CMDS),
2997 				    GFP_KERNEL);
2998 	if (!adapter->mbox_log) {
2999 		err = -ENOMEM;
3000 		goto err_free_adapter;
3001 	}
3002 	adapter->mbox_log->size = T4VF_OS_LOG_MBOX_CMDS;
3003 
3004 	/*
3005 	 * Initialize SMP data synchronization resources.
3006 	 */
3007 	spin_lock_init(&adapter->stats_lock);
3008 	spin_lock_init(&adapter->mbox_lock);
3009 	INIT_LIST_HEAD(&adapter->mlist.list);
3010 
3011 	/*
3012 	 * Map our I/O registers in BAR0.
3013 	 */
3014 	adapter->regs = pci_ioremap_bar(pdev, 0);
3015 	if (!adapter->regs) {
3016 		dev_err(&pdev->dev, "cannot map device registers\n");
3017 		err = -ENOMEM;
3018 		goto err_free_adapter;
3019 	}
3020 
3021 	/* Wait for the device to become ready before proceeding ...
3022 	 */
3023 	err = t4vf_prep_adapter(adapter);
3024 	if (err) {
3025 		dev_err(adapter->pdev_dev, "device didn't become ready:"
3026 			" err=%d\n", err);
3027 		goto err_unmap_bar0;
3028 	}
3029 
3030 	/* For T5 and later we want to use the new BAR-based User Doorbells,
3031 	 * so we need to map BAR2 here ...
3032 	 */
3033 	if (!is_t4(adapter->params.chip)) {
3034 		adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
3035 					   pci_resource_len(pdev, 2));
3036 		if (!adapter->bar2) {
3037 			dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
3038 			err = -ENOMEM;
3039 			goto err_unmap_bar0;
3040 		}
3041 	}
3042 	/*
3043 	 * Initialize adapter level features.
3044 	 */
3045 	adapter->name = pci_name(pdev);
3046 	adapter->msg_enable = DFLT_MSG_ENABLE;
3047 
3048 	/* If possible, we use PCIe Relaxed Ordering Attribute to deliver
3049 	 * Ingress Packet Data to Free List Buffers in order to allow for
3050 	 * chipset performance optimizations between the Root Complex and
3051 	 * Memory Controllers.  (Messages to the associated Ingress Queue
3052 	 * notifying new Packet Placement in the Free Lists Buffers will be
3053 	 * send without the Relaxed Ordering Attribute thus guaranteeing that
3054 	 * all preceding PCIe Transaction Layer Packets will be processed
3055 	 * first.)  But some Root Complexes have various issues with Upstream
3056 	 * Transaction Layer Packets with the Relaxed Ordering Attribute set.
3057 	 * The PCIe devices which under the Root Complexes will be cleared the
3058 	 * Relaxed Ordering bit in the configuration space, So we check our
3059 	 * PCIe configuration space to see if it's flagged with advice against
3060 	 * using Relaxed Ordering.
3061 	 */
3062 	if (!pcie_relaxed_ordering_enabled(pdev))
3063 		adapter->flags |= CXGB4VF_ROOT_NO_RELAXED_ORDERING;
3064 
3065 	err = adap_init0(adapter);
3066 	if (err)
3067 		dev_err(&pdev->dev,
3068 			"Adapter initialization failed, error %d. Continuing in debug mode\n",
3069 			err);
3070 
3071 	/* Initialize hash mac addr list */
3072 	INIT_LIST_HEAD(&adapter->mac_hlist);
3073 
3074 	/*
3075 	 * Allocate our "adapter ports" and stitch everything together.
3076 	 */
3077 	pmask = adapter->params.vfres.pmask;
3078 	pf = t4vf_get_pf_from_vf(adapter);
3079 	for_each_port(adapter, pidx) {
3080 		int port_id, viid;
3081 		u8 mac[ETH_ALEN];
3082 		unsigned int naddr = 1;
3083 
3084 		/*
3085 		 * We simplistically allocate our virtual interfaces
3086 		 * sequentially across the port numbers to which we have
3087 		 * access rights.  This should be configurable in some manner
3088 		 * ...
3089 		 */
3090 		if (pmask == 0)
3091 			break;
3092 		port_id = ffs(pmask) - 1;
3093 		pmask &= ~(1 << port_id);
3094 
3095 		/*
3096 		 * Allocate our network device and stitch things together.
3097 		 */
3098 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
3099 					   MAX_PORT_QSETS);
3100 		if (netdev == NULL) {
3101 			err = -ENOMEM;
3102 			goto err_free_dev;
3103 		}
3104 		adapter->port[pidx] = netdev;
3105 		SET_NETDEV_DEV(netdev, &pdev->dev);
3106 		pi = netdev_priv(netdev);
3107 		pi->adapter = adapter;
3108 		pi->pidx = pidx;
3109 		pi->port_id = port_id;
3110 
3111 		/*
3112 		 * Initialize the starting state of our "port" and register
3113 		 * it.
3114 		 */
3115 		pi->xact_addr_filt = -1;
3116 		netdev->irq = pdev->irq;
3117 
3118 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_GRO |
3119 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
3120 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
3121 		netdev->features = netdev->hw_features;
3122 		if (pci_using_dac)
3123 			netdev->features |= NETIF_F_HIGHDMA;
3124 		netdev->vlan_features = netdev->features & VLAN_FEAT;
3125 
3126 		netdev->priv_flags |= IFF_UNICAST_FLT;
3127 		netdev->min_mtu = 81;
3128 		netdev->max_mtu = ETH_MAX_MTU;
3129 
3130 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
3131 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
3132 		netdev->dev_port = pi->port_id;
3133 
3134 		/*
3135 		 * If we haven't been able to contact the firmware, there's
3136 		 * nothing else we can do for this "port" ...
3137 		 */
3138 		if (!(adapter->flags & CXGB4VF_FW_OK))
3139 			continue;
3140 
3141 		viid = t4vf_alloc_vi(adapter, port_id);
3142 		if (viid < 0) {
3143 			dev_err(&pdev->dev,
3144 				"cannot allocate VI for port %d: err=%d\n",
3145 				port_id, viid);
3146 			err = viid;
3147 			goto err_free_dev;
3148 		}
3149 		pi->viid = viid;
3150 
3151 		/*
3152 		 * Initialize the hardware/software state for the port.
3153 		 */
3154 		err = t4vf_port_init(adapter, pidx);
3155 		if (err) {
3156 			dev_err(&pdev->dev, "cannot initialize port %d\n",
3157 				pidx);
3158 			goto err_free_dev;
3159 		}
3160 
3161 		err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
3162 		if (err) {
3163 			dev_err(&pdev->dev,
3164 				"unable to determine MAC ACL address, "
3165 				"continuing anyway.. (status %d)\n", err);
3166 		} else if (naddr && adapter->params.vfres.nvi == 1) {
3167 			struct sockaddr addr;
3168 
3169 			ether_addr_copy(addr.sa_data, mac);
3170 			err = cxgb4vf_set_mac_addr(netdev, &addr);
3171 			if (err) {
3172 				dev_err(&pdev->dev,
3173 					"unable to set MAC address %pM\n",
3174 					mac);
3175 				goto err_free_dev;
3176 			}
3177 			dev_info(&pdev->dev,
3178 				 "Using assigned MAC ACL: %pM\n", mac);
3179 		}
3180 	}
3181 
3182 	/* See what interrupts we'll be using.  If we've been configured to
3183 	 * use MSI-X interrupts, try to enable them but fall back to using
3184 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
3185 	 * get MSI interrupts we bail with the error.
3186 	 */
3187 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
3188 		adapter->flags |= CXGB4VF_USING_MSIX;
3189 	else {
3190 		if (msi == MSI_MSIX) {
3191 			dev_info(adapter->pdev_dev,
3192 				 "Unable to use MSI-X Interrupts; falling "
3193 				 "back to MSI Interrupts\n");
3194 
3195 			/* We're going to need a Forwarded Interrupt Queue so
3196 			 * that may cut into how many Queue Sets we can
3197 			 * support.
3198 			 */
3199 			msi = MSI_MSI;
3200 			size_nports_qsets(adapter);
3201 		}
3202 		err = pci_enable_msi(pdev);
3203 		if (err) {
3204 			dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
3205 				" err=%d\n", err);
3206 			goto err_free_dev;
3207 		}
3208 		adapter->flags |= CXGB4VF_USING_MSI;
3209 	}
3210 
3211 	/* Now that we know how many "ports" we have and what interrupt
3212 	 * mechanism we're going to use, we can configure our queue resources.
3213 	 */
3214 	cfg_queues(adapter);
3215 
3216 	/*
3217 	 * The "card" is now ready to go.  If any errors occur during device
3218 	 * registration we do not fail the whole "card" but rather proceed
3219 	 * only with the ports we manage to register successfully.  However we
3220 	 * must register at least one net device.
3221 	 */
3222 	for_each_port(adapter, pidx) {
3223 		struct port_info *pi = netdev_priv(adapter->port[pidx]);
3224 		netdev = adapter->port[pidx];
3225 		if (netdev == NULL)
3226 			continue;
3227 
3228 		netif_set_real_num_tx_queues(netdev, pi->nqsets);
3229 		netif_set_real_num_rx_queues(netdev, pi->nqsets);
3230 
3231 		err = register_netdev(netdev);
3232 		if (err) {
3233 			dev_warn(&pdev->dev, "cannot register net device %s,"
3234 				 " skipping\n", netdev->name);
3235 			continue;
3236 		}
3237 
3238 		netif_carrier_off(netdev);
3239 		set_bit(pidx, &adapter->registered_device_map);
3240 	}
3241 	if (adapter->registered_device_map == 0) {
3242 		dev_err(&pdev->dev, "could not register any net devices\n");
3243 		goto err_disable_interrupts;
3244 	}
3245 
3246 	/*
3247 	 * Set up our debugfs entries.
3248 	 */
3249 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
3250 		adapter->debugfs_root =
3251 			debugfs_create_dir(pci_name(pdev),
3252 					   cxgb4vf_debugfs_root);
3253 		setup_debugfs(adapter);
3254 	}
3255 
3256 	/*
3257 	 * Print a short notice on the existence and configuration of the new
3258 	 * VF network device ...
3259 	 */
3260 	for_each_port(adapter, pidx) {
3261 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
3262 			 adapter->port[pidx]->name,
3263 			 (adapter->flags & CXGB4VF_USING_MSIX) ? "MSI-X" :
3264 			 (adapter->flags & CXGB4VF_USING_MSI)  ? "MSI" : "");
3265 	}
3266 
3267 	/*
3268 	 * Return success!
3269 	 */
3270 	return 0;
3271 
3272 	/*
3273 	 * Error recovery and exit code.  Unwind state that's been created
3274 	 * so far and return the error.
3275 	 */
3276 err_disable_interrupts:
3277 	if (adapter->flags & CXGB4VF_USING_MSIX) {
3278 		pci_disable_msix(adapter->pdev);
3279 		adapter->flags &= ~CXGB4VF_USING_MSIX;
3280 	} else if (adapter->flags & CXGB4VF_USING_MSI) {
3281 		pci_disable_msi(adapter->pdev);
3282 		adapter->flags &= ~CXGB4VF_USING_MSI;
3283 	}
3284 
3285 err_free_dev:
3286 	for_each_port(adapter, pidx) {
3287 		netdev = adapter->port[pidx];
3288 		if (netdev == NULL)
3289 			continue;
3290 		pi = netdev_priv(netdev);
3291 		if (pi->viid)
3292 			t4vf_free_vi(adapter, pi->viid);
3293 		if (test_bit(pidx, &adapter->registered_device_map))
3294 			unregister_netdev(netdev);
3295 		free_netdev(netdev);
3296 	}
3297 
3298 	if (!is_t4(adapter->params.chip))
3299 		iounmap(adapter->bar2);
3300 
3301 err_unmap_bar0:
3302 	iounmap(adapter->regs);
3303 
3304 err_free_adapter:
3305 	kfree(adapter->mbox_log);
3306 	kfree(adapter);
3307 
3308 err_release_regions:
3309 	pci_release_regions(pdev);
3310 	pci_clear_master(pdev);
3311 
3312 err_disable_device:
3313 	pci_disable_device(pdev);
3314 
3315 	return err;
3316 }
3317 
3318 /*
3319  * "Remove" a device: tear down all kernel and driver state created in the
3320  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
3321  * that this is called "remove_one" in the PF Driver.)
3322  */
3323 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
3324 {
3325 	struct adapter *adapter = pci_get_drvdata(pdev);
3326 	struct hash_mac_addr *entry, *tmp;
3327 
3328 	/*
3329 	 * Tear down driver state associated with device.
3330 	 */
3331 	if (adapter) {
3332 		int pidx;
3333 
3334 		/*
3335 		 * Stop all of our activity.  Unregister network port,
3336 		 * disable interrupts, etc.
3337 		 */
3338 		for_each_port(adapter, pidx)
3339 			if (test_bit(pidx, &adapter->registered_device_map))
3340 				unregister_netdev(adapter->port[pidx]);
3341 		t4vf_sge_stop(adapter);
3342 		if (adapter->flags & CXGB4VF_USING_MSIX) {
3343 			pci_disable_msix(adapter->pdev);
3344 			adapter->flags &= ~CXGB4VF_USING_MSIX;
3345 		} else if (adapter->flags & CXGB4VF_USING_MSI) {
3346 			pci_disable_msi(adapter->pdev);
3347 			adapter->flags &= ~CXGB4VF_USING_MSI;
3348 		}
3349 
3350 		/*
3351 		 * Tear down our debugfs entries.
3352 		 */
3353 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
3354 			cleanup_debugfs(adapter);
3355 			debugfs_remove_recursive(adapter->debugfs_root);
3356 		}
3357 
3358 		/*
3359 		 * Free all of the various resources which we've acquired ...
3360 		 */
3361 		t4vf_free_sge_resources(adapter);
3362 		for_each_port(adapter, pidx) {
3363 			struct net_device *netdev = adapter->port[pidx];
3364 			struct port_info *pi;
3365 
3366 			if (netdev == NULL)
3367 				continue;
3368 
3369 			pi = netdev_priv(netdev);
3370 			if (pi->viid)
3371 				t4vf_free_vi(adapter, pi->viid);
3372 			free_netdev(netdev);
3373 		}
3374 		iounmap(adapter->regs);
3375 		if (!is_t4(adapter->params.chip))
3376 			iounmap(adapter->bar2);
3377 		kfree(adapter->mbox_log);
3378 		list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist,
3379 					 list) {
3380 			list_del(&entry->list);
3381 			kfree(entry);
3382 		}
3383 		kfree(adapter);
3384 	}
3385 
3386 	/*
3387 	 * Disable the device and release its PCI resources.
3388 	 */
3389 	pci_disable_device(pdev);
3390 	pci_clear_master(pdev);
3391 	pci_release_regions(pdev);
3392 }
3393 
3394 /*
3395  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
3396  * delivery.
3397  */
3398 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
3399 {
3400 	struct adapter *adapter;
3401 	int pidx;
3402 
3403 	adapter = pci_get_drvdata(pdev);
3404 	if (!adapter)
3405 		return;
3406 
3407 	/* Disable all Virtual Interfaces.  This will shut down the
3408 	 * delivery of all ingress packets into the chip for these
3409 	 * Virtual Interfaces.
3410 	 */
3411 	for_each_port(adapter, pidx)
3412 		if (test_bit(pidx, &adapter->registered_device_map))
3413 			unregister_netdev(adapter->port[pidx]);
3414 
3415 	/* Free up all Queues which will prevent further DMA and
3416 	 * Interrupts allowing various internal pathways to drain.
3417 	 */
3418 	t4vf_sge_stop(adapter);
3419 	if (adapter->flags & CXGB4VF_USING_MSIX) {
3420 		pci_disable_msix(adapter->pdev);
3421 		adapter->flags &= ~CXGB4VF_USING_MSIX;
3422 	} else if (adapter->flags & CXGB4VF_USING_MSI) {
3423 		pci_disable_msi(adapter->pdev);
3424 		adapter->flags &= ~CXGB4VF_USING_MSI;
3425 	}
3426 
3427 	/*
3428 	 * Free up all Queues which will prevent further DMA and
3429 	 * Interrupts allowing various internal pathways to drain.
3430 	 */
3431 	t4vf_free_sge_resources(adapter);
3432 	pci_set_drvdata(pdev, NULL);
3433 }
3434 
/* Macros needed to support the PCI Device ID Table ...
 *
 * These are defined immediately before including the shared ID table
 * header below.  Presumably that header expands them to build this driver's
 * table -- confirm against t4_pci_id_tbl.h.
 */

/* Opens the definition of the cxgb4vf_pci_tbl[] array. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \
	static const struct pci_device_id cxgb4vf_pci_tbl[] = {
/* NOTE(review): looks like a function selector consumed by the shared
 * header to pick the VF device IDs -- confirm.
 */
#define CH_PCI_DEVICE_ID_FUNCTION	0x8

/* One table entry: a Chelsio vendor device ID with driver data of 0. */
#define CH_PCI_ID_TABLE_ENTRY(devid) \
		{ PCI_VDEVICE(CHELSIO, (devid)), 0 }

/* Zeroed terminating entry followed by the array's closing brace. */
#define CH_PCI_DEVICE_ID_TABLE_DEFINE_END { 0, } }

#include "../cxgb4/t4_pci_id_tbl.h"
3447 
/*
 * Module metadata: description, author, licensing, and the PCI device ID
 * table exported for this module.
 */
MODULE_DESCRIPTION(DRV_DESC);
MODULE_AUTHOR("Chelsio Communications");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
3452 
3453 static struct pci_driver cxgb4vf_driver = {
3454 	.name		= KBUILD_MODNAME,
3455 	.id_table	= cxgb4vf_pci_tbl,
3456 	.probe		= cxgb4vf_pci_probe,
3457 	.remove		= cxgb4vf_pci_remove,
3458 	.shutdown	= cxgb4vf_pci_shutdown,
3459 };
3460 
3461 /*
3462  * Initialize global driver state.
3463  */
3464 static int __init cxgb4vf_module_init(void)
3465 {
3466 	int ret;
3467 
3468 	/*
3469 	 * Vet our module parameters.
3470 	 */
3471 	if (msi != MSI_MSIX && msi != MSI_MSI) {
3472 		pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n",
3473 			msi, MSI_MSIX, MSI_MSI);
3474 		return -EINVAL;
3475 	}
3476 
3477 	/* Debugfs support is optional, debugfs will warn if this fails */
3478 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
3479 
3480 	ret = pci_register_driver(&cxgb4vf_driver);
3481 	if (ret < 0)
3482 		debugfs_remove(cxgb4vf_debugfs_root);
3483 	return ret;
3484 }
3485 
3486 /*
3487  * Tear down global driver state.
3488  */
3489 static void __exit cxgb4vf_module_exit(void)
3490 {
3491 	pci_unregister_driver(&cxgb4vf_driver);
3492 	debugfs_remove(cxgb4vf_debugfs_root);
3493 }
3494 
/* Register the module's load and unload entry points. */
module_init(cxgb4vf_module_init);
module_exit(cxgb4vf_module_exit);
3497