1 /*
2  * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet
3  * driver for Linux.
4  *
5  * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35 
36 #include <linux/module.h>
37 #include <linux/moduleparam.h>
38 #include <linux/init.h>
39 #include <linux/pci.h>
40 #include <linux/dma-mapping.h>
41 #include <linux/netdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/debugfs.h>
44 #include <linux/ethtool.h>
45 
46 #include "t4vf_common.h"
47 #include "t4vf_defs.h"
48 
49 #include "../cxgb4/t4_regs.h"
50 #include "../cxgb4/t4_msg.h"
51 
52 /*
53  * Generic information about the driver.
54  */
55 #define DRV_VERSION "1.0.0"
56 #define DRV_DESC "Chelsio T4 Virtual Function (VF) Network Driver"
57 
58 /*
59  * Module Parameters.
60  * ==================
61  */
62 
63 /*
64  * Default ethtool "message level" for adapters.
65  */
66 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
67 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
68 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
69 
70 static int dflt_msg_enable = DFLT_MSG_ENABLE;
71 
72 module_param(dflt_msg_enable, int, 0644);
73 MODULE_PARM_DESC(dflt_msg_enable,
74 		 "default adapter ethtool message level bitmap");
75 
76 /*
77  * The driver uses the best interrupt scheme available on a platform in the
78  * order MSI-X then MSI.  This parameter determines which of these schemes the
79  * driver may consider as follows:
80  *
81  *     msi = 2: choose from among MSI-X and MSI
82  *     msi = 1: only consider MSI interrupts
83  *
84  * Note that unlike the Physical Function driver, this Virtual Function driver
85  * does _not_ support legacy INTx interrupts (this limitation is mandated by
86  * the PCI-E SR-IOV standard).
87  */
88 #define MSI_MSIX	2
89 #define MSI_MSI		1
90 #define MSI_DEFAULT	MSI_MSIX
91 
92 static int msi = MSI_DEFAULT;
93 
94 module_param(msi, int, 0644);
95 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
96 
97 /*
98  * Fundamental constants.
99  * ======================
100  */
101 
102 enum {
103 	MAX_TXQ_ENTRIES		= 16384,
104 	MAX_RSPQ_ENTRIES	= 16384,
105 	MAX_RX_BUFFERS		= 16384,
106 
107 	MIN_TXQ_ENTRIES		= 32,
108 	MIN_RSPQ_ENTRIES	= 128,
109 	MIN_FL_ENTRIES		= 16,
110 
111 	/*
112 	 * For purposes of manipulating the Free List size we need to
113 	 * recognize that Free Lists are actually Egress Queues (the host
114 	 * produces free buffers which the hardware consumes), Egress Queues
115 	 * indices are all in units of Egress Context Units bytes, and free
116 	 * list entries are 64-bit PCI DMA addresses.  And since the state of
117 	 * the Producer Index == the Consumer Index implies an EMPTY list, we
118 	 * always have at least one Egress Unit's worth of Free List entries
119 	 * unused.  See sge.c for more details ...
120 	 */
121 	EQ_UNIT = SGE_EQ_IDXSIZE,
122 	FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
123 	MIN_FL_RESID = FL_PER_EQ_UNIT,
124 };
125 
126 /*
127  * Global driver state.
128  * ====================
129  */
130 
131 static struct dentry *cxgb4vf_debugfs_root;
132 
133 /*
134  * OS "Callback" functions.
135  * ========================
136  */
137 
138 /*
139  * The link status has changed on the indicated "port" (Virtual Interface).
140  */
141 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
142 {
143 	struct net_device *dev = adapter->port[pidx];
144 
145 	/*
146 	 * If the port is disabled or the current recorded "link up"
147 	 * status matches the new status, just return.
148 	 */
149 	if (!netif_running(dev) || link_ok == netif_carrier_ok(dev))
150 		return;
151 
152 	/*
153 	 * Tell the OS that the link status has changed and print a short
154 	 * informative message on the console about the event.
155 	 */
156 	if (link_ok) {
157 		const char *s;
158 		const char *fc;
159 		const struct port_info *pi = netdev_priv(dev);
160 
161 		netif_carrier_on(dev);
162 
163 		switch (pi->link_cfg.speed) {
164 		case SPEED_10000:
165 			s = "10Gbps";
166 			break;
167 
168 		case SPEED_1000:
169 			s = "1000Mbps";
170 			break;
171 
172 		case SPEED_100:
173 			s = "100Mbps";
174 			break;
175 
176 		default:
177 			s = "unknown";
178 			break;
179 		}
180 
181 		switch (pi->link_cfg.fc) {
182 		case PAUSE_RX:
183 			fc = "RX";
184 			break;
185 
186 		case PAUSE_TX:
187 			fc = "TX";
188 			break;
189 
190 		case PAUSE_RX|PAUSE_TX:
191 			fc = "RX/TX";
192 			break;
193 
194 		default:
195 			fc = "no";
196 			break;
197 		}
198 
199 		printk(KERN_INFO "%s: link up, %s, full-duplex, %s PAUSE\n",
200 		       dev->name, s, fc);
201 	} else {
202 		netif_carrier_off(dev);
203 		printk(KERN_INFO "%s: link down\n", dev->name);
204 	}
205 }
206 
207 /*
208  * Net device operations.
209  * ======================
210  */
211 
212 
213 
214 
215 /*
216  * Perform the MAC and PHY actions needed to enable a "port" (Virtual
217  * Interface).
218  */
219 static int link_start(struct net_device *dev)
220 {
221 	int ret;
222 	struct port_info *pi = netdev_priv(dev);
223 
224 	/*
225 	 * We do not set address filters and promiscuity here, the stack does
226 	 * that step explicitly. Enable vlan accel.
227 	 */
228 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1,
229 			      true);
230 	if (ret == 0) {
231 		ret = t4vf_change_mac(pi->adapter, pi->viid,
232 				      pi->xact_addr_filt, dev->dev_addr, true);
233 		if (ret >= 0) {
234 			pi->xact_addr_filt = ret;
235 			ret = 0;
236 		}
237 	}
238 
239 	/*
240 	 * We don't need to actually "start the link" itself since the
241 	 * firmware will do that for us when the first Virtual Interface
242 	 * is enabled on a port.
243 	 */
244 	if (ret == 0)
245 		ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true);
246 	return ret;
247 }
248 
249 /*
250  * Name the MSI-X interrupts.
251  */
252 static void name_msix_vecs(struct adapter *adapter)
253 {
254 	int namelen = sizeof(adapter->msix_info[0].desc) - 1;
255 	int pidx;
256 
257 	/*
258 	 * Firmware events.
259 	 */
260 	snprintf(adapter->msix_info[MSIX_FW].desc, namelen,
261 		 "%s-FWeventq", adapter->name);
262 	adapter->msix_info[MSIX_FW].desc[namelen] = 0;
263 
264 	/*
265 	 * Ethernet queues.
266 	 */
267 	for_each_port(adapter, pidx) {
268 		struct net_device *dev = adapter->port[pidx];
269 		const struct port_info *pi = netdev_priv(dev);
270 		int qs, msi;
271 
272 		for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) {
273 			snprintf(adapter->msix_info[msi].desc, namelen,
274 				 "%s-%d", dev->name, qs);
275 			adapter->msix_info[msi].desc[namelen] = 0;
276 		}
277 	}
278 }
279 
280 /*
281  * Request all of our MSI-X resources.
282  */
283 static int request_msix_queue_irqs(struct adapter *adapter)
284 {
285 	struct sge *s = &adapter->sge;
286 	int rxq, msi, err;
287 
288 	/*
289 	 * Firmware events.
290 	 */
291 	err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix,
292 			  0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq);
293 	if (err)
294 		return err;
295 
296 	/*
297 	 * Ethernet queues.
298 	 */
299 	msi = MSIX_IQFLINT;
300 	for_each_ethrxq(s, rxq) {
301 		err = request_irq(adapter->msix_info[msi].vec,
302 				  t4vf_sge_intr_msix, 0,
303 				  adapter->msix_info[msi].desc,
304 				  &s->ethrxq[rxq].rspq);
305 		if (err)
306 			goto err_free_irqs;
307 		msi++;
308 	}
309 	return 0;
310 
311 err_free_irqs:
312 	while (--rxq >= 0)
313 		free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq);
314 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
315 	return err;
316 }
317 
318 /*
319  * Free our MSI-X resources.
320  */
321 static void free_msix_queue_irqs(struct adapter *adapter)
322 {
323 	struct sge *s = &adapter->sge;
324 	int rxq, msi;
325 
326 	free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq);
327 	msi = MSIX_IQFLINT;
328 	for_each_ethrxq(s, rxq)
329 		free_irq(adapter->msix_info[msi++].vec,
330 			 &s->ethrxq[rxq].rspq);
331 }
332 
333 /*
334  * Turn on NAPI and start up interrupts on a response queue.
335  */
336 static void qenable(struct sge_rspq *rspq)
337 {
338 	napi_enable(&rspq->napi);
339 
340 	/*
341 	 * 0-increment the Going To Sleep register to start the timer and
342 	 * enable interrupts.
343 	 */
344 	t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
345 		     CIDXINC(0) |
346 		     SEINTARM(rspq->intr_params) |
347 		     INGRESSQID(rspq->cntxt_id));
348 }
349 
350 /*
351  * Enable NAPI scheduling and interrupt generation for all Receive Queues.
352  */
353 static void enable_rx(struct adapter *adapter)
354 {
355 	int rxq;
356 	struct sge *s = &adapter->sge;
357 
358 	for_each_ethrxq(s, rxq)
359 		qenable(&s->ethrxq[rxq].rspq);
360 	qenable(&s->fw_evtq);
361 
362 	/*
363 	 * The interrupt queue doesn't use NAPI so we do the 0-increment of
364 	 * its Going To Sleep register here to get it started.
365 	 */
366 	if (adapter->flags & USING_MSI)
367 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
368 			     CIDXINC(0) |
369 			     SEINTARM(s->intrq.intr_params) |
370 			     INGRESSQID(s->intrq.cntxt_id));
371 
372 }
373 
374 /*
375  * Wait until all NAPI handlers are descheduled.
376  */
377 static void quiesce_rx(struct adapter *adapter)
378 {
379 	struct sge *s = &adapter->sge;
380 	int rxq;
381 
382 	for_each_ethrxq(s, rxq)
383 		napi_disable(&s->ethrxq[rxq].rspq.napi);
384 	napi_disable(&s->fw_evtq.napi);
385 }
386 
387 /*
388  * Response queue handler for the firmware event queue.
389  */
390 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp,
391 			  const struct pkt_gl *gl)
392 {
393 	/*
394 	 * Extract response opcode and get pointer to CPL message body.
395 	 */
396 	struct adapter *adapter = rspq->adapter;
397 	u8 opcode = ((const struct rss_header *)rsp)->opcode;
398 	void *cpl = (void *)(rsp + 1);
399 
400 	switch (opcode) {
401 	case CPL_FW6_MSG: {
402 		/*
403 		 * We've received an asynchronous message from the firmware.
404 		 */
405 		const struct cpl_fw6_msg *fw_msg = cpl;
406 		if (fw_msg->type == FW6_TYPE_CMD_RPL)
407 			t4vf_handle_fw_rpl(adapter, fw_msg->data);
408 		break;
409 	}
410 
411 	case CPL_SGE_EGR_UPDATE: {
412 		/*
413 		 * We've received an Egress Queue Status Update message.  We
414 		 * get these, if the SGE is configured to send these when the
415 		 * firmware passes certain points in processing our TX
416 		 * Ethernet Queue or if we make an explicit request for one.
417 		 * We use these updates to determine when we may need to
418 		 * restart a TX Ethernet Queue which was stopped for lack of
419 		 * free TX Queue Descriptors ...
420 		 */
421 		const struct cpl_sge_egr_update *p = cpl;
422 		unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid));
423 		struct sge *s = &adapter->sge;
424 		struct sge_txq *tq;
425 		struct sge_eth_txq *txq;
426 		unsigned int eq_idx;
427 
428 		/*
429 		 * Perform sanity checking on the Queue ID to make sure it
430 		 * really refers to one of our TX Ethernet Egress Queues which
431 		 * is active and matches the queue's ID.  None of these error
432 		 * conditions should ever happen so we may want to either make
433 		 * them fatal and/or conditionalized under DEBUG.
434 		 */
435 		eq_idx = EQ_IDX(s, qid);
436 		if (unlikely(eq_idx >= MAX_EGRQ)) {
437 			dev_err(adapter->pdev_dev,
438 				"Egress Update QID %d out of range\n", qid);
439 			break;
440 		}
441 		tq = s->egr_map[eq_idx];
442 		if (unlikely(tq == NULL)) {
443 			dev_err(adapter->pdev_dev,
444 				"Egress Update QID %d TXQ=NULL\n", qid);
445 			break;
446 		}
447 		txq = container_of(tq, struct sge_eth_txq, q);
448 		if (unlikely(tq->abs_id != qid)) {
449 			dev_err(adapter->pdev_dev,
450 				"Egress Update QID %d refers to TXQ %d\n",
451 				qid, tq->abs_id);
452 			break;
453 		}
454 
455 		/*
456 		 * Restart a stopped TX Queue which has less than half of its
457 		 * TX ring in use ...
458 		 */
459 		txq->q.restarts++;
460 		netif_tx_wake_queue(txq->txq);
461 		break;
462 	}
463 
464 	default:
465 		dev_err(adapter->pdev_dev,
466 			"unexpected CPL %#x on FW event queue\n", opcode);
467 	}
468 
469 	return 0;
470 }
471 
472 /*
473  * Allocate SGE TX/RX response queues.  Determine how many sets of SGE queues
474  * to use and initializes them.  We support multiple "Queue Sets" per port if
475  * we have MSI-X, otherwise just one queue set per port.
476  */
477 static int setup_sge_queues(struct adapter *adapter)
478 {
479 	struct sge *s = &adapter->sge;
480 	int err, pidx, msix;
481 
482 	/*
483 	 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error
484 	 * state.
485 	 */
486 	bitmap_zero(s->starving_fl, MAX_EGRQ);
487 
488 	/*
489 	 * If we're using MSI interrupt mode we need to set up a "forwarded
490 	 * interrupt" queue which we'll set up with our MSI vector.  The rest
491 	 * of the ingress queues will be set up to forward their interrupts to
492 	 * this queue ...  This must be first since t4vf_sge_alloc_rxq() uses
493 	 * the intrq's queue ID as the interrupt forwarding queue for the
494 	 * subsequent calls ...
495 	 */
496 	if (adapter->flags & USING_MSI) {
497 		err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
498 					 adapter->port[0], 0, NULL, NULL);
499 		if (err)
500 			goto err_free_queues;
501 	}
502 
503 	/*
504 	 * Allocate our ingress queue for asynchronous firmware messages.
505 	 */
506 	err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0],
507 				 MSIX_FW, NULL, fwevtq_handler);
508 	if (err)
509 		goto err_free_queues;
510 
511 	/*
512 	 * Allocate each "port"'s initial Queue Sets.  These can be changed
513 	 * later on ... up to the point where any interface on the adapter is
514 	 * brought up at which point lots of things get nailed down
515 	 * permanently ...
516 	 */
517 	msix = MSIX_IQFLINT;
518 	for_each_port(adapter, pidx) {
519 		struct net_device *dev = adapter->port[pidx];
520 		struct port_info *pi = netdev_priv(dev);
521 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
522 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
523 		int qs;
524 
525 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
526 			err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false,
527 						 dev, msix++,
528 						 &rxq->fl, t4vf_ethrx_handler);
529 			if (err)
530 				goto err_free_queues;
531 
532 			err = t4vf_sge_alloc_eth_txq(adapter, txq, dev,
533 					     netdev_get_tx_queue(dev, qs),
534 					     s->fw_evtq.cntxt_id);
535 			if (err)
536 				goto err_free_queues;
537 
538 			rxq->rspq.idx = qs;
539 			memset(&rxq->stats, 0, sizeof(rxq->stats));
540 		}
541 	}
542 
543 	/*
544 	 * Create the reverse mappings for the queues.
545 	 */
546 	s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id;
547 	s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id;
548 	IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq;
549 	for_each_port(adapter, pidx) {
550 		struct net_device *dev = adapter->port[pidx];
551 		struct port_info *pi = netdev_priv(dev);
552 		struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset];
553 		struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset];
554 		int qs;
555 
556 		for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
557 			IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq;
558 			EQ_MAP(s, txq->q.abs_id) = &txq->q;
559 
560 			/*
561 			 * The FW_IQ_CMD doesn't return the Absolute Queue IDs
562 			 * for Free Lists but since all of the Egress Queues
563 			 * (including Free Lists) have Relative Queue IDs
564 			 * which are computed as Absolute - Base Queue ID, we
565 			 * can synthesize the Absolute Queue IDs for the Free
566 			 * Lists.  This is useful for debugging purposes when
567 			 * we want to dump Queue Contexts via the PF Driver.
568 			 */
569 			rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
570 			EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
571 		}
572 	}
573 	return 0;
574 
575 err_free_queues:
576 	t4vf_free_sge_resources(adapter);
577 	return err;
578 }
579 
580 /*
581  * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive
582  * queues.  We configure the RSS CPU lookup table to distribute to the number
583  * of HW receive queues, and the response queue lookup table to narrow that
584  * down to the response queues actually configured for each "port" (Virtual
585  * Interface).  We always configure the RSS mapping for all ports since the
586  * mapping table has plenty of entries.
587  */
588 static int setup_rss(struct adapter *adapter)
589 {
590 	int pidx;
591 
592 	for_each_port(adapter, pidx) {
593 		struct port_info *pi = adap2pinfo(adapter, pidx);
594 		struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
595 		u16 rss[MAX_PORT_QSETS];
596 		int qs, err;
597 
598 		for (qs = 0; qs < pi->nqsets; qs++)
599 			rss[qs] = rxq[qs].rspq.abs_id;
600 
601 		err = t4vf_config_rss_range(adapter, pi->viid,
602 					    0, pi->rss_size, rss, pi->nqsets);
603 		if (err)
604 			return err;
605 
606 		/*
607 		 * Perform Global RSS Mode-specific initialization.
608 		 */
609 		switch (adapter->params.rss.mode) {
610 		case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL:
611 			/*
612 			 * If Tunnel All Lookup isn't specified in the global
613 			 * RSS Configuration, then we need to specify a
614 			 * default Ingress Queue for any ingress packets which
615 			 * aren't hashed.  We'll use our first ingress queue
616 			 * ...
617 			 */
618 			if (!adapter->params.rss.u.basicvirtual.tnlalllookup) {
619 				union rss_vi_config config;
620 				err = t4vf_read_rss_vi_config(adapter,
621 							      pi->viid,
622 							      &config);
623 				if (err)
624 					return err;
625 				config.basicvirtual.defaultq =
626 					rxq[0].rspq.abs_id;
627 				err = t4vf_write_rss_vi_config(adapter,
628 							       pi->viid,
629 							       &config);
630 				if (err)
631 					return err;
632 			}
633 			break;
634 		}
635 	}
636 
637 	return 0;
638 }
639 
640 /*
641  * Bring the adapter up.  Called whenever we go from no "ports" open to having
642  * one open.  This function performs the actions necessary to make an adapter
643  * operational, such as completing the initialization of HW modules, and
644  * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
645  * this is called "cxgb_up" in the PF Driver.)
646  */
647 static int adapter_up(struct adapter *adapter)
648 {
649 	int err;
650 
651 	/*
652 	 * If this is the first time we've been called, perform basic
653 	 * adapter setup.  Once we've done this, many of our adapter
654 	 * parameters can no longer be changed ...
655 	 */
656 	if ((adapter->flags & FULL_INIT_DONE) == 0) {
657 		err = setup_sge_queues(adapter);
658 		if (err)
659 			return err;
660 		err = setup_rss(adapter);
661 		if (err) {
662 			t4vf_free_sge_resources(adapter);
663 			return err;
664 		}
665 
666 		if (adapter->flags & USING_MSIX)
667 			name_msix_vecs(adapter);
668 		adapter->flags |= FULL_INIT_DONE;
669 	}
670 
671 	/*
672 	 * Acquire our interrupt resources.  We only support MSI-X and MSI.
673 	 */
674 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
675 	if (adapter->flags & USING_MSIX)
676 		err = request_msix_queue_irqs(adapter);
677 	else
678 		err = request_irq(adapter->pdev->irq,
679 				  t4vf_intr_handler(adapter), 0,
680 				  adapter->name, adapter);
681 	if (err) {
682 		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
683 			err);
684 		return err;
685 	}
686 
687 	/*
688 	 * Enable NAPI ingress processing and return success.
689 	 */
690 	enable_rx(adapter);
691 	t4vf_sge_start(adapter);
692 	return 0;
693 }
694 
695 /*
696  * Bring the adapter down.  Called whenever the last "port" (Virtual
697  * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
698  * Driver.)
699  */
700 static void adapter_down(struct adapter *adapter)
701 {
702 	/*
703 	 * Free interrupt resources.
704 	 */
705 	if (adapter->flags & USING_MSIX)
706 		free_msix_queue_irqs(adapter);
707 	else
708 		free_irq(adapter->pdev->irq, adapter);
709 
710 	/*
711 	 * Wait for NAPI handlers to finish.
712 	 */
713 	quiesce_rx(adapter);
714 }
715 
716 /*
717  * Start up a net device.
718  */
719 static int cxgb4vf_open(struct net_device *dev)
720 {
721 	int err;
722 	struct port_info *pi = netdev_priv(dev);
723 	struct adapter *adapter = pi->adapter;
724 
725 	/*
726 	 * If this is the first interface that we're opening on the "adapter",
727 	 * bring the "adapter" up now.
728 	 */
729 	if (adapter->open_device_map == 0) {
730 		err = adapter_up(adapter);
731 		if (err)
732 			return err;
733 	}
734 
735 	/*
736 	 * Note that this interface is up and start everything up ...
737 	 */
738 	netif_set_real_num_tx_queues(dev, pi->nqsets);
739 	err = netif_set_real_num_rx_queues(dev, pi->nqsets);
740 	if (err)
741 		goto err_unwind;
742 	err = link_start(dev);
743 	if (err)
744 		goto err_unwind;
745 
746 	netif_tx_start_all_queues(dev);
747 	set_bit(pi->port_id, &adapter->open_device_map);
748 	return 0;
749 
750 err_unwind:
751 	if (adapter->open_device_map == 0)
752 		adapter_down(adapter);
753 	return err;
754 }
755 
756 /*
757  * Shut down a net device.  This routine is called "cxgb_close" in the PF
758  * Driver ...
759  */
760 static int cxgb4vf_stop(struct net_device *dev)
761 {
762 	struct port_info *pi = netdev_priv(dev);
763 	struct adapter *adapter = pi->adapter;
764 
765 	netif_tx_stop_all_queues(dev);
766 	netif_carrier_off(dev);
767 	t4vf_enable_vi(adapter, pi->viid, false, false);
768 	pi->link_cfg.link_ok = 0;
769 
770 	clear_bit(pi->port_id, &adapter->open_device_map);
771 	if (adapter->open_device_map == 0)
772 		adapter_down(adapter);
773 	return 0;
774 }
775 
776 /*
777  * Translate our basic statistics into the standard "ifconfig" statistics.
778  */
779 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev)
780 {
781 	struct t4vf_port_stats stats;
782 	struct port_info *pi = netdev2pinfo(dev);
783 	struct adapter *adapter = pi->adapter;
784 	struct net_device_stats *ns = &dev->stats;
785 	int err;
786 
787 	spin_lock(&adapter->stats_lock);
788 	err = t4vf_get_port_stats(adapter, pi->pidx, &stats);
789 	spin_unlock(&adapter->stats_lock);
790 
791 	memset(ns, 0, sizeof(*ns));
792 	if (err)
793 		return ns;
794 
795 	ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes +
796 			stats.tx_ucast_bytes + stats.tx_offload_bytes);
797 	ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames +
798 			  stats.tx_ucast_frames + stats.tx_offload_frames);
799 	ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes +
800 			stats.rx_ucast_bytes);
801 	ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames +
802 			  stats.rx_ucast_frames);
803 	ns->multicast = stats.rx_mcast_frames;
804 	ns->tx_errors = stats.tx_drop_frames;
805 	ns->rx_errors = stats.rx_err_frames;
806 
807 	return ns;
808 }
809 
810 /*
811  * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting
812  * at a specified offset within the list, into an array of addrss pointers and
813  * return the number collected.
814  */
815 static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev,
816 							const u8 **addr,
817 							unsigned int offset,
818 							unsigned int maxaddrs)
819 {
820 	unsigned int index = 0;
821 	unsigned int naddr = 0;
822 	const struct netdev_hw_addr *ha;
823 
824 	for_each_dev_addr(dev, ha)
825 		if (index++ >= offset) {
826 			addr[naddr++] = ha->addr;
827 			if (naddr >= maxaddrs)
828 				break;
829 		}
830 	return naddr;
831 }
832 
833 /*
834  * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting
835  * at a specified offset within the list, into an array of addrss pointers and
836  * return the number collected.
837  */
838 static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev,
839 							const u8 **addr,
840 							unsigned int offset,
841 							unsigned int maxaddrs)
842 {
843 	unsigned int index = 0;
844 	unsigned int naddr = 0;
845 	const struct netdev_hw_addr *ha;
846 
847 	netdev_for_each_mc_addr(ha, dev)
848 		if (index++ >= offset) {
849 			addr[naddr++] = ha->addr;
850 			if (naddr >= maxaddrs)
851 				break;
852 		}
853 	return naddr;
854 }
855 
856 /*
857  * Configure the exact and hash address filters to handle a port's multicast
858  * and secondary unicast MAC addresses.
859  */
860 static int set_addr_filters(const struct net_device *dev, bool sleep)
861 {
862 	u64 mhash = 0;
863 	u64 uhash = 0;
864 	bool free = true;
865 	unsigned int offset, naddr;
866 	const u8 *addr[7];
867 	int ret;
868 	const struct port_info *pi = netdev_priv(dev);
869 
870 	/* first do the secondary unicast addresses */
871 	for (offset = 0; ; offset += naddr) {
872 		naddr = collect_netdev_uc_list_addrs(dev, addr, offset,
873 						     ARRAY_SIZE(addr));
874 		if (naddr == 0)
875 			break;
876 
877 		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
878 					  naddr, addr, NULL, &uhash, sleep);
879 		if (ret < 0)
880 			return ret;
881 
882 		free = false;
883 	}
884 
885 	/* next set up the multicast addresses */
886 	for (offset = 0; ; offset += naddr) {
887 		naddr = collect_netdev_mc_list_addrs(dev, addr, offset,
888 						     ARRAY_SIZE(addr));
889 		if (naddr == 0)
890 			break;
891 
892 		ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free,
893 					  naddr, addr, NULL, &mhash, sleep);
894 		if (ret < 0)
895 			return ret;
896 		free = false;
897 	}
898 
899 	return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0,
900 				  uhash | mhash, sleep);
901 }
902 
903 /*
904  * Set RX properties of a port, such as promiscruity, address filters, and MTU.
905  * If @mtu is -1 it is left unchanged.
906  */
907 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
908 {
909 	int ret;
910 	struct port_info *pi = netdev_priv(dev);
911 
912 	ret = set_addr_filters(dev, sleep_ok);
913 	if (ret == 0)
914 		ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1,
915 				      (dev->flags & IFF_PROMISC) != 0,
916 				      (dev->flags & IFF_ALLMULTI) != 0,
917 				      1, -1, sleep_ok);
918 	return ret;
919 }
920 
921 /*
922  * Set the current receive modes on the device.
923  */
924 static void cxgb4vf_set_rxmode(struct net_device *dev)
925 {
926 	/* unfortunately we can't return errors to the stack */
927 	set_rxmode(dev, -1, false);
928 }
929 
930 /*
931  * Find the entry in the interrupt holdoff timer value array which comes
932  * closest to the specified interrupt holdoff value.
933  */
934 static int closest_timer(const struct sge *s, int us)
935 {
936 	int i, timer_idx = 0, min_delta = INT_MAX;
937 
938 	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
939 		int delta = us - s->timer_val[i];
940 		if (delta < 0)
941 			delta = -delta;
942 		if (delta < min_delta) {
943 			min_delta = delta;
944 			timer_idx = i;
945 		}
946 	}
947 	return timer_idx;
948 }
949 
950 static int closest_thres(const struct sge *s, int thres)
951 {
952 	int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
953 
954 	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
955 		delta = thres - s->counter_val[i];
956 		if (delta < 0)
957 			delta = -delta;
958 		if (delta < min_delta) {
959 			min_delta = delta;
960 			pktcnt_idx = i;
961 		}
962 	}
963 	return pktcnt_idx;
964 }
965 
966 /*
967  * Return a queue's interrupt hold-off time in us.  0 means no timer.
968  */
969 static unsigned int qtimer_val(const struct adapter *adapter,
970 			       const struct sge_rspq *rspq)
971 {
972 	unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params);
973 
974 	return timer_idx < SGE_NTIMERS
975 		? adapter->sge.timer_val[timer_idx]
976 		: 0;
977 }
978 
979 /**
980  *	set_rxq_intr_params - set a queue's interrupt holdoff parameters
981  *	@adapter: the adapter
982  *	@rspq: the RX response queue
983  *	@us: the hold-off time in us, or 0 to disable timer
984  *	@cnt: the hold-off packet count, or 0 to disable counter
985  *
986  *	Sets an RX response queue's interrupt hold-off time and packet count.
987  *	At least one of the two needs to be enabled for the queue to generate
988  *	interrupts.
989  */
990 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq,
991 			       unsigned int us, unsigned int cnt)
992 {
993 	unsigned int timer_idx;
994 
995 	/*
996 	 * If both the interrupt holdoff timer and count are specified as
997 	 * zero, default to a holdoff count of 1 ...
998 	 */
999 	if ((us | cnt) == 0)
1000 		cnt = 1;
1001 
1002 	/*
1003 	 * If an interrupt holdoff count has been specified, then find the
1004 	 * closest configured holdoff count and use that.  If the response
1005 	 * queue has already been created, then update its queue context
1006 	 * parameters ...
1007 	 */
1008 	if (cnt) {
1009 		int err;
1010 		u32 v, pktcnt_idx;
1011 
1012 		pktcnt_idx = closest_thres(&adapter->sge, cnt);
1013 		if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) {
1014 			v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
1015 			    FW_PARAMS_PARAM_X(
1016 					FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) |
1017 			    FW_PARAMS_PARAM_YZ(rspq->cntxt_id);
1018 			err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx);
1019 			if (err)
1020 				return err;
1021 		}
1022 		rspq->pktcnt_idx = pktcnt_idx;
1023 	}
1024 
1025 	/*
1026 	 * Compute the closest holdoff timer index from the supplied holdoff
1027 	 * timer value.
1028 	 */
1029 	timer_idx = (us == 0
1030 		     ? SGE_TIMER_RSTRT_CNTR
1031 		     : closest_timer(&adapter->sge, us));
1032 
1033 	/*
1034 	 * Update the response queue's interrupt coalescing parameters and
1035 	 * return success.
1036 	 */
1037 	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
1038 			     (cnt > 0 ? QINTR_CNT_EN : 0));
1039 	return 0;
1040 }
1041 
1042 /*
1043  * Return a version number to identify the type of adapter.  The scheme is:
1044  * - bits 0..9: chip version
1045  * - bits 10..15: chip revision
1046  */
1047 static inline unsigned int mk_adap_vers(const struct adapter *adapter)
1048 {
1049 	/*
1050 	 * Chip version 4, revision 0x3f (cxgb4vf).
1051 	 */
1052 	return 4 | (0x3f << 10);
1053 }
1054 
1055 /*
1056  * Execute the specified ioctl command.
1057  */
1058 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1059 {
1060 	int ret = 0;
1061 
1062 	switch (cmd) {
1063 	    /*
1064 	     * The VF Driver doesn't have access to any of the other
1065 	     * common Ethernet device ioctl()'s (like reading/writing
1066 	     * PHY registers, etc.
1067 	     */
1068 
1069 	default:
1070 		ret = -EOPNOTSUPP;
1071 		break;
1072 	}
1073 	return ret;
1074 }
1075 
1076 /*
1077  * Change the device's MTU.
1078  */
1079 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
1080 {
1081 	int ret;
1082 	struct port_info *pi = netdev_priv(dev);
1083 
1084 	/* accommodate SACK */
1085 	if (new_mtu < 81)
1086 		return -EINVAL;
1087 
1088 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
1089 			      -1, -1, -1, -1, true);
1090 	if (!ret)
1091 		dev->mtu = new_mtu;
1092 	return ret;
1093 }
1094 
1095 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
1096 	netdev_features_t features)
1097 {
1098 	/*
1099 	 * Since there is no support for separate rx/tx vlan accel
1100 	 * enable/disable make sure tx flag is always in same state as rx.
1101 	 */
1102 	if (features & NETIF_F_HW_VLAN_RX)
1103 		features |= NETIF_F_HW_VLAN_TX;
1104 	else
1105 		features &= ~NETIF_F_HW_VLAN_TX;
1106 
1107 	return features;
1108 }
1109 
1110 static int cxgb4vf_set_features(struct net_device *dev,
1111 	netdev_features_t features)
1112 {
1113 	struct port_info *pi = netdev_priv(dev);
1114 	netdev_features_t changed = dev->features ^ features;
1115 
1116 	if (changed & NETIF_F_HW_VLAN_RX)
1117 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
1118 				features & NETIF_F_HW_VLAN_TX, 0);
1119 
1120 	return 0;
1121 }
1122 
1123 /*
1124  * Change the devices MAC address.
1125  */
1126 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
1127 {
1128 	int ret;
1129 	struct sockaddr *addr = _addr;
1130 	struct port_info *pi = netdev_priv(dev);
1131 
1132 	if (!is_valid_ether_addr(addr->sa_data))
1133 		return -EADDRNOTAVAIL;
1134 
1135 	ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt,
1136 			      addr->sa_data, true);
1137 	if (ret < 0)
1138 		return ret;
1139 
1140 	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1141 	pi->xact_addr_filt = ret;
1142 	return 0;
1143 }
1144 
1145 #ifdef CONFIG_NET_POLL_CONTROLLER
1146 /*
1147  * Poll all of our receive queues.  This is called outside of normal interrupt
1148  * context.
1149  */
1150 static void cxgb4vf_poll_controller(struct net_device *dev)
1151 {
1152 	struct port_info *pi = netdev_priv(dev);
1153 	struct adapter *adapter = pi->adapter;
1154 
1155 	if (adapter->flags & USING_MSIX) {
1156 		struct sge_eth_rxq *rxq;
1157 		int nqsets;
1158 
1159 		rxq = &adapter->sge.ethrxq[pi->first_qset];
1160 		for (nqsets = pi->nqsets; nqsets; nqsets--) {
1161 			t4vf_sge_intr_msix(0, &rxq->rspq);
1162 			rxq++;
1163 		}
1164 	} else
1165 		t4vf_intr_handler(adapter)(0, adapter);
1166 }
1167 #endif
1168 
1169 /*
1170  * Ethtool operations.
1171  * ===================
1172  *
1173  * Note that we don't support any ethtool operations which change the physical
1174  * state of the port to which we're linked.
1175  */
1176 
1177 /*
1178  * Return current port link settings.
1179  */
1180 static int cxgb4vf_get_settings(struct net_device *dev,
1181 				struct ethtool_cmd *cmd)
1182 {
1183 	const struct port_info *pi = netdev_priv(dev);
1184 
1185 	cmd->supported = pi->link_cfg.supported;
1186 	cmd->advertising = pi->link_cfg.advertising;
1187 	ethtool_cmd_speed_set(cmd,
1188 			      netif_carrier_ok(dev) ? pi->link_cfg.speed : -1);
1189 	cmd->duplex = DUPLEX_FULL;
1190 
1191 	cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE;
1192 	cmd->phy_address = pi->port_id;
1193 	cmd->transceiver = XCVR_EXTERNAL;
1194 	cmd->autoneg = pi->link_cfg.autoneg;
1195 	cmd->maxtxpkt = 0;
1196 	cmd->maxrxpkt = 0;
1197 	return 0;
1198 }
1199 
1200 /*
1201  * Return our driver information.
1202  */
1203 static void cxgb4vf_get_drvinfo(struct net_device *dev,
1204 				struct ethtool_drvinfo *drvinfo)
1205 {
1206 	struct adapter *adapter = netdev2adap(dev);
1207 
1208 	strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
1209 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
1210 	strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)),
1211 		sizeof(drvinfo->bus_info));
1212 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
1213 		 "%u.%u.%u.%u, TP %u.%u.%u.%u",
1214 		 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev),
1215 		 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev),
1216 		 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev),
1217 		 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev),
1218 		 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev),
1219 		 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev),
1220 		 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev),
1221 		 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev));
1222 }
1223 
1224 /*
1225  * Return current adapter message level.
1226  */
1227 static u32 cxgb4vf_get_msglevel(struct net_device *dev)
1228 {
1229 	return netdev2adap(dev)->msg_enable;
1230 }
1231 
1232 /*
1233  * Set current adapter message level.
1234  */
1235 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
1236 {
1237 	netdev2adap(dev)->msg_enable = msglevel;
1238 }
1239 
1240 /*
1241  * Return the device's current Queue Set ring size parameters along with the
1242  * allowed maximum values.  Since ethtool doesn't understand the concept of
1243  * multi-queue devices, we just return the current values associated with the
1244  * first Queue Set.
1245  */
1246 static void cxgb4vf_get_ringparam(struct net_device *dev,
1247 				  struct ethtool_ringparam *rp)
1248 {
1249 	const struct port_info *pi = netdev_priv(dev);
1250 	const struct sge *s = &pi->adapter->sge;
1251 
1252 	rp->rx_max_pending = MAX_RX_BUFFERS;
1253 	rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES;
1254 	rp->rx_jumbo_max_pending = 0;
1255 	rp->tx_max_pending = MAX_TXQ_ENTRIES;
1256 
1257 	rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID;
1258 	rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size;
1259 	rp->rx_jumbo_pending = 0;
1260 	rp->tx_pending = s->ethtxq[pi->first_qset].q.size;
1261 }
1262 
1263 /*
1264  * Set the Queue Set ring size parameters for the device.  Again, since
1265  * ethtool doesn't allow for the concept of multiple queues per device, we'll
1266  * apply these new values across all of the Queue Sets associated with the
1267  * device -- after vetting them of course!
1268  */
1269 static int cxgb4vf_set_ringparam(struct net_device *dev,
1270 				 struct ethtool_ringparam *rp)
1271 {
1272 	const struct port_info *pi = netdev_priv(dev);
1273 	struct adapter *adapter = pi->adapter;
1274 	struct sge *s = &adapter->sge;
1275 	int qs;
1276 
1277 	if (rp->rx_pending > MAX_RX_BUFFERS ||
1278 	    rp->rx_jumbo_pending ||
1279 	    rp->tx_pending > MAX_TXQ_ENTRIES ||
1280 	    rp->rx_mini_pending > MAX_RSPQ_ENTRIES ||
1281 	    rp->rx_mini_pending < MIN_RSPQ_ENTRIES ||
1282 	    rp->rx_pending < MIN_FL_ENTRIES ||
1283 	    rp->tx_pending < MIN_TXQ_ENTRIES)
1284 		return -EINVAL;
1285 
1286 	if (adapter->flags & FULL_INIT_DONE)
1287 		return -EBUSY;
1288 
1289 	for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) {
1290 		s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID;
1291 		s->ethrxq[qs].rspq.size = rp->rx_mini_pending;
1292 		s->ethtxq[qs].q.size = rp->tx_pending;
1293 	}
1294 	return 0;
1295 }
1296 
1297 /*
1298  * Return the interrupt holdoff timer and count for the first Queue Set on the
1299  * device.  Our extension ioctl() (the cxgbtool interface) allows the
1300  * interrupt holdoff timer to be read on all of the device's Queue Sets.
1301  */
1302 static int cxgb4vf_get_coalesce(struct net_device *dev,
1303 				struct ethtool_coalesce *coalesce)
1304 {
1305 	const struct port_info *pi = netdev_priv(dev);
1306 	const struct adapter *adapter = pi->adapter;
1307 	const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
1308 
1309 	coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq);
1310 	coalesce->rx_max_coalesced_frames =
1311 		((rspq->intr_params & QINTR_CNT_EN)
1312 		 ? adapter->sge.counter_val[rspq->pktcnt_idx]
1313 		 : 0);
1314 	return 0;
1315 }
1316 
1317 /*
1318  * Set the RX interrupt holdoff timer and count for the first Queue Set on the
1319  * interface.  Our extension ioctl() (the cxgbtool interface) allows us to set
1320  * the interrupt holdoff timer on any of the device's Queue Sets.
1321  */
1322 static int cxgb4vf_set_coalesce(struct net_device *dev,
1323 				struct ethtool_coalesce *coalesce)
1324 {
1325 	const struct port_info *pi = netdev_priv(dev);
1326 	struct adapter *adapter = pi->adapter;
1327 
1328 	return set_rxq_intr_params(adapter,
1329 				   &adapter->sge.ethrxq[pi->first_qset].rspq,
1330 				   coalesce->rx_coalesce_usecs,
1331 				   coalesce->rx_max_coalesced_frames);
1332 }
1333 
1334 /*
1335  * Report current port link pause parameter settings.
1336  */
1337 static void cxgb4vf_get_pauseparam(struct net_device *dev,
1338 				   struct ethtool_pauseparam *pauseparam)
1339 {
1340 	struct port_info *pi = netdev_priv(dev);
1341 
1342 	pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
1343 	pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
1344 	pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
1345 }
1346 
1347 /*
1348  * Identify the port by blinking the port's LED.
1349  */
1350 static int cxgb4vf_phys_id(struct net_device *dev,
1351 			   enum ethtool_phys_id_state state)
1352 {
1353 	unsigned int val;
1354 	struct port_info *pi = netdev_priv(dev);
1355 
1356 	if (state == ETHTOOL_ID_ACTIVE)
1357 		val = 0xffff;
1358 	else if (state == ETHTOOL_ID_INACTIVE)
1359 		val = 0;
1360 	else
1361 		return -EINVAL;
1362 
1363 	return t4vf_identify_port(pi->adapter, pi->viid, val);
1364 }
1365 
1366 /*
1367  * Port stats maintained per queue of the port.
1368  */
1369 struct queue_port_stats {
1370 	u64 tso;
1371 	u64 tx_csum;
1372 	u64 rx_csum;
1373 	u64 vlan_ex;
1374 	u64 vlan_ins;
1375 	u64 lro_pkts;
1376 	u64 lro_merged;
1377 };
1378 
1379 /*
1380  * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
1381  * these need to match the order of statistics returned by
1382  * t4vf_get_port_stats().
1383  */
1384 static const char stats_strings[][ETH_GSTRING_LEN] = {
1385 	/*
1386 	 * These must match the layout of the t4vf_port_stats structure.
1387 	 */
1388 	"TxBroadcastBytes  ",
1389 	"TxBroadcastFrames ",
1390 	"TxMulticastBytes  ",
1391 	"TxMulticastFrames ",
1392 	"TxUnicastBytes    ",
1393 	"TxUnicastFrames   ",
1394 	"TxDroppedFrames   ",
1395 	"TxOffloadBytes    ",
1396 	"TxOffloadFrames   ",
1397 	"RxBroadcastBytes  ",
1398 	"RxBroadcastFrames ",
1399 	"RxMulticastBytes  ",
1400 	"RxMulticastFrames ",
1401 	"RxUnicastBytes    ",
1402 	"RxUnicastFrames   ",
1403 	"RxErrorFrames     ",
1404 
1405 	/*
1406 	 * These are accumulated per-queue statistics and must match the
1407 	 * order of the fields in the queue_port_stats structure.
1408 	 */
1409 	"TSO               ",
1410 	"TxCsumOffload     ",
1411 	"RxCsumGood        ",
1412 	"VLANextractions   ",
1413 	"VLANinsertions    ",
1414 	"GROPackets        ",
1415 	"GROMerged         ",
1416 };
1417 
1418 /*
1419  * Return the number of statistics in the specified statistics set.
1420  */
1421 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
1422 {
1423 	switch (sset) {
1424 	case ETH_SS_STATS:
1425 		return ARRAY_SIZE(stats_strings);
1426 	default:
1427 		return -EOPNOTSUPP;
1428 	}
1429 	/*NOTREACHED*/
1430 }
1431 
1432 /*
1433  * Return the strings for the specified statistics set.
1434  */
1435 static void cxgb4vf_get_strings(struct net_device *dev,
1436 				u32 sset,
1437 				u8 *data)
1438 {
1439 	switch (sset) {
1440 	case ETH_SS_STATS:
1441 		memcpy(data, stats_strings, sizeof(stats_strings));
1442 		break;
1443 	}
1444 }
1445 
1446 /*
1447  * Small utility routine to accumulate queue statistics across the queues of
1448  * a "port".
1449  */
1450 static void collect_sge_port_stats(const struct adapter *adapter,
1451 				   const struct port_info *pi,
1452 				   struct queue_port_stats *stats)
1453 {
1454 	const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset];
1455 	const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset];
1456 	int qs;
1457 
1458 	memset(stats, 0, sizeof(*stats));
1459 	for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) {
1460 		stats->tso += txq->tso;
1461 		stats->tx_csum += txq->tx_cso;
1462 		stats->rx_csum += rxq->stats.rx_cso;
1463 		stats->vlan_ex += rxq->stats.vlan_ex;
1464 		stats->vlan_ins += txq->vlan_ins;
1465 		stats->lro_pkts += rxq->stats.lro_pkts;
1466 		stats->lro_merged += rxq->stats.lro_merged;
1467 	}
1468 }
1469 
1470 /*
1471  * Return the ETH_SS_STATS statistics set.
1472  */
1473 static void cxgb4vf_get_ethtool_stats(struct net_device *dev,
1474 				      struct ethtool_stats *stats,
1475 				      u64 *data)
1476 {
1477 	struct port_info *pi = netdev2pinfo(dev);
1478 	struct adapter *adapter = pi->adapter;
1479 	int err = t4vf_get_port_stats(adapter, pi->pidx,
1480 				      (struct t4vf_port_stats *)data);
1481 	if (err)
1482 		memset(data, 0, sizeof(struct t4vf_port_stats));
1483 
1484 	data += sizeof(struct t4vf_port_stats) / sizeof(u64);
1485 	collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data);
1486 }
1487 
1488 /*
1489  * Return the size of our register map.
1490  */
1491 static int cxgb4vf_get_regs_len(struct net_device *dev)
1492 {
1493 	return T4VF_REGMAP_SIZE;
1494 }
1495 
1496 /*
1497  * Dump a block of registers, start to end inclusive, into a buffer.
1498  */
1499 static void reg_block_dump(struct adapter *adapter, void *regbuf,
1500 			   unsigned int start, unsigned int end)
1501 {
1502 	u32 *bp = regbuf + start - T4VF_REGMAP_START;
1503 
1504 	for ( ; start <= end; start += sizeof(u32)) {
1505 		/*
1506 		 * Avoid reading the Mailbox Control register since that
1507 		 * can trigger a Mailbox Ownership Arbitration cycle and
1508 		 * interfere with communication with the firmware.
1509 		 */
1510 		if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
1511 			*bp++ = 0xffff;
1512 		else
1513 			*bp++ = t4_read_reg(adapter, start);
1514 	}
1515 }
1516 
1517 /*
1518  * Copy our entire register map into the provided buffer.
1519  */
1520 static void cxgb4vf_get_regs(struct net_device *dev,
1521 			     struct ethtool_regs *regs,
1522 			     void *regbuf)
1523 {
1524 	struct adapter *adapter = netdev2adap(dev);
1525 
1526 	regs->version = mk_adap_vers(adapter);
1527 
1528 	/*
1529 	 * Fill in register buffer with our register map.
1530 	 */
1531 	memset(regbuf, 0, T4VF_REGMAP_SIZE);
1532 
1533 	reg_block_dump(adapter, regbuf,
1534 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST,
1535 		       T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST);
1536 	reg_block_dump(adapter, regbuf,
1537 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST,
1538 		       T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST);
1539 	reg_block_dump(adapter, regbuf,
1540 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST,
1541 		       T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_LAST);
1542 	reg_block_dump(adapter, regbuf,
1543 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST,
1544 		       T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST);
1545 
1546 	reg_block_dump(adapter, regbuf,
1547 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST,
1548 		       T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST);
1549 }
1550 
1551 /*
1552  * Report current Wake On LAN settings.
1553  */
1554 static void cxgb4vf_get_wol(struct net_device *dev,
1555 			    struct ethtool_wolinfo *wol)
1556 {
1557 	wol->supported = 0;
1558 	wol->wolopts = 0;
1559 	memset(&wol->sopass, 0, sizeof(wol->sopass));
1560 }
1561 
1562 /*
1563  * TCP Segmentation Offload flags which we support.
1564  */
1565 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
1566 
1567 static const struct ethtool_ops cxgb4vf_ethtool_ops = {
1568 	.get_settings		= cxgb4vf_get_settings,
1569 	.get_drvinfo		= cxgb4vf_get_drvinfo,
1570 	.get_msglevel		= cxgb4vf_get_msglevel,
1571 	.set_msglevel		= cxgb4vf_set_msglevel,
1572 	.get_ringparam		= cxgb4vf_get_ringparam,
1573 	.set_ringparam		= cxgb4vf_set_ringparam,
1574 	.get_coalesce		= cxgb4vf_get_coalesce,
1575 	.set_coalesce		= cxgb4vf_set_coalesce,
1576 	.get_pauseparam		= cxgb4vf_get_pauseparam,
1577 	.get_link		= ethtool_op_get_link,
1578 	.get_strings		= cxgb4vf_get_strings,
1579 	.set_phys_id		= cxgb4vf_phys_id,
1580 	.get_sset_count		= cxgb4vf_get_sset_count,
1581 	.get_ethtool_stats	= cxgb4vf_get_ethtool_stats,
1582 	.get_regs_len		= cxgb4vf_get_regs_len,
1583 	.get_regs		= cxgb4vf_get_regs,
1584 	.get_wol		= cxgb4vf_get_wol,
1585 };
1586 
1587 /*
1588  * /sys/kernel/debug/cxgb4vf support code and data.
1589  * ================================================
1590  */
1591 
1592 /*
1593  * Show SGE Queue Set information.  We display QPL Queues Sets per line.
1594  */
1595 #define QPL	4
1596 
1597 static int sge_qinfo_show(struct seq_file *seq, void *v)
1598 {
1599 	struct adapter *adapter = seq->private;
1600 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1601 	int qs, r = (uintptr_t)v - 1;
1602 
1603 	if (r)
1604 		seq_putc(seq, '\n');
1605 
1606 	#define S3(fmt_spec, s, v) \
1607 		do {\
1608 			seq_printf(seq, "%-12s", s); \
1609 			for (qs = 0; qs < n; ++qs) \
1610 				seq_printf(seq, " %16" fmt_spec, v); \
1611 			seq_putc(seq, '\n'); \
1612 		} while (0)
1613 	#define S(s, v)		S3("s", s, v)
1614 	#define T(s, v)		S3("u", s, txq[qs].v)
1615 	#define R(s, v)		S3("u", s, rxq[qs].v)
1616 
1617 	if (r < eth_entries) {
1618 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1619 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1620 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1621 
1622 		S("QType:", "Ethernet");
1623 		S("Interface:",
1624 		  (rxq[qs].rspq.netdev
1625 		   ? rxq[qs].rspq.netdev->name
1626 		   : "N/A"));
1627 		S3("d", "Port:",
1628 		   (rxq[qs].rspq.netdev
1629 		    ? ((struct port_info *)
1630 		       netdev_priv(rxq[qs].rspq.netdev))->port_id
1631 		    : -1));
1632 		T("TxQ ID:", q.abs_id);
1633 		T("TxQ size:", q.size);
1634 		T("TxQ inuse:", q.in_use);
1635 		T("TxQ PIdx:", q.pidx);
1636 		T("TxQ CIdx:", q.cidx);
1637 		R("RspQ ID:", rspq.abs_id);
1638 		R("RspQ size:", rspq.size);
1639 		R("RspQE size:", rspq.iqe_len);
1640 		S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq));
1641 		S3("u", "Intr pktcnt:",
1642 		   adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]);
1643 		R("RspQ CIdx:", rspq.cidx);
1644 		R("RspQ Gen:", rspq.gen);
1645 		R("FL ID:", fl.abs_id);
1646 		R("FL size:", fl.size - MIN_FL_RESID);
1647 		R("FL avail:", fl.avail);
1648 		R("FL PIdx:", fl.pidx);
1649 		R("FL CIdx:", fl.cidx);
1650 		return 0;
1651 	}
1652 
1653 	r -= eth_entries;
1654 	if (r == 0) {
1655 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1656 
1657 		seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue");
1658 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id);
1659 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1660 			   qtimer_val(adapter, evtq));
1661 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1662 			   adapter->sge.counter_val[evtq->pktcnt_idx]);
1663 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx);
1664 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen);
1665 	} else if (r == 1) {
1666 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1667 
1668 		seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue");
1669 		seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id);
1670 		seq_printf(seq, "%-12s %16u\n", "Intr delay:",
1671 			   qtimer_val(adapter, intrq));
1672 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
1673 			   adapter->sge.counter_val[intrq->pktcnt_idx]);
1674 		seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx);
1675 		seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen);
1676 	}
1677 
1678 	#undef R
1679 	#undef T
1680 	#undef S
1681 	#undef S3
1682 
1683 	return 0;
1684 }
1685 
1686 /*
1687  * Return the number of "entries" in our "file".  We group the multi-Queue
1688  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1689  *
1690  *     Ethernet RX/TX Queue Sets
1691  *     Firmware Event Queue
1692  *     Forwarded Interrupt Queue (if in MSI mode)
1693  */
1694 static int sge_queue_entries(const struct adapter *adapter)
1695 {
1696 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1697 		((adapter->flags & USING_MSI) != 0);
1698 }
1699 
1700 static void *sge_queue_start(struct seq_file *seq, loff_t *pos)
1701 {
1702 	int entries = sge_queue_entries(seq->private);
1703 
1704 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1705 }
1706 
1707 static void sge_queue_stop(struct seq_file *seq, void *v)
1708 {
1709 }
1710 
1711 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos)
1712 {
1713 	int entries = sge_queue_entries(seq->private);
1714 
1715 	++*pos;
1716 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1717 }
1718 
1719 static const struct seq_operations sge_qinfo_seq_ops = {
1720 	.start = sge_queue_start,
1721 	.next  = sge_queue_next,
1722 	.stop  = sge_queue_stop,
1723 	.show  = sge_qinfo_show
1724 };
1725 
1726 static int sge_qinfo_open(struct inode *inode, struct file *file)
1727 {
1728 	int res = seq_open(file, &sge_qinfo_seq_ops);
1729 
1730 	if (!res) {
1731 		struct seq_file *seq = file->private_data;
1732 		seq->private = inode->i_private;
1733 	}
1734 	return res;
1735 }
1736 
1737 static const struct file_operations sge_qinfo_debugfs_fops = {
1738 	.owner   = THIS_MODULE,
1739 	.open    = sge_qinfo_open,
1740 	.read    = seq_read,
1741 	.llseek  = seq_lseek,
1742 	.release = seq_release,
1743 };
1744 
1745 /*
1746  * Show SGE Queue Set statistics.  We display QPL Queues Sets per line.
1747  */
1748 #define QPL	4
1749 
1750 static int sge_qstats_show(struct seq_file *seq, void *v)
1751 {
1752 	struct adapter *adapter = seq->private;
1753 	int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL);
1754 	int qs, r = (uintptr_t)v - 1;
1755 
1756 	if (r)
1757 		seq_putc(seq, '\n');
1758 
1759 	#define S3(fmt, s, v) \
1760 		do { \
1761 			seq_printf(seq, "%-16s", s); \
1762 			for (qs = 0; qs < n; ++qs) \
1763 				seq_printf(seq, " %8" fmt, v); \
1764 			seq_putc(seq, '\n'); \
1765 		} while (0)
1766 	#define S(s, v)		S3("s", s, v)
1767 
1768 	#define T3(fmt, s, v)	S3(fmt, s, txq[qs].v)
1769 	#define T(s, v)		T3("lu", s, v)
1770 
1771 	#define R3(fmt, s, v)	S3(fmt, s, rxq[qs].v)
1772 	#define R(s, v)		R3("lu", s, v)
1773 
1774 	if (r < eth_entries) {
1775 		const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL];
1776 		const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL];
1777 		int n = min(QPL, adapter->sge.ethqsets - QPL * r);
1778 
1779 		S("QType:", "Ethernet");
1780 		S("Interface:",
1781 		  (rxq[qs].rspq.netdev
1782 		   ? rxq[qs].rspq.netdev->name
1783 		   : "N/A"));
1784 		R3("u", "RspQNullInts:", rspq.unhandled_irqs);
1785 		R("RxPackets:", stats.pkts);
1786 		R("RxCSO:", stats.rx_cso);
1787 		R("VLANxtract:", stats.vlan_ex);
1788 		R("LROmerged:", stats.lro_merged);
1789 		R("LROpackets:", stats.lro_pkts);
1790 		R("RxDrops:", stats.rx_drops);
1791 		T("TSO:", tso);
1792 		T("TxCSO:", tx_cso);
1793 		T("VLANins:", vlan_ins);
1794 		T("TxQFull:", q.stops);
1795 		T("TxQRestarts:", q.restarts);
1796 		T("TxMapErr:", mapping_err);
1797 		R("FLAllocErr:", fl.alloc_failed);
1798 		R("FLLrgAlcErr:", fl.large_alloc_failed);
1799 		R("FLStarving:", fl.starving);
1800 		return 0;
1801 	}
1802 
1803 	r -= eth_entries;
1804 	if (r == 0) {
1805 		const struct sge_rspq *evtq = &adapter->sge.fw_evtq;
1806 
1807 		seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue");
1808 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1809 			   evtq->unhandled_irqs);
1810 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx);
1811 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen);
1812 	} else if (r == 1) {
1813 		const struct sge_rspq *intrq = &adapter->sge.intrq;
1814 
1815 		seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue");
1816 		seq_printf(seq, "%-16s %8u\n", "RspQNullInts:",
1817 			   intrq->unhandled_irqs);
1818 		seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx);
1819 		seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen);
1820 	}
1821 
1822 	#undef R
1823 	#undef T
1824 	#undef S
1825 	#undef R3
1826 	#undef T3
1827 	#undef S3
1828 
1829 	return 0;
1830 }
1831 
1832 /*
1833  * Return the number of "entries" in our "file".  We group the multi-Queue
1834  * sections with QPL Queue Sets per "entry".  The sections of the output are:
1835  *
1836  *     Ethernet RX/TX Queue Sets
1837  *     Firmware Event Queue
1838  *     Forwarded Interrupt Queue (if in MSI mode)
1839  */
1840 static int sge_qstats_entries(const struct adapter *adapter)
1841 {
1842 	return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
1843 		((adapter->flags & USING_MSI) != 0);
1844 }
1845 
1846 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos)
1847 {
1848 	int entries = sge_qstats_entries(seq->private);
1849 
1850 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1851 }
1852 
1853 static void sge_qstats_stop(struct seq_file *seq, void *v)
1854 {
1855 }
1856 
1857 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos)
1858 {
1859 	int entries = sge_qstats_entries(seq->private);
1860 
1861 	(*pos)++;
1862 	return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL;
1863 }
1864 
1865 static const struct seq_operations sge_qstats_seq_ops = {
1866 	.start = sge_qstats_start,
1867 	.next  = sge_qstats_next,
1868 	.stop  = sge_qstats_stop,
1869 	.show  = sge_qstats_show
1870 };
1871 
1872 static int sge_qstats_open(struct inode *inode, struct file *file)
1873 {
1874 	int res = seq_open(file, &sge_qstats_seq_ops);
1875 
1876 	if (res == 0) {
1877 		struct seq_file *seq = file->private_data;
1878 		seq->private = inode->i_private;
1879 	}
1880 	return res;
1881 }
1882 
1883 static const struct file_operations sge_qstats_proc_fops = {
1884 	.owner   = THIS_MODULE,
1885 	.open    = sge_qstats_open,
1886 	.read    = seq_read,
1887 	.llseek  = seq_lseek,
1888 	.release = seq_release,
1889 };
1890 
1891 /*
1892  * Show PCI-E SR-IOV Virtual Function Resource Limits.
1893  */
1894 static int resources_show(struct seq_file *seq, void *v)
1895 {
1896 	struct adapter *adapter = seq->private;
1897 	struct vf_resources *vfres = &adapter->params.vfres;
1898 
1899 	#define S(desc, fmt, var) \
1900 		seq_printf(seq, "%-60s " fmt "\n", \
1901 			   desc " (" #var "):", vfres->var)
1902 
1903 	S("Virtual Interfaces", "%d", nvi);
1904 	S("Egress Queues", "%d", neq);
1905 	S("Ethernet Control", "%d", nethctrl);
1906 	S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint);
1907 	S("Ingress Queues", "%d", niq);
1908 	S("Traffic Class", "%d", tc);
1909 	S("Port Access Rights Mask", "%#x", pmask);
1910 	S("MAC Address Filters", "%d", nexactf);
1911 	S("Firmware Command Read Capabilities", "%#x", r_caps);
1912 	S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps);
1913 
1914 	#undef S
1915 
1916 	return 0;
1917 }
1918 
1919 static int resources_open(struct inode *inode, struct file *file)
1920 {
1921 	return single_open(file, resources_show, inode->i_private);
1922 }
1923 
1924 static const struct file_operations resources_proc_fops = {
1925 	.owner   = THIS_MODULE,
1926 	.open    = resources_open,
1927 	.read    = seq_read,
1928 	.llseek  = seq_lseek,
1929 	.release = single_release,
1930 };
1931 
1932 /*
1933  * Show Virtual Interfaces.
1934  */
1935 static int interfaces_show(struct seq_file *seq, void *v)
1936 {
1937 	if (v == SEQ_START_TOKEN) {
1938 		seq_puts(seq, "Interface  Port   VIID\n");
1939 	} else {
1940 		struct adapter *adapter = seq->private;
1941 		int pidx = (uintptr_t)v - 2;
1942 		struct net_device *dev = adapter->port[pidx];
1943 		struct port_info *pi = netdev_priv(dev);
1944 
1945 		seq_printf(seq, "%9s  %4d  %#5x\n",
1946 			   dev->name, pi->port_id, pi->viid);
1947 	}
1948 	return 0;
1949 }
1950 
1951 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos)
1952 {
1953 	return pos <= adapter->params.nports
1954 		? (void *)(uintptr_t)(pos + 1)
1955 		: NULL;
1956 }
1957 
1958 static void *interfaces_start(struct seq_file *seq, loff_t *pos)
1959 {
1960 	return *pos
1961 		? interfaces_get_idx(seq->private, *pos)
1962 		: SEQ_START_TOKEN;
1963 }
1964 
1965 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos)
1966 {
1967 	(*pos)++;
1968 	return interfaces_get_idx(seq->private, *pos);
1969 }
1970 
1971 static void interfaces_stop(struct seq_file *seq, void *v)
1972 {
1973 }
1974 
1975 static const struct seq_operations interfaces_seq_ops = {
1976 	.start = interfaces_start,
1977 	.next  = interfaces_next,
1978 	.stop  = interfaces_stop,
1979 	.show  = interfaces_show
1980 };
1981 
1982 static int interfaces_open(struct inode *inode, struct file *file)
1983 {
1984 	int res = seq_open(file, &interfaces_seq_ops);
1985 
1986 	if (res == 0) {
1987 		struct seq_file *seq = file->private_data;
1988 		seq->private = inode->i_private;
1989 	}
1990 	return res;
1991 }
1992 
1993 static const struct file_operations interfaces_proc_fops = {
1994 	.owner   = THIS_MODULE,
1995 	.open    = interfaces_open,
1996 	.read    = seq_read,
1997 	.llseek  = seq_lseek,
1998 	.release = seq_release,
1999 };
2000 
2001 /*
2002  * /sys/kernel/debugfs/cxgb4vf/ files list.
2003  */
2004 struct cxgb4vf_debugfs_entry {
2005 	const char *name;		/* name of debugfs node */
2006 	umode_t mode;			/* file system mode */
2007 	const struct file_operations *fops;
2008 };
2009 
2010 static struct cxgb4vf_debugfs_entry debugfs_files[] = {
2011 	{ "sge_qinfo",  S_IRUGO, &sge_qinfo_debugfs_fops },
2012 	{ "sge_qstats", S_IRUGO, &sge_qstats_proc_fops },
2013 	{ "resources",  S_IRUGO, &resources_proc_fops },
2014 	{ "interfaces", S_IRUGO, &interfaces_proc_fops },
2015 };
2016 
2017 /*
2018  * Module and device initialization and cleanup code.
2019  * ==================================================
2020  */
2021 
2022 /*
2023  * Set up out /sys/kernel/debug/cxgb4vf sub-nodes.  We assume that the
2024  * directory (debugfs_root) has already been set up.
2025  */
2026 static int setup_debugfs(struct adapter *adapter)
2027 {
2028 	int i;
2029 
2030 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2031 
2032 	/*
2033 	 * Debugfs support is best effort.
2034 	 */
2035 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
2036 		(void)debugfs_create_file(debugfs_files[i].name,
2037 				  debugfs_files[i].mode,
2038 				  adapter->debugfs_root,
2039 				  (void *)adapter,
2040 				  debugfs_files[i].fops);
2041 
2042 	return 0;
2043 }
2044 
2045 /*
2046  * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above.  We leave
2047  * it to our caller to tear down the directory (debugfs_root).
2048  */
2049 static void cleanup_debugfs(struct adapter *adapter)
2050 {
2051 	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));
2052 
2053 	/*
2054 	 * Unlike our sister routine cleanup_proc(), we don't need to remove
2055 	 * individual entries because a call will be made to
2056 	 * debugfs_remove_recursive().  We just need to clean up any ancillary
2057 	 * persistent state.
2058 	 */
2059 	/* nothing to do */
2060 }
2061 
2062 /*
2063  * Perform early "adapter" initialization.  This is where we discover what
2064  * adapter parameters we're going to be using and initialize basic adapter
2065  * hardware support.
2066  */
2067 static int adap_init0(struct adapter *adapter)
2068 {
2069 	struct vf_resources *vfres = &adapter->params.vfres;
2070 	struct sge_params *sge_params = &adapter->params.sge;
2071 	struct sge *s = &adapter->sge;
2072 	unsigned int ethqsets;
2073 	int err;
2074 
2075 	/*
2076 	 * Wait for the device to become ready before proceeding ...
2077 	 */
2078 	err = t4vf_wait_dev_ready(adapter);
2079 	if (err) {
2080 		dev_err(adapter->pdev_dev, "device didn't become ready:"
2081 			" err=%d\n", err);
2082 		return err;
2083 	}
2084 
2085 	/*
2086 	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
2087 	 * 2.6.31 and later we can't call pci_reset_function() in order to
2088 	 * issue an FLR because of a self- deadlock on the device semaphore.
2089 	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
2090 	 * cases where they're needed -- for instance, some versions of KVM
2091 	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
2092 	 * use the firmware based reset in order to reset any per function
2093 	 * state.
2094 	 */
2095 	err = t4vf_fw_reset(adapter);
2096 	if (err < 0) {
2097 		dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err);
2098 		return err;
2099 	}
2100 
2101 	/*
2102 	 * Grab basic operational parameters.  These will predominantly have
2103 	 * been set up by the Physical Function Driver or will be hard coded
2104 	 * into the adapter.  We just have to live with them ...  Note that
2105 	 * we _must_ get our VPD parameters before our SGE parameters because
2106 	 * we need to know the adapter's core clock from the VPD in order to
2107 	 * properly decode the SGE Timer Values.
2108 	 */
2109 	err = t4vf_get_dev_params(adapter);
2110 	if (err) {
2111 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2112 			" device parameters: err=%d\n", err);
2113 		return err;
2114 	}
2115 	err = t4vf_get_vpd_params(adapter);
2116 	if (err) {
2117 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2118 			" VPD parameters: err=%d\n", err);
2119 		return err;
2120 	}
2121 	err = t4vf_get_sge_params(adapter);
2122 	if (err) {
2123 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2124 			" SGE parameters: err=%d\n", err);
2125 		return err;
2126 	}
2127 	err = t4vf_get_rss_glb_config(adapter);
2128 	if (err) {
2129 		dev_err(adapter->pdev_dev, "unable to retrieve adapter"
2130 			" RSS parameters: err=%d\n", err);
2131 		return err;
2132 	}
2133 	if (adapter->params.rss.mode !=
2134 	    FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
2135 		dev_err(adapter->pdev_dev, "unable to operate with global RSS"
2136 			" mode %d\n", adapter->params.rss.mode);
2137 		return -EINVAL;
2138 	}
2139 	err = t4vf_sge_init(adapter);
2140 	if (err) {
2141 		dev_err(adapter->pdev_dev, "unable to use adapter parameters:"
2142 			" err=%d\n", err);
2143 		return err;
2144 	}
2145 
2146 	/*
2147 	 * Retrieve our RX interrupt holdoff timer values and counter
2148 	 * threshold values from the SGE parameters.
2149 	 */
2150 	s->timer_val[0] = core_ticks_to_us(adapter,
2151 		TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1));
2152 	s->timer_val[1] = core_ticks_to_us(adapter,
2153 		TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1));
2154 	s->timer_val[2] = core_ticks_to_us(adapter,
2155 		TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3));
2156 	s->timer_val[3] = core_ticks_to_us(adapter,
2157 		TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3));
2158 	s->timer_val[4] = core_ticks_to_us(adapter,
2159 		TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5));
2160 	s->timer_val[5] = core_ticks_to_us(adapter,
2161 		TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5));
2162 
2163 	s->counter_val[0] =
2164 		THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold);
2165 	s->counter_val[1] =
2166 		THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold);
2167 	s->counter_val[2] =
2168 		THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold);
2169 	s->counter_val[3] =
2170 		THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold);
2171 
2172 	/*
2173 	 * Grab our Virtual Interface resource allocation, extract the
2174 	 * features that we're interested in and do a bit of sanity testing on
2175 	 * what we discover.
2176 	 */
2177 	err = t4vf_get_vfres(adapter);
2178 	if (err) {
2179 		dev_err(adapter->pdev_dev, "unable to get virtual interface"
2180 			" resources: err=%d\n", err);
2181 		return err;
2182 	}
2183 
2184 	/*
2185 	 * The number of "ports" which we support is equal to the number of
2186 	 * Virtual Interfaces with which we've been provisioned.
2187 	 */
2188 	adapter->params.nports = vfres->nvi;
2189 	if (adapter->params.nports > MAX_NPORTS) {
2190 		dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
2191 			 " virtual interfaces\n", MAX_NPORTS,
2192 			 adapter->params.nports);
2193 		adapter->params.nports = MAX_NPORTS;
2194 	}
2195 
2196 	/*
2197 	 * We need to reserve a number of the ingress queues with Free List
2198 	 * and Interrupt capabilities for special interrupt purposes (like
2199 	 * asynchronous firmware messages, or forwarded interrupts if we're
2200 	 * using MSI).  The rest of the FL/Intr-capable ingress queues will be
2201 	 * matched up one-for-one with Ethernet/Control egress queues in order
2202 	 * to form "Queue Sets" which will be aportioned between the "ports".
2203 	 * For each Queue Set, we'll need the ability to allocate two Egress
2204 	 * Contexts -- one for the Ingress Queue Free List and one for the TX
2205 	 * Ethernet Queue.
2206 	 */
2207 	ethqsets = vfres->niqflint - INGQ_EXTRAS;
2208 	if (vfres->nethctrl != ethqsets) {
2209 		dev_warn(adapter->pdev_dev, "unequal number of [available]"
2210 			 " ingress/egress queues (%d/%d); using minimum for"
2211 			 " number of Queue Sets\n", ethqsets, vfres->nethctrl);
2212 		ethqsets = min(vfres->nethctrl, ethqsets);
2213 	}
2214 	if (vfres->neq < ethqsets*2) {
2215 		dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
2216 			 " to support Queue Sets (%d); reducing allowed Queue"
2217 			 " Sets\n", vfres->neq, ethqsets);
2218 		ethqsets = vfres->neq/2;
2219 	}
2220 	if (ethqsets > MAX_ETH_QSETS) {
2221 		dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
2222 			 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
2223 		ethqsets = MAX_ETH_QSETS;
2224 	}
2225 	if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
2226 		dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
2227 			 " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
2228 	}
2229 	adapter->sge.max_ethqsets = ethqsets;
2230 
2231 	/*
2232 	 * Check for various parameter sanity issues.  Most checks simply
2233 	 * result in us using fewer resources than our provissioning but we
2234 	 * do need at least  one "port" with which to work ...
2235 	 */
2236 	if (adapter->sge.max_ethqsets < adapter->params.nports) {
2237 		dev_warn(adapter->pdev_dev, "only using %d of %d available"
2238 			 " virtual interfaces (too few Queue Sets)\n",
2239 			 adapter->sge.max_ethqsets, adapter->params.nports);
2240 		adapter->params.nports = adapter->sge.max_ethqsets;
2241 	}
2242 	if (adapter->params.nports == 0) {
2243 		dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
2244 			"usable!\n");
2245 		return -EINVAL;
2246 	}
2247 	return 0;
2248 }
2249 
2250 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx,
2251 			     u8 pkt_cnt_idx, unsigned int size,
2252 			     unsigned int iqe_size)
2253 {
2254 	rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) |
2255 			     (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0));
2256 	rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS
2257 			    ? pkt_cnt_idx
2258 			    : 0);
2259 	rspq->iqe_len = iqe_size;
2260 	rspq->size = size;
2261 }
2262 
2263 /*
2264  * Perform default configuration of DMA queues depending on the number and
2265  * type of ports we found and the number of available CPUs.  Most settings can
2266  * be modified by the admin via ethtool and cxgbtool prior to the adapter
2267  * being brought up for the first time.
2268  */
2269 static void cfg_queues(struct adapter *adapter)
2270 {
2271 	struct sge *s = &adapter->sge;
2272 	int q10g, n10g, qidx, pidx, qs;
2273 	size_t iqe_size;
2274 
2275 	/*
2276 	 * We should not be called till we know how many Queue Sets we can
2277 	 * support.  In particular, this means that we need to know what kind
2278 	 * of interrupts we'll be using ...
2279 	 */
2280 	BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0);
2281 
2282 	/*
2283 	 * Count the number of 10GbE Virtual Interfaces that we have.
2284 	 */
2285 	n10g = 0;
2286 	for_each_port(adapter, pidx)
2287 		n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
2288 
2289 	/*
2290 	 * We default to 1 queue per non-10G port and up to # of cores queues
2291 	 * per 10G port.
2292 	 */
2293 	if (n10g == 0)
2294 		q10g = 0;
2295 	else {
2296 		int n1g = (adapter->params.nports - n10g);
2297 		q10g = (adapter->sge.max_ethqsets - n1g) / n10g;
2298 		if (q10g > num_online_cpus())
2299 			q10g = num_online_cpus();
2300 	}
2301 
2302 	/*
2303 	 * Allocate the "Queue Sets" to the various Virtual Interfaces.
2304 	 * The layout will be established in setup_sge_queues() when the
2305 	 * adapter is brough up for the first time.
2306 	 */
2307 	qidx = 0;
2308 	for_each_port(adapter, pidx) {
2309 		struct port_info *pi = adap2pinfo(adapter, pidx);
2310 
2311 		pi->first_qset = qidx;
2312 		pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1;
2313 		qidx += pi->nqsets;
2314 	}
2315 	s->ethqsets = qidx;
2316 
2317 	/*
2318 	 * The Ingress Queue Entry Size for our various Response Queues needs
2319 	 * to be big enough to accommodate the largest message we can receive
2320 	 * from the chip/firmware; which is 64 bytes ...
2321 	 */
2322 	iqe_size = 64;
2323 
2324 	/*
2325 	 * Set up default Queue Set parameters ...  Start off with the
2326 	 * shortest interrupt holdoff timer.
2327 	 */
2328 	for (qs = 0; qs < s->max_ethqsets; qs++) {
2329 		struct sge_eth_rxq *rxq = &s->ethrxq[qs];
2330 		struct sge_eth_txq *txq = &s->ethtxq[qs];
2331 
2332 		init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size);
2333 		rxq->fl.size = 72;
2334 		txq->q.size = 1024;
2335 	}
2336 
2337 	/*
2338 	 * The firmware event queue is used for link state changes and
2339 	 * notifications of TX DMA completions.
2340 	 */
2341 	init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
2342 
2343 	/*
2344 	 * The forwarded interrupt queue is used when we're in MSI interrupt
2345 	 * mode.  In this mode all interrupts associated with RX queues will
2346 	 * be forwarded to a single queue which we'll associate with our MSI
2347 	 * interrupt vector.  The messages dropped in the forwarded interrupt
2348 	 * queue will indicate which ingress queue needs servicing ...  This
2349 	 * queue needs to be large enough to accommodate all of the ingress
2350 	 * queues which are forwarding their interrupt (+1 to prevent the PIDX
2351 	 * from equalling the CIDX if every ingress queue has an outstanding
2352 	 * interrupt).  The queue doesn't need to be any larger because no
2353 	 * ingress queue will ever have more than one outstanding interrupt at
2354 	 * any time ...
2355 	 */
2356 	init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
2357 		  iqe_size);
2358 }
2359 
2360 /*
2361  * Reduce the number of Ethernet queues across all ports to at most n.
2362  * n provides at least one queue per port.
2363  */
2364 static void reduce_ethqs(struct adapter *adapter, int n)
2365 {
2366 	int i;
2367 	struct port_info *pi;
2368 
2369 	/*
2370 	 * While we have too many active Ether Queue Sets, interate across the
2371 	 * "ports" and reduce their individual Queue Set allocations.
2372 	 */
2373 	BUG_ON(n < adapter->params.nports);
2374 	while (n < adapter->sge.ethqsets)
2375 		for_each_port(adapter, i) {
2376 			pi = adap2pinfo(adapter, i);
2377 			if (pi->nqsets > 1) {
2378 				pi->nqsets--;
2379 				adapter->sge.ethqsets--;
2380 				if (adapter->sge.ethqsets <= n)
2381 					break;
2382 			}
2383 		}
2384 
2385 	/*
2386 	 * Reassign the starting Queue Sets for each of the "ports" ...
2387 	 */
2388 	n = 0;
2389 	for_each_port(adapter, i) {
2390 		pi = adap2pinfo(adapter, i);
2391 		pi->first_qset = n;
2392 		n += pi->nqsets;
2393 	}
2394 }
2395 
2396 /*
2397  * We need to grab enough MSI-X vectors to cover our interrupt needs.  Ideally
2398  * we get a separate MSI-X vector for every "Queue Set" plus any extras we
2399  * need.  Minimally we need one for every Virtual Interface plus those needed
2400  * for our "extras".  Note that this process may lower the maximum number of
2401  * allowed Queue Sets ...
2402  */
2403 static int enable_msix(struct adapter *adapter)
2404 {
2405 	int i, err, want, need;
2406 	struct msix_entry entries[MSIX_ENTRIES];
2407 	struct sge *s = &adapter->sge;
2408 
2409 	for (i = 0; i < MSIX_ENTRIES; ++i)
2410 		entries[i].entry = i;
2411 
2412 	/*
2413 	 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets"
2414 	 * plus those needed for our "extras" (for example, the firmware
2415 	 * message queue).  We _need_ at least one "Queue Set" per Virtual
2416 	 * Interface plus those needed for our "extras".  So now we get to see
2417 	 * if the song is right ...
2418 	 */
2419 	want = s->max_ethqsets + MSIX_EXTRAS;
2420 	need = adapter->params.nports + MSIX_EXTRAS;
2421 	while ((err = pci_enable_msix(adapter->pdev, entries, want)) >= need)
2422 		want = err;
2423 
2424 	if (err == 0) {
2425 		int nqsets = want - MSIX_EXTRAS;
2426 		if (nqsets < s->max_ethqsets) {
2427 			dev_warn(adapter->pdev_dev, "only enough MSI-X vectors"
2428 				 " for %d Queue Sets\n", nqsets);
2429 			s->max_ethqsets = nqsets;
2430 			if (nqsets < s->ethqsets)
2431 				reduce_ethqs(adapter, nqsets);
2432 		}
2433 		for (i = 0; i < want; ++i)
2434 			adapter->msix_info[i].vec = entries[i].vector;
2435 	} else if (err > 0) {
2436 		pci_disable_msix(adapter->pdev);
2437 		dev_info(adapter->pdev_dev, "only %d MSI-X vectors left,"
2438 			 " not using MSI-X\n", err);
2439 	}
2440 	return err;
2441 }
2442 
2443 static const struct net_device_ops cxgb4vf_netdev_ops	= {
2444 	.ndo_open		= cxgb4vf_open,
2445 	.ndo_stop		= cxgb4vf_stop,
2446 	.ndo_start_xmit		= t4vf_eth_xmit,
2447 	.ndo_get_stats		= cxgb4vf_get_stats,
2448 	.ndo_set_rx_mode	= cxgb4vf_set_rxmode,
2449 	.ndo_set_mac_address	= cxgb4vf_set_mac_addr,
2450 	.ndo_validate_addr	= eth_validate_addr,
2451 	.ndo_do_ioctl		= cxgb4vf_do_ioctl,
2452 	.ndo_change_mtu		= cxgb4vf_change_mtu,
2453 	.ndo_fix_features	= cxgb4vf_fix_features,
2454 	.ndo_set_features	= cxgb4vf_set_features,
2455 #ifdef CONFIG_NET_POLL_CONTROLLER
2456 	.ndo_poll_controller	= cxgb4vf_poll_controller,
2457 #endif
2458 };
2459 
2460 /*
2461  * "Probe" a device: initialize a device and construct all kernel and driver
2462  * state needed to manage the device.  This routine is called "init_one" in
2463  * the PF Driver ...
2464  */
2465 static int cxgb4vf_pci_probe(struct pci_dev *pdev,
2466 			     const struct pci_device_id *ent)
2467 {
2468 	static int version_printed;
2469 
2470 	int pci_using_dac;
2471 	int err, pidx;
2472 	unsigned int pmask;
2473 	struct adapter *adapter;
2474 	struct port_info *pi;
2475 	struct net_device *netdev;
2476 
2477 	/*
2478 	 * Print our driver banner the first time we're called to initialize a
2479 	 * device.
2480 	 */
2481 	if (version_printed == 0) {
2482 		printk(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
2483 		version_printed = 1;
2484 	}
2485 
2486 	/*
2487 	 * Initialize generic PCI device state.
2488 	 */
2489 	err = pci_enable_device(pdev);
2490 	if (err) {
2491 		dev_err(&pdev->dev, "cannot enable PCI device\n");
2492 		return err;
2493 	}
2494 
2495 	/*
2496 	 * Reserve PCI resources for the device.  If we can't get them some
2497 	 * other driver may have already claimed the device ...
2498 	 */
2499 	err = pci_request_regions(pdev, KBUILD_MODNAME);
2500 	if (err) {
2501 		dev_err(&pdev->dev, "cannot obtain PCI resources\n");
2502 		goto err_disable_device;
2503 	}
2504 
2505 	/*
2506 	 * Set up our DMA mask: try for 64-bit address masking first and
2507 	 * fall back to 32-bit if we can't get 64 bits ...
2508 	 */
2509 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
2510 	if (err == 0) {
2511 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
2512 		if (err) {
2513 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for"
2514 				" coherent allocations\n");
2515 			goto err_release_regions;
2516 		}
2517 		pci_using_dac = 1;
2518 	} else {
2519 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
2520 		if (err != 0) {
2521 			dev_err(&pdev->dev, "no usable DMA configuration\n");
2522 			goto err_release_regions;
2523 		}
2524 		pci_using_dac = 0;
2525 	}
2526 
2527 	/*
2528 	 * Enable bus mastering for the device ...
2529 	 */
2530 	pci_set_master(pdev);
2531 
2532 	/*
2533 	 * Allocate our adapter data structure and attach it to the device.
2534 	 */
2535 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
2536 	if (!adapter) {
2537 		err = -ENOMEM;
2538 		goto err_release_regions;
2539 	}
2540 	pci_set_drvdata(pdev, adapter);
2541 	adapter->pdev = pdev;
2542 	adapter->pdev_dev = &pdev->dev;
2543 
2544 	/*
2545 	 * Initialize SMP data synchronization resources.
2546 	 */
2547 	spin_lock_init(&adapter->stats_lock);
2548 
2549 	/*
2550 	 * Map our I/O registers in BAR0.
2551 	 */
2552 	adapter->regs = pci_ioremap_bar(pdev, 0);
2553 	if (!adapter->regs) {
2554 		dev_err(&pdev->dev, "cannot map device registers\n");
2555 		err = -ENOMEM;
2556 		goto err_free_adapter;
2557 	}
2558 
2559 	/*
2560 	 * Initialize adapter level features.
2561 	 */
2562 	adapter->name = pci_name(pdev);
2563 	adapter->msg_enable = dflt_msg_enable;
2564 	err = adap_init0(adapter);
2565 	if (err)
2566 		goto err_unmap_bar;
2567 
2568 	/*
2569 	 * Allocate our "adapter ports" and stitch everything together.
2570 	 */
2571 	pmask = adapter->params.vfres.pmask;
2572 	for_each_port(adapter, pidx) {
2573 		int port_id, viid;
2574 
2575 		/*
2576 		 * We simplistically allocate our virtual interfaces
2577 		 * sequentially across the port numbers to which we have
2578 		 * access rights.  This should be configurable in some manner
2579 		 * ...
2580 		 */
2581 		if (pmask == 0)
2582 			break;
2583 		port_id = ffs(pmask) - 1;
2584 		pmask &= ~(1 << port_id);
2585 		viid = t4vf_alloc_vi(adapter, port_id);
2586 		if (viid < 0) {
2587 			dev_err(&pdev->dev, "cannot allocate VI for port %d:"
2588 				" err=%d\n", port_id, viid);
2589 			err = viid;
2590 			goto err_free_dev;
2591 		}
2592 
2593 		/*
2594 		 * Allocate our network device and stitch things together.
2595 		 */
2596 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
2597 					   MAX_PORT_QSETS);
2598 		if (netdev == NULL) {
2599 			t4vf_free_vi(adapter, viid);
2600 			err = -ENOMEM;
2601 			goto err_free_dev;
2602 		}
2603 		adapter->port[pidx] = netdev;
2604 		SET_NETDEV_DEV(netdev, &pdev->dev);
2605 		pi = netdev_priv(netdev);
2606 		pi->adapter = adapter;
2607 		pi->pidx = pidx;
2608 		pi->port_id = port_id;
2609 		pi->viid = viid;
2610 
2611 		/*
2612 		 * Initialize the starting state of our "port" and register
2613 		 * it.
2614 		 */
2615 		pi->xact_addr_filt = -1;
2616 		netif_carrier_off(netdev);
2617 		netdev->irq = pdev->irq;
2618 
2619 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
2620 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2621 			NETIF_F_HW_VLAN_RX | NETIF_F_RXCSUM;
2622 		netdev->vlan_features = NETIF_F_SG | TSO_FLAGS |
2623 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
2624 			NETIF_F_HIGHDMA;
2625 		netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_TX;
2626 		if (pci_using_dac)
2627 			netdev->features |= NETIF_F_HIGHDMA;
2628 
2629 		netdev->priv_flags |= IFF_UNICAST_FLT;
2630 
2631 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
2632 		SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
2633 
2634 		/*
2635 		 * Initialize the hardware/software state for the port.
2636 		 */
2637 		err = t4vf_port_init(adapter, pidx);
2638 		if (err) {
2639 			dev_err(&pdev->dev, "cannot initialize port %d\n",
2640 				pidx);
2641 			goto err_free_dev;
2642 		}
2643 	}
2644 
2645 	/*
2646 	 * The "card" is now ready to go.  If any errors occur during device
2647 	 * registration we do not fail the whole "card" but rather proceed
2648 	 * only with the ports we manage to register successfully.  However we
2649 	 * must register at least one net device.
2650 	 */
2651 	for_each_port(adapter, pidx) {
2652 		netdev = adapter->port[pidx];
2653 		if (netdev == NULL)
2654 			continue;
2655 
2656 		err = register_netdev(netdev);
2657 		if (err) {
2658 			dev_warn(&pdev->dev, "cannot register net device %s,"
2659 				 " skipping\n", netdev->name);
2660 			continue;
2661 		}
2662 
2663 		set_bit(pidx, &adapter->registered_device_map);
2664 	}
2665 	if (adapter->registered_device_map == 0) {
2666 		dev_err(&pdev->dev, "could not register any net devices\n");
2667 		goto err_free_dev;
2668 	}
2669 
2670 	/*
2671 	 * Set up our debugfs entries.
2672 	 */
2673 	if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
2674 		adapter->debugfs_root =
2675 			debugfs_create_dir(pci_name(pdev),
2676 					   cxgb4vf_debugfs_root);
2677 		if (IS_ERR_OR_NULL(adapter->debugfs_root))
2678 			dev_warn(&pdev->dev, "could not create debugfs"
2679 				 " directory");
2680 		else
2681 			setup_debugfs(adapter);
2682 	}
2683 
2684 	/*
2685 	 * See what interrupts we'll be using.  If we've been configured to
2686 	 * use MSI-X interrupts, try to enable them but fall back to using
2687 	 * MSI interrupts if we can't enable MSI-X interrupts.  If we can't
2688 	 * get MSI interrupts we bail with the error.
2689 	 */
2690 	if (msi == MSI_MSIX && enable_msix(adapter) == 0)
2691 		adapter->flags |= USING_MSIX;
2692 	else {
2693 		err = pci_enable_msi(pdev);
2694 		if (err) {
2695 			dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
2696 				" err=%d\n",
2697 				msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
2698 			goto err_free_debugfs;
2699 		}
2700 		adapter->flags |= USING_MSI;
2701 	}
2702 
2703 	/*
2704 	 * Now that we know how many "ports" we have and what their types are,
2705 	 * and how many Queue Sets we can support, we can configure our queue
2706 	 * resources.
2707 	 */
2708 	cfg_queues(adapter);
2709 
2710 	/*
2711 	 * Print a short notice on the existence and configuration of the new
2712 	 * VF network device ...
2713 	 */
2714 	for_each_port(adapter, pidx) {
2715 		dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
2716 			 adapter->port[pidx]->name,
2717 			 (adapter->flags & USING_MSIX) ? "MSI-X" :
2718 			 (adapter->flags & USING_MSI)  ? "MSI" : "");
2719 	}
2720 
2721 	/*
2722 	 * Return success!
2723 	 */
2724 	return 0;
2725 
2726 	/*
2727 	 * Error recovery and exit code.  Unwind state that's been created
2728 	 * so far and return the error.
2729 	 */
2730 
2731 err_free_debugfs:
2732 	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2733 		cleanup_debugfs(adapter);
2734 		debugfs_remove_recursive(adapter->debugfs_root);
2735 	}
2736 
2737 err_free_dev:
2738 	for_each_port(adapter, pidx) {
2739 		netdev = adapter->port[pidx];
2740 		if (netdev == NULL)
2741 			continue;
2742 		pi = netdev_priv(netdev);
2743 		t4vf_free_vi(adapter, pi->viid);
2744 		if (test_bit(pidx, &adapter->registered_device_map))
2745 			unregister_netdev(netdev);
2746 		free_netdev(netdev);
2747 	}
2748 
2749 err_unmap_bar:
2750 	iounmap(adapter->regs);
2751 
2752 err_free_adapter:
2753 	kfree(adapter);
2754 	pci_set_drvdata(pdev, NULL);
2755 
2756 err_release_regions:
2757 	pci_release_regions(pdev);
2758 	pci_set_drvdata(pdev, NULL);
2759 	pci_clear_master(pdev);
2760 
2761 err_disable_device:
2762 	pci_disable_device(pdev);
2763 
2764 	return err;
2765 }
2766 
2767 /*
2768  * "Remove" a device: tear down all kernel and driver state created in the
2769  * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
2770  * that this is called "remove_one" in the PF Driver.)
2771  */
2772 static void cxgb4vf_pci_remove(struct pci_dev *pdev)
2773 {
2774 	struct adapter *adapter = pci_get_drvdata(pdev);
2775 
2776 	/*
2777 	 * Tear down driver state associated with device.
2778 	 */
2779 	if (adapter) {
2780 		int pidx;
2781 
2782 		/*
2783 		 * Stop all of our activity.  Unregister network port,
2784 		 * disable interrupts, etc.
2785 		 */
2786 		for_each_port(adapter, pidx)
2787 			if (test_bit(pidx, &adapter->registered_device_map))
2788 				unregister_netdev(adapter->port[pidx]);
2789 		t4vf_sge_stop(adapter);
2790 		if (adapter->flags & USING_MSIX) {
2791 			pci_disable_msix(adapter->pdev);
2792 			adapter->flags &= ~USING_MSIX;
2793 		} else if (adapter->flags & USING_MSI) {
2794 			pci_disable_msi(adapter->pdev);
2795 			adapter->flags &= ~USING_MSI;
2796 		}
2797 
2798 		/*
2799 		 * Tear down our debugfs entries.
2800 		 */
2801 		if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
2802 			cleanup_debugfs(adapter);
2803 			debugfs_remove_recursive(adapter->debugfs_root);
2804 		}
2805 
2806 		/*
2807 		 * Free all of the various resources which we've acquired ...
2808 		 */
2809 		t4vf_free_sge_resources(adapter);
2810 		for_each_port(adapter, pidx) {
2811 			struct net_device *netdev = adapter->port[pidx];
2812 			struct port_info *pi;
2813 
2814 			if (netdev == NULL)
2815 				continue;
2816 
2817 			pi = netdev_priv(netdev);
2818 			t4vf_free_vi(adapter, pi->viid);
2819 			free_netdev(netdev);
2820 		}
2821 		iounmap(adapter->regs);
2822 		kfree(adapter);
2823 		pci_set_drvdata(pdev, NULL);
2824 	}
2825 
2826 	/*
2827 	 * Disable the device and release its PCI resources.
2828 	 */
2829 	pci_disable_device(pdev);
2830 	pci_clear_master(pdev);
2831 	pci_release_regions(pdev);
2832 }
2833 
2834 /*
2835  * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
2836  * delivery.
2837  */
2838 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
2839 {
2840 	struct adapter *adapter;
2841 	int pidx;
2842 
2843 	adapter = pci_get_drvdata(pdev);
2844 	if (!adapter)
2845 		return;
2846 
2847 	/*
2848 	 * Disable all Virtual Interfaces.  This will shut down the
2849 	 * delivery of all ingress packets into the chip for these
2850 	 * Virtual Interfaces.
2851 	 */
2852 	for_each_port(adapter, pidx) {
2853 		struct net_device *netdev;
2854 		struct port_info *pi;
2855 
2856 		if (!test_bit(pidx, &adapter->registered_device_map))
2857 			continue;
2858 
2859 		netdev = adapter->port[pidx];
2860 		if (!netdev)
2861 			continue;
2862 
2863 		pi = netdev_priv(netdev);
2864 		t4vf_enable_vi(adapter, pi->viid, false, false);
2865 	}
2866 
2867 	/*
2868 	 * Free up all Queues which will prevent further DMA and
2869 	 * Interrupts allowing various internal pathways to drain.
2870 	 */
2871 	t4vf_free_sge_resources(adapter);
2872 }
2873 
2874 /*
2875  * PCI Device registration data structures.
2876  */
2877 #define CH_DEVICE(devid, idx) \
2878 	{ PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx }
2879 
2880 static struct pci_device_id cxgb4vf_pci_tbl[] = {
2881 	CH_DEVICE(0xb000, 0),	/* PE10K FPGA */
2882 	CH_DEVICE(0x4800, 0),	/* T440-dbg */
2883 	CH_DEVICE(0x4801, 0),	/* T420-cr */
2884 	CH_DEVICE(0x4802, 0),	/* T422-cr */
2885 	CH_DEVICE(0x4803, 0),	/* T440-cr */
2886 	CH_DEVICE(0x4804, 0),	/* T420-bch */
2887 	CH_DEVICE(0x4805, 0),   /* T440-bch */
2888 	CH_DEVICE(0x4806, 0),	/* T460-ch */
2889 	CH_DEVICE(0x4807, 0),	/* T420-so */
2890 	CH_DEVICE(0x4808, 0),	/* T420-cx */
2891 	CH_DEVICE(0x4809, 0),	/* T420-bt */
2892 	CH_DEVICE(0x480a, 0),   /* T404-bt */
2893 	CH_DEVICE(0x480d, 0),   /* T480-cr */
2894 	CH_DEVICE(0x480e, 0),   /* T440-lp-cr */
2895 	{ 0, }
2896 };
2897 
2898 MODULE_DESCRIPTION(DRV_DESC);
2899 MODULE_AUTHOR("Chelsio Communications");
2900 MODULE_LICENSE("Dual BSD/GPL");
2901 MODULE_VERSION(DRV_VERSION);
2902 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl);
2903 
2904 static struct pci_driver cxgb4vf_driver = {
2905 	.name		= KBUILD_MODNAME,
2906 	.id_table	= cxgb4vf_pci_tbl,
2907 	.probe		= cxgb4vf_pci_probe,
2908 	.remove		= cxgb4vf_pci_remove,
2909 	.shutdown	= cxgb4vf_pci_shutdown,
2910 };
2911 
2912 /*
2913  * Initialize global driver state.
2914  */
2915 static int __init cxgb4vf_module_init(void)
2916 {
2917 	int ret;
2918 
2919 	/*
2920 	 * Vet our module parameters.
2921 	 */
2922 	if (msi != MSI_MSIX && msi != MSI_MSI) {
2923 		printk(KERN_WARNING KBUILD_MODNAME
2924 		       ": bad module parameter msi=%d; must be %d"
2925 		       " (MSI-X or MSI) or %d (MSI)\n",
2926 		       msi, MSI_MSIX, MSI_MSI);
2927 		return -EINVAL;
2928 	}
2929 
2930 	/* Debugfs support is optional, just warn if this fails */
2931 	cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
2932 	if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2933 		printk(KERN_WARNING KBUILD_MODNAME ": could not create"
2934 		       " debugfs entry, continuing\n");
2935 
2936 	ret = pci_register_driver(&cxgb4vf_driver);
2937 	if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root))
2938 		debugfs_remove(cxgb4vf_debugfs_root);
2939 	return ret;
2940 }
2941 
2942 /*
2943  * Tear down global driver state.
2944  */
2945 static void __exit cxgb4vf_module_exit(void)
2946 {
2947 	pci_unregister_driver(&cxgb4vf_driver);
2948 	debugfs_remove(cxgb4vf_debugfs_root);
2949 }
2950 
2951 module_init(cxgb4vf_module_init);
2952 module_exit(cxgb4vf_module_exit);
2953