1 /* 2 * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet 3 * driver for Linux. 4 * 5 * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 */ 35 36 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 37 38 #include <linux/module.h> 39 #include <linux/moduleparam.h> 40 #include <linux/init.h> 41 #include <linux/pci.h> 42 #include <linux/dma-mapping.h> 43 #include <linux/netdevice.h> 44 #include <linux/etherdevice.h> 45 #include <linux/debugfs.h> 46 #include <linux/ethtool.h> 47 48 #include "t4vf_common.h" 49 #include "t4vf_defs.h" 50 51 #include "../cxgb4/t4_regs.h" 52 #include "../cxgb4/t4_msg.h" 53 54 /* 55 * Generic information about the driver. 56 */ 57 #define DRV_VERSION "2.0.0-ko" 58 #define DRV_DESC "Chelsio T4/T5 Virtual Function (VF) Network Driver" 59 60 /* 61 * Module Parameters. 62 * ================== 63 */ 64 65 /* 66 * Default ethtool "message level" for adapters. 67 */ 68 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ 69 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ 70 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) 71 72 static int dflt_msg_enable = DFLT_MSG_ENABLE; 73 74 module_param(dflt_msg_enable, int, 0644); 75 MODULE_PARM_DESC(dflt_msg_enable, 76 "default adapter ethtool message level bitmap"); 77 78 /* 79 * The driver uses the best interrupt scheme available on a platform in the 80 * order MSI-X then MSI. This parameter determines which of these schemes the 81 * driver may consider as follows: 82 * 83 * msi = 2: choose from among MSI-X and MSI 84 * msi = 1: only consider MSI interrupts 85 * 86 * Note that unlike the Physical Function driver, this Virtual Function driver 87 * does _not_ support legacy INTx interrupts (this limitation is mandated by 88 * the PCI-E SR-IOV standard). 89 */ 90 #define MSI_MSIX 2 91 #define MSI_MSI 1 92 #define MSI_DEFAULT MSI_MSIX 93 94 static int msi = MSI_DEFAULT; 95 96 module_param(msi, int, 0644); 97 MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI"); 98 99 /* 100 * Fundamental constants. 101 * ====================== 102 */ 103 104 enum { 105 MAX_TXQ_ENTRIES = 16384, 106 MAX_RSPQ_ENTRIES = 16384, 107 MAX_RX_BUFFERS = 16384, 108 109 MIN_TXQ_ENTRIES = 32, 110 MIN_RSPQ_ENTRIES = 128, 111 MIN_FL_ENTRIES = 16, 112 113 /* 114 * For purposes of manipulating the Free List size we need to 115 * recognize that Free Lists are actually Egress Queues (the host 116 * produces free buffers which the hardware consumes), Egress Queues 117 * indices are all in units of Egress Context Units bytes, and free 118 * list entries are 64-bit PCI DMA addresses. And since the state of 119 * the Producer Index == the Consumer Index implies an EMPTY list, we 120 * always have at least one Egress Unit's worth of Free List entries 121 * unused. See sge.c for more details ... 122 */ 123 EQ_UNIT = SGE_EQ_IDXSIZE, 124 FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64), 125 MIN_FL_RESID = FL_PER_EQ_UNIT, 126 }; 127 128 /* 129 * Global driver state. 130 * ==================== 131 */ 132 133 static struct dentry *cxgb4vf_debugfs_root; 134 135 /* 136 * OS "Callback" functions. 137 * ======================== 138 */ 139 140 /* 141 * The link status has changed on the indicated "port" (Virtual Interface). 142 */ 143 void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok) 144 { 145 struct net_device *dev = adapter->port[pidx]; 146 147 /* 148 * If the port is disabled or the current recorded "link up" 149 * status matches the new status, just return. 150 */ 151 if (!netif_running(dev) || link_ok == netif_carrier_ok(dev)) 152 return; 153 154 /* 155 * Tell the OS that the link status has changed and print a short 156 * informative message on the console about the event. 157 */ 158 if (link_ok) { 159 const char *s; 160 const char *fc; 161 const struct port_info *pi = netdev_priv(dev); 162 163 netif_carrier_on(dev); 164 165 switch (pi->link_cfg.speed) { 166 case SPEED_10000: 167 s = "10Gbps"; 168 break; 169 170 case SPEED_1000: 171 s = "1000Mbps"; 172 break; 173 174 case SPEED_100: 175 s = "100Mbps"; 176 break; 177 178 default: 179 s = "unknown"; 180 break; 181 } 182 183 switch (pi->link_cfg.fc) { 184 case PAUSE_RX: 185 fc = "RX"; 186 break; 187 188 case PAUSE_TX: 189 fc = "TX"; 190 break; 191 192 case PAUSE_RX|PAUSE_TX: 193 fc = "RX/TX"; 194 break; 195 196 default: 197 fc = "no"; 198 break; 199 } 200 201 netdev_info(dev, "link up, %s, full-duplex, %s PAUSE\n", s, fc); 202 } else { 203 netif_carrier_off(dev); 204 netdev_info(dev, "link down\n"); 205 } 206 } 207 208 /* 209 * Net device operations. 210 * ====================== 211 */ 212 213 214 215 216 /* 217 * Perform the MAC and PHY actions needed to enable a "port" (Virtual 218 * Interface). 219 */ 220 static int link_start(struct net_device *dev) 221 { 222 int ret; 223 struct port_info *pi = netdev_priv(dev); 224 225 /* 226 * We do not set address filters and promiscuity here, the stack does 227 * that step explicitly. Enable vlan accel. 228 */ 229 ret = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu, -1, -1, -1, 1, 230 true); 231 if (ret == 0) { 232 ret = t4vf_change_mac(pi->adapter, pi->viid, 233 pi->xact_addr_filt, dev->dev_addr, true); 234 if (ret >= 0) { 235 pi->xact_addr_filt = ret; 236 ret = 0; 237 } 238 } 239 240 /* 241 * We don't need to actually "start the link" itself since the 242 * firmware will do that for us when the first Virtual Interface 243 * is enabled on a port. 244 */ 245 if (ret == 0) 246 ret = t4vf_enable_vi(pi->adapter, pi->viid, true, true); 247 return ret; 248 } 249 250 /* 251 * Name the MSI-X interrupts. 252 */ 253 static void name_msix_vecs(struct adapter *adapter) 254 { 255 int namelen = sizeof(adapter->msix_info[0].desc) - 1; 256 int pidx; 257 258 /* 259 * Firmware events. 260 */ 261 snprintf(adapter->msix_info[MSIX_FW].desc, namelen, 262 "%s-FWeventq", adapter->name); 263 adapter->msix_info[MSIX_FW].desc[namelen] = 0; 264 265 /* 266 * Ethernet queues. 267 */ 268 for_each_port(adapter, pidx) { 269 struct net_device *dev = adapter->port[pidx]; 270 const struct port_info *pi = netdev_priv(dev); 271 int qs, msi; 272 273 for (qs = 0, msi = MSIX_IQFLINT; qs < pi->nqsets; qs++, msi++) { 274 snprintf(adapter->msix_info[msi].desc, namelen, 275 "%s-%d", dev->name, qs); 276 adapter->msix_info[msi].desc[namelen] = 0; 277 } 278 } 279 } 280 281 /* 282 * Request all of our MSI-X resources. 283 */ 284 static int request_msix_queue_irqs(struct adapter *adapter) 285 { 286 struct sge *s = &adapter->sge; 287 int rxq, msi, err; 288 289 /* 290 * Firmware events. 291 */ 292 err = request_irq(adapter->msix_info[MSIX_FW].vec, t4vf_sge_intr_msix, 293 0, adapter->msix_info[MSIX_FW].desc, &s->fw_evtq); 294 if (err) 295 return err; 296 297 /* 298 * Ethernet queues. 299 */ 300 msi = MSIX_IQFLINT; 301 for_each_ethrxq(s, rxq) { 302 err = request_irq(adapter->msix_info[msi].vec, 303 t4vf_sge_intr_msix, 0, 304 adapter->msix_info[msi].desc, 305 &s->ethrxq[rxq].rspq); 306 if (err) 307 goto err_free_irqs; 308 msi++; 309 } 310 return 0; 311 312 err_free_irqs: 313 while (--rxq >= 0) 314 free_irq(adapter->msix_info[--msi].vec, &s->ethrxq[rxq].rspq); 315 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 316 return err; 317 } 318 319 /* 320 * Free our MSI-X resources. 321 */ 322 static void free_msix_queue_irqs(struct adapter *adapter) 323 { 324 struct sge *s = &adapter->sge; 325 int rxq, msi; 326 327 free_irq(adapter->msix_info[MSIX_FW].vec, &s->fw_evtq); 328 msi = MSIX_IQFLINT; 329 for_each_ethrxq(s, rxq) 330 free_irq(adapter->msix_info[msi++].vec, 331 &s->ethrxq[rxq].rspq); 332 } 333 334 /* 335 * Turn on NAPI and start up interrupts on a response queue. 336 */ 337 static void qenable(struct sge_rspq *rspq) 338 { 339 napi_enable(&rspq->napi); 340 341 /* 342 * 0-increment the Going To Sleep register to start the timer and 343 * enable interrupts. 344 */ 345 t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 346 CIDXINC(0) | 347 SEINTARM(rspq->intr_params) | 348 INGRESSQID(rspq->cntxt_id)); 349 } 350 351 /* 352 * Enable NAPI scheduling and interrupt generation for all Receive Queues. 353 */ 354 static void enable_rx(struct adapter *adapter) 355 { 356 int rxq; 357 struct sge *s = &adapter->sge; 358 359 for_each_ethrxq(s, rxq) 360 qenable(&s->ethrxq[rxq].rspq); 361 qenable(&s->fw_evtq); 362 363 /* 364 * The interrupt queue doesn't use NAPI so we do the 0-increment of 365 * its Going To Sleep register here to get it started. 366 */ 367 if (adapter->flags & USING_MSI) 368 t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS, 369 CIDXINC(0) | 370 SEINTARM(s->intrq.intr_params) | 371 INGRESSQID(s->intrq.cntxt_id)); 372 373 } 374 375 /* 376 * Wait until all NAPI handlers are descheduled. 377 */ 378 static void quiesce_rx(struct adapter *adapter) 379 { 380 struct sge *s = &adapter->sge; 381 int rxq; 382 383 for_each_ethrxq(s, rxq) 384 napi_disable(&s->ethrxq[rxq].rspq.napi); 385 napi_disable(&s->fw_evtq.napi); 386 } 387 388 /* 389 * Response queue handler for the firmware event queue. 390 */ 391 static int fwevtq_handler(struct sge_rspq *rspq, const __be64 *rsp, 392 const struct pkt_gl *gl) 393 { 394 /* 395 * Extract response opcode and get pointer to CPL message body. 396 */ 397 struct adapter *adapter = rspq->adapter; 398 u8 opcode = ((const struct rss_header *)rsp)->opcode; 399 void *cpl = (void *)(rsp + 1); 400 401 switch (opcode) { 402 case CPL_FW6_MSG: { 403 /* 404 * We've received an asynchronous message from the firmware. 405 */ 406 const struct cpl_fw6_msg *fw_msg = cpl; 407 if (fw_msg->type == FW6_TYPE_CMD_RPL) 408 t4vf_handle_fw_rpl(adapter, fw_msg->data); 409 break; 410 } 411 412 case CPL_FW4_MSG: { 413 /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG. 414 */ 415 const struct cpl_sge_egr_update *p = (void *)(rsp + 3); 416 opcode = G_CPL_OPCODE(ntohl(p->opcode_qid)); 417 if (opcode != CPL_SGE_EGR_UPDATE) { 418 dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n" 419 , opcode); 420 break; 421 } 422 cpl = (void *)p; 423 /*FALLTHROUGH*/ 424 } 425 426 case CPL_SGE_EGR_UPDATE: { 427 /* 428 * We've received an Egress Queue Status Update message. We 429 * get these, if the SGE is configured to send these when the 430 * firmware passes certain points in processing our TX 431 * Ethernet Queue or if we make an explicit request for one. 432 * We use these updates to determine when we may need to 433 * restart a TX Ethernet Queue which was stopped for lack of 434 * free TX Queue Descriptors ... 435 */ 436 const struct cpl_sge_egr_update *p = cpl; 437 unsigned int qid = EGR_QID(be32_to_cpu(p->opcode_qid)); 438 struct sge *s = &adapter->sge; 439 struct sge_txq *tq; 440 struct sge_eth_txq *txq; 441 unsigned int eq_idx; 442 443 /* 444 * Perform sanity checking on the Queue ID to make sure it 445 * really refers to one of our TX Ethernet Egress Queues which 446 * is active and matches the queue's ID. None of these error 447 * conditions should ever happen so we may want to either make 448 * them fatal and/or conditionalized under DEBUG. 449 */ 450 eq_idx = EQ_IDX(s, qid); 451 if (unlikely(eq_idx >= MAX_EGRQ)) { 452 dev_err(adapter->pdev_dev, 453 "Egress Update QID %d out of range\n", qid); 454 break; 455 } 456 tq = s->egr_map[eq_idx]; 457 if (unlikely(tq == NULL)) { 458 dev_err(adapter->pdev_dev, 459 "Egress Update QID %d TXQ=NULL\n", qid); 460 break; 461 } 462 txq = container_of(tq, struct sge_eth_txq, q); 463 if (unlikely(tq->abs_id != qid)) { 464 dev_err(adapter->pdev_dev, 465 "Egress Update QID %d refers to TXQ %d\n", 466 qid, tq->abs_id); 467 break; 468 } 469 470 /* 471 * Restart a stopped TX Queue which has less than half of its 472 * TX ring in use ... 473 */ 474 txq->q.restarts++; 475 netif_tx_wake_queue(txq->txq); 476 break; 477 } 478 479 default: 480 dev_err(adapter->pdev_dev, 481 "unexpected CPL %#x on FW event queue\n", opcode); 482 } 483 484 return 0; 485 } 486 487 /* 488 * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues 489 * to use and initializes them. We support multiple "Queue Sets" per port if 490 * we have MSI-X, otherwise just one queue set per port. 491 */ 492 static int setup_sge_queues(struct adapter *adapter) 493 { 494 struct sge *s = &adapter->sge; 495 int err, pidx, msix; 496 497 /* 498 * Clear "Queue Set" Free List Starving and TX Queue Mapping Error 499 * state. 500 */ 501 bitmap_zero(s->starving_fl, MAX_EGRQ); 502 503 /* 504 * If we're using MSI interrupt mode we need to set up a "forwarded 505 * interrupt" queue which we'll set up with our MSI vector. The rest 506 * of the ingress queues will be set up to forward their interrupts to 507 * this queue ... This must be first since t4vf_sge_alloc_rxq() uses 508 * the intrq's queue ID as the interrupt forwarding queue for the 509 * subsequent calls ... 510 */ 511 if (adapter->flags & USING_MSI) { 512 err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false, 513 adapter->port[0], 0, NULL, NULL); 514 if (err) 515 goto err_free_queues; 516 } 517 518 /* 519 * Allocate our ingress queue for asynchronous firmware messages. 520 */ 521 err = t4vf_sge_alloc_rxq(adapter, &s->fw_evtq, true, adapter->port[0], 522 MSIX_FW, NULL, fwevtq_handler); 523 if (err) 524 goto err_free_queues; 525 526 /* 527 * Allocate each "port"'s initial Queue Sets. These can be changed 528 * later on ... up to the point where any interface on the adapter is 529 * brought up at which point lots of things get nailed down 530 * permanently ... 531 */ 532 msix = MSIX_IQFLINT; 533 for_each_port(adapter, pidx) { 534 struct net_device *dev = adapter->port[pidx]; 535 struct port_info *pi = netdev_priv(dev); 536 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 537 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 538 int qs; 539 540 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 541 err = t4vf_sge_alloc_rxq(adapter, &rxq->rspq, false, 542 dev, msix++, 543 &rxq->fl, t4vf_ethrx_handler); 544 if (err) 545 goto err_free_queues; 546 547 err = t4vf_sge_alloc_eth_txq(adapter, txq, dev, 548 netdev_get_tx_queue(dev, qs), 549 s->fw_evtq.cntxt_id); 550 if (err) 551 goto err_free_queues; 552 553 rxq->rspq.idx = qs; 554 memset(&rxq->stats, 0, sizeof(rxq->stats)); 555 } 556 } 557 558 /* 559 * Create the reverse mappings for the queues. 560 */ 561 s->egr_base = s->ethtxq[0].q.abs_id - s->ethtxq[0].q.cntxt_id; 562 s->ingr_base = s->ethrxq[0].rspq.abs_id - s->ethrxq[0].rspq.cntxt_id; 563 IQ_MAP(s, s->fw_evtq.abs_id) = &s->fw_evtq; 564 for_each_port(adapter, pidx) { 565 struct net_device *dev = adapter->port[pidx]; 566 struct port_info *pi = netdev_priv(dev); 567 struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; 568 struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; 569 int qs; 570 571 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 572 IQ_MAP(s, rxq->rspq.abs_id) = &rxq->rspq; 573 EQ_MAP(s, txq->q.abs_id) = &txq->q; 574 575 /* 576 * The FW_IQ_CMD doesn't return the Absolute Queue IDs 577 * for Free Lists but since all of the Egress Queues 578 * (including Free Lists) have Relative Queue IDs 579 * which are computed as Absolute - Base Queue ID, we 580 * can synthesize the Absolute Queue IDs for the Free 581 * Lists. This is useful for debugging purposes when 582 * we want to dump Queue Contexts via the PF Driver. 583 */ 584 rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base; 585 EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl; 586 } 587 } 588 return 0; 589 590 err_free_queues: 591 t4vf_free_sge_resources(adapter); 592 return err; 593 } 594 595 /* 596 * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive 597 * queues. We configure the RSS CPU lookup table to distribute to the number 598 * of HW receive queues, and the response queue lookup table to narrow that 599 * down to the response queues actually configured for each "port" (Virtual 600 * Interface). We always configure the RSS mapping for all ports since the 601 * mapping table has plenty of entries. 602 */ 603 static int setup_rss(struct adapter *adapter) 604 { 605 int pidx; 606 607 for_each_port(adapter, pidx) { 608 struct port_info *pi = adap2pinfo(adapter, pidx); 609 struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 610 u16 rss[MAX_PORT_QSETS]; 611 int qs, err; 612 613 for (qs = 0; qs < pi->nqsets; qs++) 614 rss[qs] = rxq[qs].rspq.abs_id; 615 616 err = t4vf_config_rss_range(adapter, pi->viid, 617 0, pi->rss_size, rss, pi->nqsets); 618 if (err) 619 return err; 620 621 /* 622 * Perform Global RSS Mode-specific initialization. 623 */ 624 switch (adapter->params.rss.mode) { 625 case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: 626 /* 627 * If Tunnel All Lookup isn't specified in the global 628 * RSS Configuration, then we need to specify a 629 * default Ingress Queue for any ingress packets which 630 * aren't hashed. We'll use our first ingress queue 631 * ... 632 */ 633 if (!adapter->params.rss.u.basicvirtual.tnlalllookup) { 634 union rss_vi_config config; 635 err = t4vf_read_rss_vi_config(adapter, 636 pi->viid, 637 &config); 638 if (err) 639 return err; 640 config.basicvirtual.defaultq = 641 rxq[0].rspq.abs_id; 642 err = t4vf_write_rss_vi_config(adapter, 643 pi->viid, 644 &config); 645 if (err) 646 return err; 647 } 648 break; 649 } 650 } 651 652 return 0; 653 } 654 655 /* 656 * Bring the adapter up. Called whenever we go from no "ports" open to having 657 * one open. This function performs the actions necessary to make an adapter 658 * operational, such as completing the initialization of HW modules, and 659 * enabling interrupts. Must be called with the rtnl lock held. (Note that 660 * this is called "cxgb_up" in the PF Driver.) 661 */ 662 static int adapter_up(struct adapter *adapter) 663 { 664 int err; 665 666 /* 667 * If this is the first time we've been called, perform basic 668 * adapter setup. Once we've done this, many of our adapter 669 * parameters can no longer be changed ... 670 */ 671 if ((adapter->flags & FULL_INIT_DONE) == 0) { 672 err = setup_sge_queues(adapter); 673 if (err) 674 return err; 675 err = setup_rss(adapter); 676 if (err) { 677 t4vf_free_sge_resources(adapter); 678 return err; 679 } 680 681 if (adapter->flags & USING_MSIX) 682 name_msix_vecs(adapter); 683 adapter->flags |= FULL_INIT_DONE; 684 } 685 686 /* 687 * Acquire our interrupt resources. We only support MSI-X and MSI. 688 */ 689 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 690 if (adapter->flags & USING_MSIX) 691 err = request_msix_queue_irqs(adapter); 692 else 693 err = request_irq(adapter->pdev->irq, 694 t4vf_intr_handler(adapter), 0, 695 adapter->name, adapter); 696 if (err) { 697 dev_err(adapter->pdev_dev, "request_irq failed, err %d\n", 698 err); 699 return err; 700 } 701 702 /* 703 * Enable NAPI ingress processing and return success. 704 */ 705 enable_rx(adapter); 706 t4vf_sge_start(adapter); 707 return 0; 708 } 709 710 /* 711 * Bring the adapter down. Called whenever the last "port" (Virtual 712 * Interface) closed. (Note that this routine is called "cxgb_down" in the PF 713 * Driver.) 714 */ 715 static void adapter_down(struct adapter *adapter) 716 { 717 /* 718 * Free interrupt resources. 719 */ 720 if (adapter->flags & USING_MSIX) 721 free_msix_queue_irqs(adapter); 722 else 723 free_irq(adapter->pdev->irq, adapter); 724 725 /* 726 * Wait for NAPI handlers to finish. 727 */ 728 quiesce_rx(adapter); 729 } 730 731 /* 732 * Start up a net device. 733 */ 734 static int cxgb4vf_open(struct net_device *dev) 735 { 736 int err; 737 struct port_info *pi = netdev_priv(dev); 738 struct adapter *adapter = pi->adapter; 739 740 /* 741 * If this is the first interface that we're opening on the "adapter", 742 * bring the "adapter" up now. 743 */ 744 if (adapter->open_device_map == 0) { 745 err = adapter_up(adapter); 746 if (err) 747 return err; 748 } 749 750 /* 751 * Note that this interface is up and start everything up ... 752 */ 753 netif_set_real_num_tx_queues(dev, pi->nqsets); 754 err = netif_set_real_num_rx_queues(dev, pi->nqsets); 755 if (err) 756 goto err_unwind; 757 err = link_start(dev); 758 if (err) 759 goto err_unwind; 760 761 netif_tx_start_all_queues(dev); 762 set_bit(pi->port_id, &adapter->open_device_map); 763 return 0; 764 765 err_unwind: 766 if (adapter->open_device_map == 0) 767 adapter_down(adapter); 768 return err; 769 } 770 771 /* 772 * Shut down a net device. This routine is called "cxgb_close" in the PF 773 * Driver ... 774 */ 775 static int cxgb4vf_stop(struct net_device *dev) 776 { 777 struct port_info *pi = netdev_priv(dev); 778 struct adapter *adapter = pi->adapter; 779 780 netif_tx_stop_all_queues(dev); 781 netif_carrier_off(dev); 782 t4vf_enable_vi(adapter, pi->viid, false, false); 783 pi->link_cfg.link_ok = 0; 784 785 clear_bit(pi->port_id, &adapter->open_device_map); 786 if (adapter->open_device_map == 0) 787 adapter_down(adapter); 788 return 0; 789 } 790 791 /* 792 * Translate our basic statistics into the standard "ifconfig" statistics. 793 */ 794 static struct net_device_stats *cxgb4vf_get_stats(struct net_device *dev) 795 { 796 struct t4vf_port_stats stats; 797 struct port_info *pi = netdev2pinfo(dev); 798 struct adapter *adapter = pi->adapter; 799 struct net_device_stats *ns = &dev->stats; 800 int err; 801 802 spin_lock(&adapter->stats_lock); 803 err = t4vf_get_port_stats(adapter, pi->pidx, &stats); 804 spin_unlock(&adapter->stats_lock); 805 806 memset(ns, 0, sizeof(*ns)); 807 if (err) 808 return ns; 809 810 ns->tx_bytes = (stats.tx_bcast_bytes + stats.tx_mcast_bytes + 811 stats.tx_ucast_bytes + stats.tx_offload_bytes); 812 ns->tx_packets = (stats.tx_bcast_frames + stats.tx_mcast_frames + 813 stats.tx_ucast_frames + stats.tx_offload_frames); 814 ns->rx_bytes = (stats.rx_bcast_bytes + stats.rx_mcast_bytes + 815 stats.rx_ucast_bytes); 816 ns->rx_packets = (stats.rx_bcast_frames + stats.rx_mcast_frames + 817 stats.rx_ucast_frames); 818 ns->multicast = stats.rx_mcast_frames; 819 ns->tx_errors = stats.tx_drop_frames; 820 ns->rx_errors = stats.rx_err_frames; 821 822 return ns; 823 } 824 825 /* 826 * Collect up to maxaddrs worth of a netdevice's unicast addresses, starting 827 * at a specified offset within the list, into an array of addrss pointers and 828 * return the number collected. 829 */ 830 static inline unsigned int collect_netdev_uc_list_addrs(const struct net_device *dev, 831 const u8 **addr, 832 unsigned int offset, 833 unsigned int maxaddrs) 834 { 835 unsigned int index = 0; 836 unsigned int naddr = 0; 837 const struct netdev_hw_addr *ha; 838 839 for_each_dev_addr(dev, ha) 840 if (index++ >= offset) { 841 addr[naddr++] = ha->addr; 842 if (naddr >= maxaddrs) 843 break; 844 } 845 return naddr; 846 } 847 848 /* 849 * Collect up to maxaddrs worth of a netdevice's multicast addresses, starting 850 * at a specified offset within the list, into an array of addrss pointers and 851 * return the number collected. 852 */ 853 static inline unsigned int collect_netdev_mc_list_addrs(const struct net_device *dev, 854 const u8 **addr, 855 unsigned int offset, 856 unsigned int maxaddrs) 857 { 858 unsigned int index = 0; 859 unsigned int naddr = 0; 860 const struct netdev_hw_addr *ha; 861 862 netdev_for_each_mc_addr(ha, dev) 863 if (index++ >= offset) { 864 addr[naddr++] = ha->addr; 865 if (naddr >= maxaddrs) 866 break; 867 } 868 return naddr; 869 } 870 871 /* 872 * Configure the exact and hash address filters to handle a port's multicast 873 * and secondary unicast MAC addresses. 874 */ 875 static int set_addr_filters(const struct net_device *dev, bool sleep) 876 { 877 u64 mhash = 0; 878 u64 uhash = 0; 879 bool free = true; 880 unsigned int offset, naddr; 881 const u8 *addr[7]; 882 int ret; 883 const struct port_info *pi = netdev_priv(dev); 884 885 /* first do the secondary unicast addresses */ 886 for (offset = 0; ; offset += naddr) { 887 naddr = collect_netdev_uc_list_addrs(dev, addr, offset, 888 ARRAY_SIZE(addr)); 889 if (naddr == 0) 890 break; 891 892 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, 893 naddr, addr, NULL, &uhash, sleep); 894 if (ret < 0) 895 return ret; 896 897 free = false; 898 } 899 900 /* next set up the multicast addresses */ 901 for (offset = 0; ; offset += naddr) { 902 naddr = collect_netdev_mc_list_addrs(dev, addr, offset, 903 ARRAY_SIZE(addr)); 904 if (naddr == 0) 905 break; 906 907 ret = t4vf_alloc_mac_filt(pi->adapter, pi->viid, free, 908 naddr, addr, NULL, &mhash, sleep); 909 if (ret < 0) 910 return ret; 911 free = false; 912 } 913 914 return t4vf_set_addr_hash(pi->adapter, pi->viid, uhash != 0, 915 uhash | mhash, sleep); 916 } 917 918 /* 919 * Set RX properties of a port, such as promiscruity, address filters, and MTU. 920 * If @mtu is -1 it is left unchanged. 921 */ 922 static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) 923 { 924 int ret; 925 struct port_info *pi = netdev_priv(dev); 926 927 ret = set_addr_filters(dev, sleep_ok); 928 if (ret == 0) 929 ret = t4vf_set_rxmode(pi->adapter, pi->viid, -1, 930 (dev->flags & IFF_PROMISC) != 0, 931 (dev->flags & IFF_ALLMULTI) != 0, 932 1, -1, sleep_ok); 933 return ret; 934 } 935 936 /* 937 * Set the current receive modes on the device. 938 */ 939 static void cxgb4vf_set_rxmode(struct net_device *dev) 940 { 941 /* unfortunately we can't return errors to the stack */ 942 set_rxmode(dev, -1, false); 943 } 944 945 /* 946 * Find the entry in the interrupt holdoff timer value array which comes 947 * closest to the specified interrupt holdoff value. 948 */ 949 static int closest_timer(const struct sge *s, int us) 950 { 951 int i, timer_idx = 0, min_delta = INT_MAX; 952 953 for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) { 954 int delta = us - s->timer_val[i]; 955 if (delta < 0) 956 delta = -delta; 957 if (delta < min_delta) { 958 min_delta = delta; 959 timer_idx = i; 960 } 961 } 962 return timer_idx; 963 } 964 965 static int closest_thres(const struct sge *s, int thres) 966 { 967 int i, delta, pktcnt_idx = 0, min_delta = INT_MAX; 968 969 for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) { 970 delta = thres - s->counter_val[i]; 971 if (delta < 0) 972 delta = -delta; 973 if (delta < min_delta) { 974 min_delta = delta; 975 pktcnt_idx = i; 976 } 977 } 978 return pktcnt_idx; 979 } 980 981 /* 982 * Return a queue's interrupt hold-off time in us. 0 means no timer. 983 */ 984 static unsigned int qtimer_val(const struct adapter *adapter, 985 const struct sge_rspq *rspq) 986 { 987 unsigned int timer_idx = QINTR_TIMER_IDX_GET(rspq->intr_params); 988 989 return timer_idx < SGE_NTIMERS 990 ? adapter->sge.timer_val[timer_idx] 991 : 0; 992 } 993 994 /** 995 * set_rxq_intr_params - set a queue's interrupt holdoff parameters 996 * @adapter: the adapter 997 * @rspq: the RX response queue 998 * @us: the hold-off time in us, or 0 to disable timer 999 * @cnt: the hold-off packet count, or 0 to disable counter 1000 * 1001 * Sets an RX response queue's interrupt hold-off time and packet count. 1002 * At least one of the two needs to be enabled for the queue to generate 1003 * interrupts. 1004 */ 1005 static int set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, 1006 unsigned int us, unsigned int cnt) 1007 { 1008 unsigned int timer_idx; 1009 1010 /* 1011 * If both the interrupt holdoff timer and count are specified as 1012 * zero, default to a holdoff count of 1 ... 1013 */ 1014 if ((us | cnt) == 0) 1015 cnt = 1; 1016 1017 /* 1018 * If an interrupt holdoff count has been specified, then find the 1019 * closest configured holdoff count and use that. If the response 1020 * queue has already been created, then update its queue context 1021 * parameters ... 1022 */ 1023 if (cnt) { 1024 int err; 1025 u32 v, pktcnt_idx; 1026 1027 pktcnt_idx = closest_thres(&adapter->sge, cnt); 1028 if (rspq->desc && rspq->pktcnt_idx != pktcnt_idx) { 1029 v = FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 1030 FW_PARAMS_PARAM_X( 1031 FW_PARAMS_PARAM_DMAQ_IQ_INTCNTTHRESH) | 1032 FW_PARAMS_PARAM_YZ(rspq->cntxt_id); 1033 err = t4vf_set_params(adapter, 1, &v, &pktcnt_idx); 1034 if (err) 1035 return err; 1036 } 1037 rspq->pktcnt_idx = pktcnt_idx; 1038 } 1039 1040 /* 1041 * Compute the closest holdoff timer index from the supplied holdoff 1042 * timer value. 1043 */ 1044 timer_idx = (us == 0 1045 ? SGE_TIMER_RSTRT_CNTR 1046 : closest_timer(&adapter->sge, us)); 1047 1048 /* 1049 * Update the response queue's interrupt coalescing parameters and 1050 * return success. 1051 */ 1052 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | 1053 (cnt > 0 ? QINTR_CNT_EN : 0)); 1054 return 0; 1055 } 1056 1057 /* 1058 * Return a version number to identify the type of adapter. The scheme is: 1059 * - bits 0..9: chip version 1060 * - bits 10..15: chip revision 1061 */ 1062 static inline unsigned int mk_adap_vers(const struct adapter *adapter) 1063 { 1064 /* 1065 * Chip version 4, revision 0x3f (cxgb4vf). 1066 */ 1067 return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10); 1068 } 1069 1070 /* 1071 * Execute the specified ioctl command. 1072 */ 1073 static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 1074 { 1075 int ret = 0; 1076 1077 switch (cmd) { 1078 /* 1079 * The VF Driver doesn't have access to any of the other 1080 * common Ethernet device ioctl()'s (like reading/writing 1081 * PHY registers, etc. 1082 */ 1083 1084 default: 1085 ret = -EOPNOTSUPP; 1086 break; 1087 } 1088 return ret; 1089 } 1090 1091 /* 1092 * Change the device's MTU. 1093 */ 1094 static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu) 1095 { 1096 int ret; 1097 struct port_info *pi = netdev_priv(dev); 1098 1099 /* accommodate SACK */ 1100 if (new_mtu < 81) 1101 return -EINVAL; 1102 1103 ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu, 1104 -1, -1, -1, -1, true); 1105 if (!ret) 1106 dev->mtu = new_mtu; 1107 return ret; 1108 } 1109 1110 static netdev_features_t cxgb4vf_fix_features(struct net_device *dev, 1111 netdev_features_t features) 1112 { 1113 /* 1114 * Since there is no support for separate rx/tx vlan accel 1115 * enable/disable make sure tx flag is always in same state as rx. 1116 */ 1117 if (features & NETIF_F_HW_VLAN_CTAG_RX) 1118 features |= NETIF_F_HW_VLAN_CTAG_TX; 1119 else 1120 features &= ~NETIF_F_HW_VLAN_CTAG_TX; 1121 1122 return features; 1123 } 1124 1125 static int cxgb4vf_set_features(struct net_device *dev, 1126 netdev_features_t features) 1127 { 1128 struct port_info *pi = netdev_priv(dev); 1129 netdev_features_t changed = dev->features ^ features; 1130 1131 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 1132 t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1, 1133 features & NETIF_F_HW_VLAN_CTAG_TX, 0); 1134 1135 return 0; 1136 } 1137 1138 /* 1139 * Change the devices MAC address. 1140 */ 1141 static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr) 1142 { 1143 int ret; 1144 struct sockaddr *addr = _addr; 1145 struct port_info *pi = netdev_priv(dev); 1146 1147 if (!is_valid_ether_addr(addr->sa_data)) 1148 return -EADDRNOTAVAIL; 1149 1150 ret = t4vf_change_mac(pi->adapter, pi->viid, pi->xact_addr_filt, 1151 addr->sa_data, true); 1152 if (ret < 0) 1153 return ret; 1154 1155 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); 1156 pi->xact_addr_filt = ret; 1157 return 0; 1158 } 1159 1160 #ifdef CONFIG_NET_POLL_CONTROLLER 1161 /* 1162 * Poll all of our receive queues. This is called outside of normal interrupt 1163 * context. 1164 */ 1165 static void cxgb4vf_poll_controller(struct net_device *dev) 1166 { 1167 struct port_info *pi = netdev_priv(dev); 1168 struct adapter *adapter = pi->adapter; 1169 1170 if (adapter->flags & USING_MSIX) { 1171 struct sge_eth_rxq *rxq; 1172 int nqsets; 1173 1174 rxq = &adapter->sge.ethrxq[pi->first_qset]; 1175 for (nqsets = pi->nqsets; nqsets; nqsets--) { 1176 t4vf_sge_intr_msix(0, &rxq->rspq); 1177 rxq++; 1178 } 1179 } else 1180 t4vf_intr_handler(adapter)(0, adapter); 1181 } 1182 #endif 1183 1184 /* 1185 * Ethtool operations. 1186 * =================== 1187 * 1188 * Note that we don't support any ethtool operations which change the physical 1189 * state of the port to which we're linked. 1190 */ 1191 1192 /* 1193 * Return current port link settings. 1194 */ 1195 static int cxgb4vf_get_settings(struct net_device *dev, 1196 struct ethtool_cmd *cmd) 1197 { 1198 const struct port_info *pi = netdev_priv(dev); 1199 1200 cmd->supported = pi->link_cfg.supported; 1201 cmd->advertising = pi->link_cfg.advertising; 1202 ethtool_cmd_speed_set(cmd, 1203 netif_carrier_ok(dev) ? pi->link_cfg.speed : -1); 1204 cmd->duplex = DUPLEX_FULL; 1205 1206 cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE; 1207 cmd->phy_address = pi->port_id; 1208 cmd->transceiver = XCVR_EXTERNAL; 1209 cmd->autoneg = pi->link_cfg.autoneg; 1210 cmd->maxtxpkt = 0; 1211 cmd->maxrxpkt = 0; 1212 return 0; 1213 } 1214 1215 /* 1216 * Return our driver information. 1217 */ 1218 static void cxgb4vf_get_drvinfo(struct net_device *dev, 1219 struct ethtool_drvinfo *drvinfo) 1220 { 1221 struct adapter *adapter = netdev2adap(dev); 1222 1223 strlcpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver)); 1224 strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); 1225 strlcpy(drvinfo->bus_info, pci_name(to_pci_dev(dev->dev.parent)), 1226 sizeof(drvinfo->bus_info)); 1227 snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), 1228 "%u.%u.%u.%u, TP %u.%u.%u.%u", 1229 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.fwrev), 1230 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.fwrev), 1231 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.fwrev), 1232 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.fwrev), 1233 FW_HDR_FW_VER_MAJOR_GET(adapter->params.dev.tprev), 1234 FW_HDR_FW_VER_MINOR_GET(adapter->params.dev.tprev), 1235 FW_HDR_FW_VER_MICRO_GET(adapter->params.dev.tprev), 1236 FW_HDR_FW_VER_BUILD_GET(adapter->params.dev.tprev)); 1237 } 1238 1239 /* 1240 * Return current adapter message level. 1241 */ 1242 static u32 cxgb4vf_get_msglevel(struct net_device *dev) 1243 { 1244 return netdev2adap(dev)->msg_enable; 1245 } 1246 1247 /* 1248 * Set current adapter message level. 1249 */ 1250 static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel) 1251 { 1252 netdev2adap(dev)->msg_enable = msglevel; 1253 } 1254 1255 /* 1256 * Return the device's current Queue Set ring size parameters along with the 1257 * allowed maximum values. Since ethtool doesn't understand the concept of 1258 * multi-queue devices, we just return the current values associated with the 1259 * first Queue Set. 1260 */ 1261 static void cxgb4vf_get_ringparam(struct net_device *dev, 1262 struct ethtool_ringparam *rp) 1263 { 1264 const struct port_info *pi = netdev_priv(dev); 1265 const struct sge *s = &pi->adapter->sge; 1266 1267 rp->rx_max_pending = MAX_RX_BUFFERS; 1268 rp->rx_mini_max_pending = MAX_RSPQ_ENTRIES; 1269 rp->rx_jumbo_max_pending = 0; 1270 rp->tx_max_pending = MAX_TXQ_ENTRIES; 1271 1272 rp->rx_pending = s->ethrxq[pi->first_qset].fl.size - MIN_FL_RESID; 1273 rp->rx_mini_pending = s->ethrxq[pi->first_qset].rspq.size; 1274 rp->rx_jumbo_pending = 0; 1275 rp->tx_pending = s->ethtxq[pi->first_qset].q.size; 1276 } 1277 1278 /* 1279 * Set the Queue Set ring size parameters for the device. Again, since 1280 * ethtool doesn't allow for the concept of multiple queues per device, we'll 1281 * apply these new values across all of the Queue Sets associated with the 1282 * device -- after vetting them of course! 1283 */ 1284 static int cxgb4vf_set_ringparam(struct net_device *dev, 1285 struct ethtool_ringparam *rp) 1286 { 1287 const struct port_info *pi = netdev_priv(dev); 1288 struct adapter *adapter = pi->adapter; 1289 struct sge *s = &adapter->sge; 1290 int qs; 1291 1292 if (rp->rx_pending > MAX_RX_BUFFERS || 1293 rp->rx_jumbo_pending || 1294 rp->tx_pending > MAX_TXQ_ENTRIES || 1295 rp->rx_mini_pending > MAX_RSPQ_ENTRIES || 1296 rp->rx_mini_pending < MIN_RSPQ_ENTRIES || 1297 rp->rx_pending < MIN_FL_ENTRIES || 1298 rp->tx_pending < MIN_TXQ_ENTRIES) 1299 return -EINVAL; 1300 1301 if (adapter->flags & FULL_INIT_DONE) 1302 return -EBUSY; 1303 1304 for (qs = pi->first_qset; qs < pi->first_qset + pi->nqsets; qs++) { 1305 s->ethrxq[qs].fl.size = rp->rx_pending + MIN_FL_RESID; 1306 s->ethrxq[qs].rspq.size = rp->rx_mini_pending; 1307 s->ethtxq[qs].q.size = rp->tx_pending; 1308 } 1309 return 0; 1310 } 1311 1312 /* 1313 * Return the interrupt holdoff timer and count for the first Queue Set on the 1314 * device. Our extension ioctl() (the cxgbtool interface) allows the 1315 * interrupt holdoff timer to be read on all of the device's Queue Sets. 1316 */ 1317 static int cxgb4vf_get_coalesce(struct net_device *dev, 1318 struct ethtool_coalesce *coalesce) 1319 { 1320 const struct port_info *pi = netdev_priv(dev); 1321 const struct adapter *adapter = pi->adapter; 1322 const struct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq; 1323 1324 coalesce->rx_coalesce_usecs = qtimer_val(adapter, rspq); 1325 coalesce->rx_max_coalesced_frames = 1326 ((rspq->intr_params & QINTR_CNT_EN) 1327 ? adapter->sge.counter_val[rspq->pktcnt_idx] 1328 : 0); 1329 return 0; 1330 } 1331 1332 /* 1333 * Set the RX interrupt holdoff timer and count for the first Queue Set on the 1334 * interface. Our extension ioctl() (the cxgbtool interface) allows us to set 1335 * the interrupt holdoff timer on any of the device's Queue Sets. 1336 */ 1337 static int cxgb4vf_set_coalesce(struct net_device *dev, 1338 struct ethtool_coalesce *coalesce) 1339 { 1340 const struct port_info *pi = netdev_priv(dev); 1341 struct adapter *adapter = pi->adapter; 1342 1343 return set_rxq_intr_params(adapter, 1344 &adapter->sge.ethrxq[pi->first_qset].rspq, 1345 coalesce->rx_coalesce_usecs, 1346 coalesce->rx_max_coalesced_frames); 1347 } 1348 1349 /* 1350 * Report current port link pause parameter settings. 1351 */ 1352 static void cxgb4vf_get_pauseparam(struct net_device *dev, 1353 struct ethtool_pauseparam *pauseparam) 1354 { 1355 struct port_info *pi = netdev_priv(dev); 1356 1357 pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0; 1358 pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0; 1359 pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0; 1360 } 1361 1362 /* 1363 * Identify the port by blinking the port's LED. 1364 */ 1365 static int cxgb4vf_phys_id(struct net_device *dev, 1366 enum ethtool_phys_id_state state) 1367 { 1368 unsigned int val; 1369 struct port_info *pi = netdev_priv(dev); 1370 1371 if (state == ETHTOOL_ID_ACTIVE) 1372 val = 0xffff; 1373 else if (state == ETHTOOL_ID_INACTIVE) 1374 val = 0; 1375 else 1376 return -EINVAL; 1377 1378 return t4vf_identify_port(pi->adapter, pi->viid, val); 1379 } 1380 1381 /* 1382 * Port stats maintained per queue of the port. 1383 */ 1384 struct queue_port_stats { 1385 u64 tso; 1386 u64 tx_csum; 1387 u64 rx_csum; 1388 u64 vlan_ex; 1389 u64 vlan_ins; 1390 u64 lro_pkts; 1391 u64 lro_merged; 1392 }; 1393 1394 /* 1395 * Strings for the ETH_SS_STATS statistics set ("ethtool -S"). Note that 1396 * these need to match the order of statistics returned by 1397 * t4vf_get_port_stats(). 1398 */ 1399 static const char stats_strings[][ETH_GSTRING_LEN] = { 1400 /* 1401 * These must match the layout of the t4vf_port_stats structure. 1402 */ 1403 "TxBroadcastBytes ", 1404 "TxBroadcastFrames ", 1405 "TxMulticastBytes ", 1406 "TxMulticastFrames ", 1407 "TxUnicastBytes ", 1408 "TxUnicastFrames ", 1409 "TxDroppedFrames ", 1410 "TxOffloadBytes ", 1411 "TxOffloadFrames ", 1412 "RxBroadcastBytes ", 1413 "RxBroadcastFrames ", 1414 "RxMulticastBytes ", 1415 "RxMulticastFrames ", 1416 "RxUnicastBytes ", 1417 "RxUnicastFrames ", 1418 "RxErrorFrames ", 1419 1420 /* 1421 * These are accumulated per-queue statistics and must match the 1422 * order of the fields in the queue_port_stats structure. 1423 */ 1424 "TSO ", 1425 "TxCsumOffload ", 1426 "RxCsumGood ", 1427 "VLANextractions ", 1428 "VLANinsertions ", 1429 "GROPackets ", 1430 "GROMerged ", 1431 }; 1432 1433 /* 1434 * Return the number of statistics in the specified statistics set. 1435 */ 1436 static int cxgb4vf_get_sset_count(struct net_device *dev, int sset) 1437 { 1438 switch (sset) { 1439 case ETH_SS_STATS: 1440 return ARRAY_SIZE(stats_strings); 1441 default: 1442 return -EOPNOTSUPP; 1443 } 1444 /*NOTREACHED*/ 1445 } 1446 1447 /* 1448 * Return the strings for the specified statistics set. 1449 */ 1450 static void cxgb4vf_get_strings(struct net_device *dev, 1451 u32 sset, 1452 u8 *data) 1453 { 1454 switch (sset) { 1455 case ETH_SS_STATS: 1456 memcpy(data, stats_strings, sizeof(stats_strings)); 1457 break; 1458 } 1459 } 1460 1461 /* 1462 * Small utility routine to accumulate queue statistics across the queues of 1463 * a "port". 1464 */ 1465 static void collect_sge_port_stats(const struct adapter *adapter, 1466 const struct port_info *pi, 1467 struct queue_port_stats *stats) 1468 { 1469 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset]; 1470 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; 1471 int qs; 1472 1473 memset(stats, 0, sizeof(*stats)); 1474 for (qs = 0; qs < pi->nqsets; qs++, rxq++, txq++) { 1475 stats->tso += txq->tso; 1476 stats->tx_csum += txq->tx_cso; 1477 stats->rx_csum += rxq->stats.rx_cso; 1478 stats->vlan_ex += rxq->stats.vlan_ex; 1479 stats->vlan_ins += txq->vlan_ins; 1480 stats->lro_pkts += rxq->stats.lro_pkts; 1481 stats->lro_merged += rxq->stats.lro_merged; 1482 } 1483 } 1484 1485 /* 1486 * Return the ETH_SS_STATS statistics set. 1487 */ 1488 static void cxgb4vf_get_ethtool_stats(struct net_device *dev, 1489 struct ethtool_stats *stats, 1490 u64 *data) 1491 { 1492 struct port_info *pi = netdev2pinfo(dev); 1493 struct adapter *adapter = pi->adapter; 1494 int err = t4vf_get_port_stats(adapter, pi->pidx, 1495 (struct t4vf_port_stats *)data); 1496 if (err) 1497 memset(data, 0, sizeof(struct t4vf_port_stats)); 1498 1499 data += sizeof(struct t4vf_port_stats) / sizeof(u64); 1500 collect_sge_port_stats(adapter, pi, (struct queue_port_stats *)data); 1501 } 1502 1503 /* 1504 * Return the size of our register map. 1505 */ 1506 static int cxgb4vf_get_regs_len(struct net_device *dev) 1507 { 1508 return T4VF_REGMAP_SIZE; 1509 } 1510 1511 /* 1512 * Dump a block of registers, start to end inclusive, into a buffer. 1513 */ 1514 static void reg_block_dump(struct adapter *adapter, void *regbuf, 1515 unsigned int start, unsigned int end) 1516 { 1517 u32 *bp = regbuf + start - T4VF_REGMAP_START; 1518 1519 for ( ; start <= end; start += sizeof(u32)) { 1520 /* 1521 * Avoid reading the Mailbox Control register since that 1522 * can trigger a Mailbox Ownership Arbitration cycle and 1523 * interfere with communication with the firmware. 1524 */ 1525 if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL) 1526 *bp++ = 0xffff; 1527 else 1528 *bp++ = t4_read_reg(adapter, start); 1529 } 1530 } 1531 1532 /* 1533 * Copy our entire register map into the provided buffer. 1534 */ 1535 static void cxgb4vf_get_regs(struct net_device *dev, 1536 struct ethtool_regs *regs, 1537 void *regbuf) 1538 { 1539 struct adapter *adapter = netdev2adap(dev); 1540 1541 regs->version = mk_adap_vers(adapter); 1542 1543 /* 1544 * Fill in register buffer with our register map. 1545 */ 1546 memset(regbuf, 0, T4VF_REGMAP_SIZE); 1547 1548 reg_block_dump(adapter, regbuf, 1549 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_FIRST, 1550 T4VF_SGE_BASE_ADDR + T4VF_MOD_MAP_SGE_LAST); 1551 reg_block_dump(adapter, regbuf, 1552 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_FIRST, 1553 T4VF_MPS_BASE_ADDR + T4VF_MOD_MAP_MPS_LAST); 1554 1555 /* T5 adds new registers in the PL Register map. 1556 */ 1557 reg_block_dump(adapter, regbuf, 1558 T4VF_PL_BASE_ADDR + T4VF_MOD_MAP_PL_FIRST, 1559 T4VF_PL_BASE_ADDR + (is_t4(adapter->params.chip) 1560 ? A_PL_VF_WHOAMI : A_PL_VF_REVISION)); 1561 reg_block_dump(adapter, regbuf, 1562 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_FIRST, 1563 T4VF_CIM_BASE_ADDR + T4VF_MOD_MAP_CIM_LAST); 1564 1565 reg_block_dump(adapter, regbuf, 1566 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_FIRST, 1567 T4VF_MBDATA_BASE_ADDR + T4VF_MBDATA_LAST); 1568 } 1569 1570 /* 1571 * Report current Wake On LAN settings. 1572 */ 1573 static void cxgb4vf_get_wol(struct net_device *dev, 1574 struct ethtool_wolinfo *wol) 1575 { 1576 wol->supported = 0; 1577 wol->wolopts = 0; 1578 memset(&wol->sopass, 0, sizeof(wol->sopass)); 1579 } 1580 1581 /* 1582 * TCP Segmentation Offload flags which we support. 1583 */ 1584 #define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) 1585 1586 static const struct ethtool_ops cxgb4vf_ethtool_ops = { 1587 .get_settings = cxgb4vf_get_settings, 1588 .get_drvinfo = cxgb4vf_get_drvinfo, 1589 .get_msglevel = cxgb4vf_get_msglevel, 1590 .set_msglevel = cxgb4vf_set_msglevel, 1591 .get_ringparam = cxgb4vf_get_ringparam, 1592 .set_ringparam = cxgb4vf_set_ringparam, 1593 .get_coalesce = cxgb4vf_get_coalesce, 1594 .set_coalesce = cxgb4vf_set_coalesce, 1595 .get_pauseparam = cxgb4vf_get_pauseparam, 1596 .get_link = ethtool_op_get_link, 1597 .get_strings = cxgb4vf_get_strings, 1598 .set_phys_id = cxgb4vf_phys_id, 1599 .get_sset_count = cxgb4vf_get_sset_count, 1600 .get_ethtool_stats = cxgb4vf_get_ethtool_stats, 1601 .get_regs_len = cxgb4vf_get_regs_len, 1602 .get_regs = cxgb4vf_get_regs, 1603 .get_wol = cxgb4vf_get_wol, 1604 }; 1605 1606 /* 1607 * /sys/kernel/debug/cxgb4vf support code and data. 1608 * ================================================ 1609 */ 1610 1611 /* 1612 * Show SGE Queue Set information. We display QPL Queues Sets per line. 1613 */ 1614 #define QPL 4 1615 1616 static int sge_qinfo_show(struct seq_file *seq, void *v) 1617 { 1618 struct adapter *adapter = seq->private; 1619 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 1620 int qs, r = (uintptr_t)v - 1; 1621 1622 if (r) 1623 seq_putc(seq, '\n'); 1624 1625 #define S3(fmt_spec, s, v) \ 1626 do {\ 1627 seq_printf(seq, "%-12s", s); \ 1628 for (qs = 0; qs < n; ++qs) \ 1629 seq_printf(seq, " %16" fmt_spec, v); \ 1630 seq_putc(seq, '\n'); \ 1631 } while (0) 1632 #define S(s, v) S3("s", s, v) 1633 #define T(s, v) S3("u", s, txq[qs].v) 1634 #define R(s, v) S3("u", s, rxq[qs].v) 1635 1636 if (r < eth_entries) { 1637 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 1638 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 1639 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 1640 1641 S("QType:", "Ethernet"); 1642 S("Interface:", 1643 (rxq[qs].rspq.netdev 1644 ? rxq[qs].rspq.netdev->name 1645 : "N/A")); 1646 S3("d", "Port:", 1647 (rxq[qs].rspq.netdev 1648 ? ((struct port_info *) 1649 netdev_priv(rxq[qs].rspq.netdev))->port_id 1650 : -1)); 1651 T("TxQ ID:", q.abs_id); 1652 T("TxQ size:", q.size); 1653 T("TxQ inuse:", q.in_use); 1654 T("TxQ PIdx:", q.pidx); 1655 T("TxQ CIdx:", q.cidx); 1656 R("RspQ ID:", rspq.abs_id); 1657 R("RspQ size:", rspq.size); 1658 R("RspQE size:", rspq.iqe_len); 1659 S3("u", "Intr delay:", qtimer_val(adapter, &rxq[qs].rspq)); 1660 S3("u", "Intr pktcnt:", 1661 adapter->sge.counter_val[rxq[qs].rspq.pktcnt_idx]); 1662 R("RspQ CIdx:", rspq.cidx); 1663 R("RspQ Gen:", rspq.gen); 1664 R("FL ID:", fl.abs_id); 1665 R("FL size:", fl.size - MIN_FL_RESID); 1666 R("FL avail:", fl.avail); 1667 R("FL PIdx:", fl.pidx); 1668 R("FL CIdx:", fl.cidx); 1669 return 0; 1670 } 1671 1672 r -= eth_entries; 1673 if (r == 0) { 1674 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 1675 1676 seq_printf(seq, "%-12s %16s\n", "QType:", "FW event queue"); 1677 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", evtq->abs_id); 1678 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 1679 qtimer_val(adapter, evtq)); 1680 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 1681 adapter->sge.counter_val[evtq->pktcnt_idx]); 1682 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", evtq->cidx); 1683 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", evtq->gen); 1684 } else if (r == 1) { 1685 const struct sge_rspq *intrq = &adapter->sge.intrq; 1686 1687 seq_printf(seq, "%-12s %16s\n", "QType:", "Interrupt Queue"); 1688 seq_printf(seq, "%-12s %16u\n", "RspQ ID:", intrq->abs_id); 1689 seq_printf(seq, "%-12s %16u\n", "Intr delay:", 1690 qtimer_val(adapter, intrq)); 1691 seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:", 1692 adapter->sge.counter_val[intrq->pktcnt_idx]); 1693 seq_printf(seq, "%-12s %16u\n", "RspQ Cidx:", intrq->cidx); 1694 seq_printf(seq, "%-12s %16u\n", "RspQ Gen:", intrq->gen); 1695 } 1696 1697 #undef R 1698 #undef T 1699 #undef S 1700 #undef S3 1701 1702 return 0; 1703 } 1704 1705 /* 1706 * Return the number of "entries" in our "file". We group the multi-Queue 1707 * sections with QPL Queue Sets per "entry". The sections of the output are: 1708 * 1709 * Ethernet RX/TX Queue Sets 1710 * Firmware Event Queue 1711 * Forwarded Interrupt Queue (if in MSI mode) 1712 */ 1713 static int sge_queue_entries(const struct adapter *adapter) 1714 { 1715 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 1716 ((adapter->flags & USING_MSI) != 0); 1717 } 1718 1719 static void *sge_queue_start(struct seq_file *seq, loff_t *pos) 1720 { 1721 int entries = sge_queue_entries(seq->private); 1722 1723 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1724 } 1725 1726 static void sge_queue_stop(struct seq_file *seq, void *v) 1727 { 1728 } 1729 1730 static void *sge_queue_next(struct seq_file *seq, void *v, loff_t *pos) 1731 { 1732 int entries = sge_queue_entries(seq->private); 1733 1734 ++*pos; 1735 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1736 } 1737 1738 static const struct seq_operations sge_qinfo_seq_ops = { 1739 .start = sge_queue_start, 1740 .next = sge_queue_next, 1741 .stop = sge_queue_stop, 1742 .show = sge_qinfo_show 1743 }; 1744 1745 static int sge_qinfo_open(struct inode *inode, struct file *file) 1746 { 1747 int res = seq_open(file, &sge_qinfo_seq_ops); 1748 1749 if (!res) { 1750 struct seq_file *seq = file->private_data; 1751 seq->private = inode->i_private; 1752 } 1753 return res; 1754 } 1755 1756 static const struct file_operations sge_qinfo_debugfs_fops = { 1757 .owner = THIS_MODULE, 1758 .open = sge_qinfo_open, 1759 .read = seq_read, 1760 .llseek = seq_lseek, 1761 .release = seq_release, 1762 }; 1763 1764 /* 1765 * Show SGE Queue Set statistics. We display QPL Queues Sets per line. 1766 */ 1767 #define QPL 4 1768 1769 static int sge_qstats_show(struct seq_file *seq, void *v) 1770 { 1771 struct adapter *adapter = seq->private; 1772 int eth_entries = DIV_ROUND_UP(adapter->sge.ethqsets, QPL); 1773 int qs, r = (uintptr_t)v - 1; 1774 1775 if (r) 1776 seq_putc(seq, '\n'); 1777 1778 #define S3(fmt, s, v) \ 1779 do { \ 1780 seq_printf(seq, "%-16s", s); \ 1781 for (qs = 0; qs < n; ++qs) \ 1782 seq_printf(seq, " %8" fmt, v); \ 1783 seq_putc(seq, '\n'); \ 1784 } while (0) 1785 #define S(s, v) S3("s", s, v) 1786 1787 #define T3(fmt, s, v) S3(fmt, s, txq[qs].v) 1788 #define T(s, v) T3("lu", s, v) 1789 1790 #define R3(fmt, s, v) S3(fmt, s, rxq[qs].v) 1791 #define R(s, v) R3("lu", s, v) 1792 1793 if (r < eth_entries) { 1794 const struct sge_eth_rxq *rxq = &adapter->sge.ethrxq[r * QPL]; 1795 const struct sge_eth_txq *txq = &adapter->sge.ethtxq[r * QPL]; 1796 int n = min(QPL, adapter->sge.ethqsets - QPL * r); 1797 1798 S("QType:", "Ethernet"); 1799 S("Interface:", 1800 (rxq[qs].rspq.netdev 1801 ? rxq[qs].rspq.netdev->name 1802 : "N/A")); 1803 R3("u", "RspQNullInts:", rspq.unhandled_irqs); 1804 R("RxPackets:", stats.pkts); 1805 R("RxCSO:", stats.rx_cso); 1806 R("VLANxtract:", stats.vlan_ex); 1807 R("LROmerged:", stats.lro_merged); 1808 R("LROpackets:", stats.lro_pkts); 1809 R("RxDrops:", stats.rx_drops); 1810 T("TSO:", tso); 1811 T("TxCSO:", tx_cso); 1812 T("VLANins:", vlan_ins); 1813 T("TxQFull:", q.stops); 1814 T("TxQRestarts:", q.restarts); 1815 T("TxMapErr:", mapping_err); 1816 R("FLAllocErr:", fl.alloc_failed); 1817 R("FLLrgAlcErr:", fl.large_alloc_failed); 1818 R("FLStarving:", fl.starving); 1819 return 0; 1820 } 1821 1822 r -= eth_entries; 1823 if (r == 0) { 1824 const struct sge_rspq *evtq = &adapter->sge.fw_evtq; 1825 1826 seq_printf(seq, "%-8s %16s\n", "QType:", "FW event queue"); 1827 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 1828 evtq->unhandled_irqs); 1829 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", evtq->cidx); 1830 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", evtq->gen); 1831 } else if (r == 1) { 1832 const struct sge_rspq *intrq = &adapter->sge.intrq; 1833 1834 seq_printf(seq, "%-8s %16s\n", "QType:", "Interrupt Queue"); 1835 seq_printf(seq, "%-16s %8u\n", "RspQNullInts:", 1836 intrq->unhandled_irqs); 1837 seq_printf(seq, "%-16s %8u\n", "RspQ CIdx:", intrq->cidx); 1838 seq_printf(seq, "%-16s %8u\n", "RspQ Gen:", intrq->gen); 1839 } 1840 1841 #undef R 1842 #undef T 1843 #undef S 1844 #undef R3 1845 #undef T3 1846 #undef S3 1847 1848 return 0; 1849 } 1850 1851 /* 1852 * Return the number of "entries" in our "file". We group the multi-Queue 1853 * sections with QPL Queue Sets per "entry". The sections of the output are: 1854 * 1855 * Ethernet RX/TX Queue Sets 1856 * Firmware Event Queue 1857 * Forwarded Interrupt Queue (if in MSI mode) 1858 */ 1859 static int sge_qstats_entries(const struct adapter *adapter) 1860 { 1861 return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 + 1862 ((adapter->flags & USING_MSI) != 0); 1863 } 1864 1865 static void *sge_qstats_start(struct seq_file *seq, loff_t *pos) 1866 { 1867 int entries = sge_qstats_entries(seq->private); 1868 1869 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1870 } 1871 1872 static void sge_qstats_stop(struct seq_file *seq, void *v) 1873 { 1874 } 1875 1876 static void *sge_qstats_next(struct seq_file *seq, void *v, loff_t *pos) 1877 { 1878 int entries = sge_qstats_entries(seq->private); 1879 1880 (*pos)++; 1881 return *pos < entries ? (void *)((uintptr_t)*pos + 1) : NULL; 1882 } 1883 1884 static const struct seq_operations sge_qstats_seq_ops = { 1885 .start = sge_qstats_start, 1886 .next = sge_qstats_next, 1887 .stop = sge_qstats_stop, 1888 .show = sge_qstats_show 1889 }; 1890 1891 static int sge_qstats_open(struct inode *inode, struct file *file) 1892 { 1893 int res = seq_open(file, &sge_qstats_seq_ops); 1894 1895 if (res == 0) { 1896 struct seq_file *seq = file->private_data; 1897 seq->private = inode->i_private; 1898 } 1899 return res; 1900 } 1901 1902 static const struct file_operations sge_qstats_proc_fops = { 1903 .owner = THIS_MODULE, 1904 .open = sge_qstats_open, 1905 .read = seq_read, 1906 .llseek = seq_lseek, 1907 .release = seq_release, 1908 }; 1909 1910 /* 1911 * Show PCI-E SR-IOV Virtual Function Resource Limits. 1912 */ 1913 static int resources_show(struct seq_file *seq, void *v) 1914 { 1915 struct adapter *adapter = seq->private; 1916 struct vf_resources *vfres = &adapter->params.vfres; 1917 1918 #define S(desc, fmt, var) \ 1919 seq_printf(seq, "%-60s " fmt "\n", \ 1920 desc " (" #var "):", vfres->var) 1921 1922 S("Virtual Interfaces", "%d", nvi); 1923 S("Egress Queues", "%d", neq); 1924 S("Ethernet Control", "%d", nethctrl); 1925 S("Ingress Queues/w Free Lists/Interrupts", "%d", niqflint); 1926 S("Ingress Queues", "%d", niq); 1927 S("Traffic Class", "%d", tc); 1928 S("Port Access Rights Mask", "%#x", pmask); 1929 S("MAC Address Filters", "%d", nexactf); 1930 S("Firmware Command Read Capabilities", "%#x", r_caps); 1931 S("Firmware Command Write/Execute Capabilities", "%#x", wx_caps); 1932 1933 #undef S 1934 1935 return 0; 1936 } 1937 1938 static int resources_open(struct inode *inode, struct file *file) 1939 { 1940 return single_open(file, resources_show, inode->i_private); 1941 } 1942 1943 static const struct file_operations resources_proc_fops = { 1944 .owner = THIS_MODULE, 1945 .open = resources_open, 1946 .read = seq_read, 1947 .llseek = seq_lseek, 1948 .release = single_release, 1949 }; 1950 1951 /* 1952 * Show Virtual Interfaces. 1953 */ 1954 static int interfaces_show(struct seq_file *seq, void *v) 1955 { 1956 if (v == SEQ_START_TOKEN) { 1957 seq_puts(seq, "Interface Port VIID\n"); 1958 } else { 1959 struct adapter *adapter = seq->private; 1960 int pidx = (uintptr_t)v - 2; 1961 struct net_device *dev = adapter->port[pidx]; 1962 struct port_info *pi = netdev_priv(dev); 1963 1964 seq_printf(seq, "%9s %4d %#5x\n", 1965 dev->name, pi->port_id, pi->viid); 1966 } 1967 return 0; 1968 } 1969 1970 static inline void *interfaces_get_idx(struct adapter *adapter, loff_t pos) 1971 { 1972 return pos <= adapter->params.nports 1973 ? (void *)(uintptr_t)(pos + 1) 1974 : NULL; 1975 } 1976 1977 static void *interfaces_start(struct seq_file *seq, loff_t *pos) 1978 { 1979 return *pos 1980 ? interfaces_get_idx(seq->private, *pos) 1981 : SEQ_START_TOKEN; 1982 } 1983 1984 static void *interfaces_next(struct seq_file *seq, void *v, loff_t *pos) 1985 { 1986 (*pos)++; 1987 return interfaces_get_idx(seq->private, *pos); 1988 } 1989 1990 static void interfaces_stop(struct seq_file *seq, void *v) 1991 { 1992 } 1993 1994 static const struct seq_operations interfaces_seq_ops = { 1995 .start = interfaces_start, 1996 .next = interfaces_next, 1997 .stop = interfaces_stop, 1998 .show = interfaces_show 1999 }; 2000 2001 static int interfaces_open(struct inode *inode, struct file *file) 2002 { 2003 int res = seq_open(file, &interfaces_seq_ops); 2004 2005 if (res == 0) { 2006 struct seq_file *seq = file->private_data; 2007 seq->private = inode->i_private; 2008 } 2009 return res; 2010 } 2011 2012 static const struct file_operations interfaces_proc_fops = { 2013 .owner = THIS_MODULE, 2014 .open = interfaces_open, 2015 .read = seq_read, 2016 .llseek = seq_lseek, 2017 .release = seq_release, 2018 }; 2019 2020 /* 2021 * /sys/kernel/debugfs/cxgb4vf/ files list. 2022 */ 2023 struct cxgb4vf_debugfs_entry { 2024 const char *name; /* name of debugfs node */ 2025 umode_t mode; /* file system mode */ 2026 const struct file_operations *fops; 2027 }; 2028 2029 static struct cxgb4vf_debugfs_entry debugfs_files[] = { 2030 { "sge_qinfo", S_IRUGO, &sge_qinfo_debugfs_fops }, 2031 { "sge_qstats", S_IRUGO, &sge_qstats_proc_fops }, 2032 { "resources", S_IRUGO, &resources_proc_fops }, 2033 { "interfaces", S_IRUGO, &interfaces_proc_fops }, 2034 }; 2035 2036 /* 2037 * Module and device initialization and cleanup code. 2038 * ================================================== 2039 */ 2040 2041 /* 2042 * Set up out /sys/kernel/debug/cxgb4vf sub-nodes. We assume that the 2043 * directory (debugfs_root) has already been set up. 2044 */ 2045 static int setup_debugfs(struct adapter *adapter) 2046 { 2047 int i; 2048 2049 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2050 2051 /* 2052 * Debugfs support is best effort. 2053 */ 2054 for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) 2055 (void)debugfs_create_file(debugfs_files[i].name, 2056 debugfs_files[i].mode, 2057 adapter->debugfs_root, 2058 (void *)adapter, 2059 debugfs_files[i].fops); 2060 2061 return 0; 2062 } 2063 2064 /* 2065 * Tear down the /sys/kernel/debug/cxgb4vf sub-nodes created above. We leave 2066 * it to our caller to tear down the directory (debugfs_root). 2067 */ 2068 static void cleanup_debugfs(struct adapter *adapter) 2069 { 2070 BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root)); 2071 2072 /* 2073 * Unlike our sister routine cleanup_proc(), we don't need to remove 2074 * individual entries because a call will be made to 2075 * debugfs_remove_recursive(). We just need to clean up any ancillary 2076 * persistent state. 2077 */ 2078 /* nothing to do */ 2079 } 2080 2081 /* 2082 * Perform early "adapter" initialization. This is where we discover what 2083 * adapter parameters we're going to be using and initialize basic adapter 2084 * hardware support. 2085 */ 2086 static int adap_init0(struct adapter *adapter) 2087 { 2088 struct vf_resources *vfres = &adapter->params.vfres; 2089 struct sge_params *sge_params = &adapter->params.sge; 2090 struct sge *s = &adapter->sge; 2091 unsigned int ethqsets; 2092 int err; 2093 u32 param, val = 0; 2094 unsigned int chipid; 2095 2096 /* 2097 * Wait for the device to become ready before proceeding ... 2098 */ 2099 err = t4vf_wait_dev_ready(adapter); 2100 if (err) { 2101 dev_err(adapter->pdev_dev, "device didn't become ready:" 2102 " err=%d\n", err); 2103 return err; 2104 } 2105 2106 /* 2107 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux 2108 * 2.6.31 and later we can't call pci_reset_function() in order to 2109 * issue an FLR because of a self- deadlock on the device semaphore. 2110 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the 2111 * cases where they're needed -- for instance, some versions of KVM 2112 * fail to reset "Assigned Devices" when the VM reboots. Therefore we 2113 * use the firmware based reset in order to reset any per function 2114 * state. 2115 */ 2116 err = t4vf_fw_reset(adapter); 2117 if (err < 0) { 2118 dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err); 2119 return err; 2120 } 2121 2122 adapter->params.chip = 0; 2123 switch (adapter->pdev->device >> 12) { 2124 case CHELSIO_T4: 2125 adapter->params.chip = CHELSIO_CHIP_CODE(CHELSIO_T4, 0); 2126 break; 2127 case CHELSIO_T5: 2128 chipid = G_REV(t4_read_reg(adapter, A_PL_VF_REV)); 2129 adapter->params.chip |= CHELSIO_CHIP_CODE(CHELSIO_T5, chipid); 2130 break; 2131 } 2132 2133 /* 2134 * Grab basic operational parameters. These will predominantly have 2135 * been set up by the Physical Function Driver or will be hard coded 2136 * into the adapter. We just have to live with them ... Note that 2137 * we _must_ get our VPD parameters before our SGE parameters because 2138 * we need to know the adapter's core clock from the VPD in order to 2139 * properly decode the SGE Timer Values. 2140 */ 2141 err = t4vf_get_dev_params(adapter); 2142 if (err) { 2143 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2144 " device parameters: err=%d\n", err); 2145 return err; 2146 } 2147 err = t4vf_get_vpd_params(adapter); 2148 if (err) { 2149 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2150 " VPD parameters: err=%d\n", err); 2151 return err; 2152 } 2153 err = t4vf_get_sge_params(adapter); 2154 if (err) { 2155 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2156 " SGE parameters: err=%d\n", err); 2157 return err; 2158 } 2159 err = t4vf_get_rss_glb_config(adapter); 2160 if (err) { 2161 dev_err(adapter->pdev_dev, "unable to retrieve adapter" 2162 " RSS parameters: err=%d\n", err); 2163 return err; 2164 } 2165 if (adapter->params.rss.mode != 2166 FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) { 2167 dev_err(adapter->pdev_dev, "unable to operate with global RSS" 2168 " mode %d\n", adapter->params.rss.mode); 2169 return -EINVAL; 2170 } 2171 err = t4vf_sge_init(adapter); 2172 if (err) { 2173 dev_err(adapter->pdev_dev, "unable to use adapter parameters:" 2174 " err=%d\n", err); 2175 return err; 2176 } 2177 2178 /* If we're running on newer firmware, let it know that we're 2179 * prepared to deal with encapsulated CPL messages. Older 2180 * firmware won't understand this and we'll just get 2181 * unencapsulated messages ... 2182 */ 2183 param = FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | 2184 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP); 2185 val = 1; 2186 (void) t4vf_set_params(adapter, 1, ¶m, &val); 2187 2188 /* 2189 * Retrieve our RX interrupt holdoff timer values and counter 2190 * threshold values from the SGE parameters. 2191 */ 2192 s->timer_val[0] = core_ticks_to_us(adapter, 2193 TIMERVALUE0_GET(sge_params->sge_timer_value_0_and_1)); 2194 s->timer_val[1] = core_ticks_to_us(adapter, 2195 TIMERVALUE1_GET(sge_params->sge_timer_value_0_and_1)); 2196 s->timer_val[2] = core_ticks_to_us(adapter, 2197 TIMERVALUE0_GET(sge_params->sge_timer_value_2_and_3)); 2198 s->timer_val[3] = core_ticks_to_us(adapter, 2199 TIMERVALUE1_GET(sge_params->sge_timer_value_2_and_3)); 2200 s->timer_val[4] = core_ticks_to_us(adapter, 2201 TIMERVALUE0_GET(sge_params->sge_timer_value_4_and_5)); 2202 s->timer_val[5] = core_ticks_to_us(adapter, 2203 TIMERVALUE1_GET(sge_params->sge_timer_value_4_and_5)); 2204 2205 s->counter_val[0] = 2206 THRESHOLD_0_GET(sge_params->sge_ingress_rx_threshold); 2207 s->counter_val[1] = 2208 THRESHOLD_1_GET(sge_params->sge_ingress_rx_threshold); 2209 s->counter_val[2] = 2210 THRESHOLD_2_GET(sge_params->sge_ingress_rx_threshold); 2211 s->counter_val[3] = 2212 THRESHOLD_3_GET(sge_params->sge_ingress_rx_threshold); 2213 2214 /* 2215 * Grab our Virtual Interface resource allocation, extract the 2216 * features that we're interested in and do a bit of sanity testing on 2217 * what we discover. 2218 */ 2219 err = t4vf_get_vfres(adapter); 2220 if (err) { 2221 dev_err(adapter->pdev_dev, "unable to get virtual interface" 2222 " resources: err=%d\n", err); 2223 return err; 2224 } 2225 2226 /* 2227 * The number of "ports" which we support is equal to the number of 2228 * Virtual Interfaces with which we've been provisioned. 2229 */ 2230 adapter->params.nports = vfres->nvi; 2231 if (adapter->params.nports > MAX_NPORTS) { 2232 dev_warn(adapter->pdev_dev, "only using %d of %d allowed" 2233 " virtual interfaces\n", MAX_NPORTS, 2234 adapter->params.nports); 2235 adapter->params.nports = MAX_NPORTS; 2236 } 2237 2238 /* 2239 * We need to reserve a number of the ingress queues with Free List 2240 * and Interrupt capabilities for special interrupt purposes (like 2241 * asynchronous firmware messages, or forwarded interrupts if we're 2242 * using MSI). The rest of the FL/Intr-capable ingress queues will be 2243 * matched up one-for-one with Ethernet/Control egress queues in order 2244 * to form "Queue Sets" which will be aportioned between the "ports". 2245 * For each Queue Set, we'll need the ability to allocate two Egress 2246 * Contexts -- one for the Ingress Queue Free List and one for the TX 2247 * Ethernet Queue. 2248 */ 2249 ethqsets = vfres->niqflint - INGQ_EXTRAS; 2250 if (vfres->nethctrl != ethqsets) { 2251 dev_warn(adapter->pdev_dev, "unequal number of [available]" 2252 " ingress/egress queues (%d/%d); using minimum for" 2253 " number of Queue Sets\n", ethqsets, vfres->nethctrl); 2254 ethqsets = min(vfres->nethctrl, ethqsets); 2255 } 2256 if (vfres->neq < ethqsets*2) { 2257 dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)" 2258 " to support Queue Sets (%d); reducing allowed Queue" 2259 " Sets\n", vfres->neq, ethqsets); 2260 ethqsets = vfres->neq/2; 2261 } 2262 if (ethqsets > MAX_ETH_QSETS) { 2263 dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue" 2264 " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets); 2265 ethqsets = MAX_ETH_QSETS; 2266 } 2267 if (vfres->niq != 0 || vfres->neq > ethqsets*2) { 2268 dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)" 2269 " ignored\n", vfres->niq, vfres->neq - ethqsets*2); 2270 } 2271 adapter->sge.max_ethqsets = ethqsets; 2272 2273 /* 2274 * Check for various parameter sanity issues. Most checks simply 2275 * result in us using fewer resources than our provissioning but we 2276 * do need at least one "port" with which to work ... 2277 */ 2278 if (adapter->sge.max_ethqsets < adapter->params.nports) { 2279 dev_warn(adapter->pdev_dev, "only using %d of %d available" 2280 " virtual interfaces (too few Queue Sets)\n", 2281 adapter->sge.max_ethqsets, adapter->params.nports); 2282 adapter->params.nports = adapter->sge.max_ethqsets; 2283 } 2284 if (adapter->params.nports == 0) { 2285 dev_err(adapter->pdev_dev, "no virtual interfaces configured/" 2286 "usable!\n"); 2287 return -EINVAL; 2288 } 2289 return 0; 2290 } 2291 2292 static inline void init_rspq(struct sge_rspq *rspq, u8 timer_idx, 2293 u8 pkt_cnt_idx, unsigned int size, 2294 unsigned int iqe_size) 2295 { 2296 rspq->intr_params = (QINTR_TIMER_IDX(timer_idx) | 2297 (pkt_cnt_idx < SGE_NCOUNTERS ? QINTR_CNT_EN : 0)); 2298 rspq->pktcnt_idx = (pkt_cnt_idx < SGE_NCOUNTERS 2299 ? pkt_cnt_idx 2300 : 0); 2301 rspq->iqe_len = iqe_size; 2302 rspq->size = size; 2303 } 2304 2305 /* 2306 * Perform default configuration of DMA queues depending on the number and 2307 * type of ports we found and the number of available CPUs. Most settings can 2308 * be modified by the admin via ethtool and cxgbtool prior to the adapter 2309 * being brought up for the first time. 2310 */ 2311 static void cfg_queues(struct adapter *adapter) 2312 { 2313 struct sge *s = &adapter->sge; 2314 int q10g, n10g, qidx, pidx, qs; 2315 size_t iqe_size; 2316 2317 /* 2318 * We should not be called till we know how many Queue Sets we can 2319 * support. In particular, this means that we need to know what kind 2320 * of interrupts we'll be using ... 2321 */ 2322 BUG_ON((adapter->flags & (USING_MSIX|USING_MSI)) == 0); 2323 2324 /* 2325 * Count the number of 10GbE Virtual Interfaces that we have. 2326 */ 2327 n10g = 0; 2328 for_each_port(adapter, pidx) 2329 n10g += is_10g_port(&adap2pinfo(adapter, pidx)->link_cfg); 2330 2331 /* 2332 * We default to 1 queue per non-10G port and up to # of cores queues 2333 * per 10G port. 2334 */ 2335 if (n10g == 0) 2336 q10g = 0; 2337 else { 2338 int n1g = (adapter->params.nports - n10g); 2339 q10g = (adapter->sge.max_ethqsets - n1g) / n10g; 2340 if (q10g > num_online_cpus()) 2341 q10g = num_online_cpus(); 2342 } 2343 2344 /* 2345 * Allocate the "Queue Sets" to the various Virtual Interfaces. 2346 * The layout will be established in setup_sge_queues() when the 2347 * adapter is brough up for the first time. 2348 */ 2349 qidx = 0; 2350 for_each_port(adapter, pidx) { 2351 struct port_info *pi = adap2pinfo(adapter, pidx); 2352 2353 pi->first_qset = qidx; 2354 pi->nqsets = is_10g_port(&pi->link_cfg) ? q10g : 1; 2355 qidx += pi->nqsets; 2356 } 2357 s->ethqsets = qidx; 2358 2359 /* 2360 * The Ingress Queue Entry Size for our various Response Queues needs 2361 * to be big enough to accommodate the largest message we can receive 2362 * from the chip/firmware; which is 64 bytes ... 2363 */ 2364 iqe_size = 64; 2365 2366 /* 2367 * Set up default Queue Set parameters ... Start off with the 2368 * shortest interrupt holdoff timer. 2369 */ 2370 for (qs = 0; qs < s->max_ethqsets; qs++) { 2371 struct sge_eth_rxq *rxq = &s->ethrxq[qs]; 2372 struct sge_eth_txq *txq = &s->ethtxq[qs]; 2373 2374 init_rspq(&rxq->rspq, 0, 0, 1024, iqe_size); 2375 rxq->fl.size = 72; 2376 txq->q.size = 1024; 2377 } 2378 2379 /* 2380 * The firmware event queue is used for link state changes and 2381 * notifications of TX DMA completions. 2382 */ 2383 init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size); 2384 2385 /* 2386 * The forwarded interrupt queue is used when we're in MSI interrupt 2387 * mode. In this mode all interrupts associated with RX queues will 2388 * be forwarded to a single queue which we'll associate with our MSI 2389 * interrupt vector. The messages dropped in the forwarded interrupt 2390 * queue will indicate which ingress queue needs servicing ... This 2391 * queue needs to be large enough to accommodate all of the ingress 2392 * queues which are forwarding their interrupt (+1 to prevent the PIDX 2393 * from equalling the CIDX if every ingress queue has an outstanding 2394 * interrupt). The queue doesn't need to be any larger because no 2395 * ingress queue will ever have more than one outstanding interrupt at 2396 * any time ... 2397 */ 2398 init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1, 2399 iqe_size); 2400 } 2401 2402 /* 2403 * Reduce the number of Ethernet queues across all ports to at most n. 2404 * n provides at least one queue per port. 2405 */ 2406 static void reduce_ethqs(struct adapter *adapter, int n) 2407 { 2408 int i; 2409 struct port_info *pi; 2410 2411 /* 2412 * While we have too many active Ether Queue Sets, interate across the 2413 * "ports" and reduce their individual Queue Set allocations. 2414 */ 2415 BUG_ON(n < adapter->params.nports); 2416 while (n < adapter->sge.ethqsets) 2417 for_each_port(adapter, i) { 2418 pi = adap2pinfo(adapter, i); 2419 if (pi->nqsets > 1) { 2420 pi->nqsets--; 2421 adapter->sge.ethqsets--; 2422 if (adapter->sge.ethqsets <= n) 2423 break; 2424 } 2425 } 2426 2427 /* 2428 * Reassign the starting Queue Sets for each of the "ports" ... 2429 */ 2430 n = 0; 2431 for_each_port(adapter, i) { 2432 pi = adap2pinfo(adapter, i); 2433 pi->first_qset = n; 2434 n += pi->nqsets; 2435 } 2436 } 2437 2438 /* 2439 * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally 2440 * we get a separate MSI-X vector for every "Queue Set" plus any extras we 2441 * need. Minimally we need one for every Virtual Interface plus those needed 2442 * for our "extras". Note that this process may lower the maximum number of 2443 * allowed Queue Sets ... 2444 */ 2445 static int enable_msix(struct adapter *adapter) 2446 { 2447 int i, want, need, nqsets; 2448 struct msix_entry entries[MSIX_ENTRIES]; 2449 struct sge *s = &adapter->sge; 2450 2451 for (i = 0; i < MSIX_ENTRIES; ++i) 2452 entries[i].entry = i; 2453 2454 /* 2455 * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets" 2456 * plus those needed for our "extras" (for example, the firmware 2457 * message queue). We _need_ at least one "Queue Set" per Virtual 2458 * Interface plus those needed for our "extras". So now we get to see 2459 * if the song is right ... 2460 */ 2461 want = s->max_ethqsets + MSIX_EXTRAS; 2462 need = adapter->params.nports + MSIX_EXTRAS; 2463 2464 want = pci_enable_msix_range(adapter->pdev, entries, need, want); 2465 if (want < 0) 2466 return want; 2467 2468 nqsets = want - MSIX_EXTRAS; 2469 if (nqsets < s->max_ethqsets) { 2470 dev_warn(adapter->pdev_dev, "only enough MSI-X vectors" 2471 " for %d Queue Sets\n", nqsets); 2472 s->max_ethqsets = nqsets; 2473 if (nqsets < s->ethqsets) 2474 reduce_ethqs(adapter, nqsets); 2475 } 2476 for (i = 0; i < want; ++i) 2477 adapter->msix_info[i].vec = entries[i].vector; 2478 2479 return 0; 2480 } 2481 2482 static const struct net_device_ops cxgb4vf_netdev_ops = { 2483 .ndo_open = cxgb4vf_open, 2484 .ndo_stop = cxgb4vf_stop, 2485 .ndo_start_xmit = t4vf_eth_xmit, 2486 .ndo_get_stats = cxgb4vf_get_stats, 2487 .ndo_set_rx_mode = cxgb4vf_set_rxmode, 2488 .ndo_set_mac_address = cxgb4vf_set_mac_addr, 2489 .ndo_validate_addr = eth_validate_addr, 2490 .ndo_do_ioctl = cxgb4vf_do_ioctl, 2491 .ndo_change_mtu = cxgb4vf_change_mtu, 2492 .ndo_fix_features = cxgb4vf_fix_features, 2493 .ndo_set_features = cxgb4vf_set_features, 2494 #ifdef CONFIG_NET_POLL_CONTROLLER 2495 .ndo_poll_controller = cxgb4vf_poll_controller, 2496 #endif 2497 }; 2498 2499 /* 2500 * "Probe" a device: initialize a device and construct all kernel and driver 2501 * state needed to manage the device. This routine is called "init_one" in 2502 * the PF Driver ... 2503 */ 2504 static int cxgb4vf_pci_probe(struct pci_dev *pdev, 2505 const struct pci_device_id *ent) 2506 { 2507 int pci_using_dac; 2508 int err, pidx; 2509 unsigned int pmask; 2510 struct adapter *adapter; 2511 struct port_info *pi; 2512 struct net_device *netdev; 2513 2514 /* 2515 * Print our driver banner the first time we're called to initialize a 2516 * device. 2517 */ 2518 pr_info_once("%s - version %s\n", DRV_DESC, DRV_VERSION); 2519 2520 /* 2521 * Initialize generic PCI device state. 2522 */ 2523 err = pci_enable_device(pdev); 2524 if (err) { 2525 dev_err(&pdev->dev, "cannot enable PCI device\n"); 2526 return err; 2527 } 2528 2529 /* 2530 * Reserve PCI resources for the device. If we can't get them some 2531 * other driver may have already claimed the device ... 2532 */ 2533 err = pci_request_regions(pdev, KBUILD_MODNAME); 2534 if (err) { 2535 dev_err(&pdev->dev, "cannot obtain PCI resources\n"); 2536 goto err_disable_device; 2537 } 2538 2539 /* 2540 * Set up our DMA mask: try for 64-bit address masking first and 2541 * fall back to 32-bit if we can't get 64 bits ... 2542 */ 2543 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 2544 if (err == 0) { 2545 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 2546 if (err) { 2547 dev_err(&pdev->dev, "unable to obtain 64-bit DMA for" 2548 " coherent allocations\n"); 2549 goto err_release_regions; 2550 } 2551 pci_using_dac = 1; 2552 } else { 2553 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 2554 if (err != 0) { 2555 dev_err(&pdev->dev, "no usable DMA configuration\n"); 2556 goto err_release_regions; 2557 } 2558 pci_using_dac = 0; 2559 } 2560 2561 /* 2562 * Enable bus mastering for the device ... 2563 */ 2564 pci_set_master(pdev); 2565 2566 /* 2567 * Allocate our adapter data structure and attach it to the device. 2568 */ 2569 adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); 2570 if (!adapter) { 2571 err = -ENOMEM; 2572 goto err_release_regions; 2573 } 2574 pci_set_drvdata(pdev, adapter); 2575 adapter->pdev = pdev; 2576 adapter->pdev_dev = &pdev->dev; 2577 2578 /* 2579 * Initialize SMP data synchronization resources. 2580 */ 2581 spin_lock_init(&adapter->stats_lock); 2582 2583 /* 2584 * Map our I/O registers in BAR0. 2585 */ 2586 adapter->regs = pci_ioremap_bar(pdev, 0); 2587 if (!adapter->regs) { 2588 dev_err(&pdev->dev, "cannot map device registers\n"); 2589 err = -ENOMEM; 2590 goto err_free_adapter; 2591 } 2592 2593 /* 2594 * Initialize adapter level features. 2595 */ 2596 adapter->name = pci_name(pdev); 2597 adapter->msg_enable = dflt_msg_enable; 2598 err = adap_init0(adapter); 2599 if (err) 2600 goto err_unmap_bar; 2601 2602 /* 2603 * Allocate our "adapter ports" and stitch everything together. 2604 */ 2605 pmask = adapter->params.vfres.pmask; 2606 for_each_port(adapter, pidx) { 2607 int port_id, viid; 2608 2609 /* 2610 * We simplistically allocate our virtual interfaces 2611 * sequentially across the port numbers to which we have 2612 * access rights. This should be configurable in some manner 2613 * ... 2614 */ 2615 if (pmask == 0) 2616 break; 2617 port_id = ffs(pmask) - 1; 2618 pmask &= ~(1 << port_id); 2619 viid = t4vf_alloc_vi(adapter, port_id); 2620 if (viid < 0) { 2621 dev_err(&pdev->dev, "cannot allocate VI for port %d:" 2622 " err=%d\n", port_id, viid); 2623 err = viid; 2624 goto err_free_dev; 2625 } 2626 2627 /* 2628 * Allocate our network device and stitch things together. 2629 */ 2630 netdev = alloc_etherdev_mq(sizeof(struct port_info), 2631 MAX_PORT_QSETS); 2632 if (netdev == NULL) { 2633 t4vf_free_vi(adapter, viid); 2634 err = -ENOMEM; 2635 goto err_free_dev; 2636 } 2637 adapter->port[pidx] = netdev; 2638 SET_NETDEV_DEV(netdev, &pdev->dev); 2639 pi = netdev_priv(netdev); 2640 pi->adapter = adapter; 2641 pi->pidx = pidx; 2642 pi->port_id = port_id; 2643 pi->viid = viid; 2644 2645 /* 2646 * Initialize the starting state of our "port" and register 2647 * it. 2648 */ 2649 pi->xact_addr_filt = -1; 2650 netif_carrier_off(netdev); 2651 netdev->irq = pdev->irq; 2652 2653 netdev->hw_features = NETIF_F_SG | TSO_FLAGS | 2654 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2655 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_RXCSUM; 2656 netdev->vlan_features = NETIF_F_SG | TSO_FLAGS | 2657 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2658 NETIF_F_HIGHDMA; 2659 netdev->features = netdev->hw_features | 2660 NETIF_F_HW_VLAN_CTAG_TX; 2661 if (pci_using_dac) 2662 netdev->features |= NETIF_F_HIGHDMA; 2663 2664 netdev->priv_flags |= IFF_UNICAST_FLT; 2665 2666 netdev->netdev_ops = &cxgb4vf_netdev_ops; 2667 SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops); 2668 2669 /* 2670 * Initialize the hardware/software state for the port. 2671 */ 2672 err = t4vf_port_init(adapter, pidx); 2673 if (err) { 2674 dev_err(&pdev->dev, "cannot initialize port %d\n", 2675 pidx); 2676 goto err_free_dev; 2677 } 2678 } 2679 2680 /* 2681 * The "card" is now ready to go. If any errors occur during device 2682 * registration we do not fail the whole "card" but rather proceed 2683 * only with the ports we manage to register successfully. However we 2684 * must register at least one net device. 2685 */ 2686 for_each_port(adapter, pidx) { 2687 netdev = adapter->port[pidx]; 2688 if (netdev == NULL) 2689 continue; 2690 2691 err = register_netdev(netdev); 2692 if (err) { 2693 dev_warn(&pdev->dev, "cannot register net device %s," 2694 " skipping\n", netdev->name); 2695 continue; 2696 } 2697 2698 set_bit(pidx, &adapter->registered_device_map); 2699 } 2700 if (adapter->registered_device_map == 0) { 2701 dev_err(&pdev->dev, "could not register any net devices\n"); 2702 goto err_free_dev; 2703 } 2704 2705 /* 2706 * Set up our debugfs entries. 2707 */ 2708 if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) { 2709 adapter->debugfs_root = 2710 debugfs_create_dir(pci_name(pdev), 2711 cxgb4vf_debugfs_root); 2712 if (IS_ERR_OR_NULL(adapter->debugfs_root)) 2713 dev_warn(&pdev->dev, "could not create debugfs" 2714 " directory"); 2715 else 2716 setup_debugfs(adapter); 2717 } 2718 2719 /* 2720 * See what interrupts we'll be using. If we've been configured to 2721 * use MSI-X interrupts, try to enable them but fall back to using 2722 * MSI interrupts if we can't enable MSI-X interrupts. If we can't 2723 * get MSI interrupts we bail with the error. 2724 */ 2725 if (msi == MSI_MSIX && enable_msix(adapter) == 0) 2726 adapter->flags |= USING_MSIX; 2727 else { 2728 err = pci_enable_msi(pdev); 2729 if (err) { 2730 dev_err(&pdev->dev, "Unable to allocate %s interrupts;" 2731 " err=%d\n", 2732 msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err); 2733 goto err_free_debugfs; 2734 } 2735 adapter->flags |= USING_MSI; 2736 } 2737 2738 /* 2739 * Now that we know how many "ports" we have and what their types are, 2740 * and how many Queue Sets we can support, we can configure our queue 2741 * resources. 2742 */ 2743 cfg_queues(adapter); 2744 2745 /* 2746 * Print a short notice on the existence and configuration of the new 2747 * VF network device ... 2748 */ 2749 for_each_port(adapter, pidx) { 2750 dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n", 2751 adapter->port[pidx]->name, 2752 (adapter->flags & USING_MSIX) ? "MSI-X" : 2753 (adapter->flags & USING_MSI) ? "MSI" : ""); 2754 } 2755 2756 /* 2757 * Return success! 2758 */ 2759 return 0; 2760 2761 /* 2762 * Error recovery and exit code. Unwind state that's been created 2763 * so far and return the error. 2764 */ 2765 2766 err_free_debugfs: 2767 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) { 2768 cleanup_debugfs(adapter); 2769 debugfs_remove_recursive(adapter->debugfs_root); 2770 } 2771 2772 err_free_dev: 2773 for_each_port(adapter, pidx) { 2774 netdev = adapter->port[pidx]; 2775 if (netdev == NULL) 2776 continue; 2777 pi = netdev_priv(netdev); 2778 t4vf_free_vi(adapter, pi->viid); 2779 if (test_bit(pidx, &adapter->registered_device_map)) 2780 unregister_netdev(netdev); 2781 free_netdev(netdev); 2782 } 2783 2784 err_unmap_bar: 2785 iounmap(adapter->regs); 2786 2787 err_free_adapter: 2788 kfree(adapter); 2789 2790 err_release_regions: 2791 pci_release_regions(pdev); 2792 pci_clear_master(pdev); 2793 2794 err_disable_device: 2795 pci_disable_device(pdev); 2796 2797 return err; 2798 } 2799 2800 /* 2801 * "Remove" a device: tear down all kernel and driver state created in the 2802 * "probe" routine and quiesce the device (disable interrupts, etc.). (Note 2803 * that this is called "remove_one" in the PF Driver.) 2804 */ 2805 static void cxgb4vf_pci_remove(struct pci_dev *pdev) 2806 { 2807 struct adapter *adapter = pci_get_drvdata(pdev); 2808 2809 /* 2810 * Tear down driver state associated with device. 2811 */ 2812 if (adapter) { 2813 int pidx; 2814 2815 /* 2816 * Stop all of our activity. Unregister network port, 2817 * disable interrupts, etc. 2818 */ 2819 for_each_port(adapter, pidx) 2820 if (test_bit(pidx, &adapter->registered_device_map)) 2821 unregister_netdev(adapter->port[pidx]); 2822 t4vf_sge_stop(adapter); 2823 if (adapter->flags & USING_MSIX) { 2824 pci_disable_msix(adapter->pdev); 2825 adapter->flags &= ~USING_MSIX; 2826 } else if (adapter->flags & USING_MSI) { 2827 pci_disable_msi(adapter->pdev); 2828 adapter->flags &= ~USING_MSI; 2829 } 2830 2831 /* 2832 * Tear down our debugfs entries. 2833 */ 2834 if (!IS_ERR_OR_NULL(adapter->debugfs_root)) { 2835 cleanup_debugfs(adapter); 2836 debugfs_remove_recursive(adapter->debugfs_root); 2837 } 2838 2839 /* 2840 * Free all of the various resources which we've acquired ... 2841 */ 2842 t4vf_free_sge_resources(adapter); 2843 for_each_port(adapter, pidx) { 2844 struct net_device *netdev = adapter->port[pidx]; 2845 struct port_info *pi; 2846 2847 if (netdev == NULL) 2848 continue; 2849 2850 pi = netdev_priv(netdev); 2851 t4vf_free_vi(adapter, pi->viid); 2852 free_netdev(netdev); 2853 } 2854 iounmap(adapter->regs); 2855 kfree(adapter); 2856 } 2857 2858 /* 2859 * Disable the device and release its PCI resources. 2860 */ 2861 pci_disable_device(pdev); 2862 pci_clear_master(pdev); 2863 pci_release_regions(pdev); 2864 } 2865 2866 /* 2867 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt 2868 * delivery. 2869 */ 2870 static void cxgb4vf_pci_shutdown(struct pci_dev *pdev) 2871 { 2872 struct adapter *adapter; 2873 int pidx; 2874 2875 adapter = pci_get_drvdata(pdev); 2876 if (!adapter) 2877 return; 2878 2879 /* 2880 * Disable all Virtual Interfaces. This will shut down the 2881 * delivery of all ingress packets into the chip for these 2882 * Virtual Interfaces. 2883 */ 2884 for_each_port(adapter, pidx) { 2885 struct net_device *netdev; 2886 struct port_info *pi; 2887 2888 if (!test_bit(pidx, &adapter->registered_device_map)) 2889 continue; 2890 2891 netdev = adapter->port[pidx]; 2892 if (!netdev) 2893 continue; 2894 2895 pi = netdev_priv(netdev); 2896 t4vf_enable_vi(adapter, pi->viid, false, false); 2897 } 2898 2899 /* 2900 * Free up all Queues which will prevent further DMA and 2901 * Interrupts allowing various internal pathways to drain. 2902 */ 2903 t4vf_free_sge_resources(adapter); 2904 } 2905 2906 /* 2907 * PCI Device registration data structures. 2908 */ 2909 #define CH_DEVICE(devid, idx) \ 2910 { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, PCI_ANY_ID, 0, 0, idx } 2911 2912 static DEFINE_PCI_DEVICE_TABLE(cxgb4vf_pci_tbl) = { 2913 CH_DEVICE(0xb000, 0), /* PE10K FPGA */ 2914 CH_DEVICE(0x4800, 0), /* T440-dbg */ 2915 CH_DEVICE(0x4801, 0), /* T420-cr */ 2916 CH_DEVICE(0x4802, 0), /* T422-cr */ 2917 CH_DEVICE(0x4803, 0), /* T440-cr */ 2918 CH_DEVICE(0x4804, 0), /* T420-bch */ 2919 CH_DEVICE(0x4805, 0), /* T440-bch */ 2920 CH_DEVICE(0x4806, 0), /* T460-ch */ 2921 CH_DEVICE(0x4807, 0), /* T420-so */ 2922 CH_DEVICE(0x4808, 0), /* T420-cx */ 2923 CH_DEVICE(0x4809, 0), /* T420-bt */ 2924 CH_DEVICE(0x480a, 0), /* T404-bt */ 2925 CH_DEVICE(0x480d, 0), /* T480-cr */ 2926 CH_DEVICE(0x480e, 0), /* T440-lp-cr */ 2927 CH_DEVICE(0x5800, 0), /* T580-dbg */ 2928 CH_DEVICE(0x5801, 0), /* T520-cr */ 2929 CH_DEVICE(0x5802, 0), /* T522-cr */ 2930 CH_DEVICE(0x5803, 0), /* T540-cr */ 2931 CH_DEVICE(0x5804, 0), /* T520-bch */ 2932 CH_DEVICE(0x5805, 0), /* T540-bch */ 2933 CH_DEVICE(0x5806, 0), /* T540-ch */ 2934 CH_DEVICE(0x5807, 0), /* T520-so */ 2935 CH_DEVICE(0x5808, 0), /* T520-cx */ 2936 CH_DEVICE(0x5809, 0), /* T520-bt */ 2937 CH_DEVICE(0x580a, 0), /* T504-bt */ 2938 CH_DEVICE(0x580b, 0), /* T520-sr */ 2939 CH_DEVICE(0x580c, 0), /* T504-bt */ 2940 CH_DEVICE(0x580d, 0), /* T580-cr */ 2941 CH_DEVICE(0x580e, 0), /* T540-lp-cr */ 2942 CH_DEVICE(0x580f, 0), /* Amsterdam */ 2943 CH_DEVICE(0x5810, 0), /* T580-lp-cr */ 2944 CH_DEVICE(0x5811, 0), /* T520-lp-cr */ 2945 CH_DEVICE(0x5812, 0), /* T560-cr */ 2946 CH_DEVICE(0x5813, 0), /* T580-cr */ 2947 CH_DEVICE(0x5814, 0), /* T580-so-cr */ 2948 CH_DEVICE(0x5815, 0), /* T502-bt */ 2949 CH_DEVICE(0x5880, 0), 2950 CH_DEVICE(0x5881, 0), 2951 CH_DEVICE(0x5882, 0), 2952 CH_DEVICE(0x5883, 0), 2953 CH_DEVICE(0x5884, 0), 2954 CH_DEVICE(0x5885, 0), 2955 { 0, } 2956 }; 2957 2958 MODULE_DESCRIPTION(DRV_DESC); 2959 MODULE_AUTHOR("Chelsio Communications"); 2960 MODULE_LICENSE("Dual BSD/GPL"); 2961 MODULE_VERSION(DRV_VERSION); 2962 MODULE_DEVICE_TABLE(pci, cxgb4vf_pci_tbl); 2963 2964 static struct pci_driver cxgb4vf_driver = { 2965 .name = KBUILD_MODNAME, 2966 .id_table = cxgb4vf_pci_tbl, 2967 .probe = cxgb4vf_pci_probe, 2968 .remove = cxgb4vf_pci_remove, 2969 .shutdown = cxgb4vf_pci_shutdown, 2970 }; 2971 2972 /* 2973 * Initialize global driver state. 2974 */ 2975 static int __init cxgb4vf_module_init(void) 2976 { 2977 int ret; 2978 2979 /* 2980 * Vet our module parameters. 2981 */ 2982 if (msi != MSI_MSIX && msi != MSI_MSI) { 2983 pr_warn("bad module parameter msi=%d; must be %d (MSI-X or MSI) or %d (MSI)\n", 2984 msi, MSI_MSIX, MSI_MSI); 2985 return -EINVAL; 2986 } 2987 2988 /* Debugfs support is optional, just warn if this fails */ 2989 cxgb4vf_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL); 2990 if (IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 2991 pr_warn("could not create debugfs entry, continuing\n"); 2992 2993 ret = pci_register_driver(&cxgb4vf_driver); 2994 if (ret < 0 && !IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) 2995 debugfs_remove(cxgb4vf_debugfs_root); 2996 return ret; 2997 } 2998 2999 /* 3000 * Tear down global driver state. 3001 */ 3002 static void __exit cxgb4vf_module_exit(void) 3003 { 3004 pci_unregister_driver(&cxgb4vf_driver); 3005 debugfs_remove(cxgb4vf_debugfs_root); 3006 } 3007 3008 module_init(cxgb4vf_module_init); 3009 module_exit(cxgb4vf_module_exit); 3010