/*
 * Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"

#define DRV_NAME	"thunder-nicvf"
#define DRV_VERSION	"1.0"

/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
	{ 0, }  /* end of table */
};

MODULE_AUTHOR("Sunil Goutham");
MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, nicvf_id_table);

static int debug = 0x00;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");

static int cpi_alg = CPI_ALG_NONE;
module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

/* Per-packet metadata stashed at the start of the Rx page for the
 * XDP_REDIRECT path: DMA address and destination queue index.
 */
struct nicvf_xdp_tx {
	u64 dma_addr;
	u8  qidx;
};

/* Map a local queue index to the netdev-visible queue index.  For a
 * secondary Qset (sqs_mode), queues are offset by the Qsets that come
 * before it; the primary VF's queues occupy indices 0..MAX-1.
 */
static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
	if (nic->sqs_mode)
		return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
	else
		return qidx;
}

/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation. All accesses to the device
 * registers on this platform are implicitly strongly ordered with respect
 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
 * with no memory barriers in this driver. The readq()/writeq() functions add
 * explicit ordering operation which in this case are redundant, and only
 * add overhead.
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
	writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
	return readq_relaxed(nic->reg_base + offset);
}

/* Write a per-queue register; queue number is encoded into the address
 * via NIC_Q_NUM_SHIFT.
 */
void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
			   u64 qidx, u64 val)
{
	void __iomem *addr = nic->reg_base + offset;

	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
	void __iomem *addr = nic->reg_base + offset;

	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
	u64 *msg = (u64 *)mbx;

	/* Mailbox is two consecutive 64-bit registers */
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}

/* Send a mailbox message to the PF and busy-wait (sleeping) for an
 * ACK/NACK delivered via the mailbox interrupt handler.
 * Returns 0 on ACK, -EINVAL on NACK, -EBUSY on timeout.
 */
int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
	int timeout = NIC_MBOX_MSG_TIMEOUT;
	int sleep = 10;

	nic->pf_acked = false;
	nic->pf_nacked = false;

	nicvf_write_to_mbx(nic, mbx);

	/* Wait for previous message to be acked, timeout 2sec */
	while (!nic->pf_acked) {
		if (nic->pf_nacked) {
			netdev_err(nic->netdev,
				   "PF NACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EINVAL;
		}
		msleep(sleep);
		if (nic->pf_acked)
			break;
		timeout -= sleep;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EBUSY;
		}
	}
	return 0;
}

/* Checks if VF is able to communicate with PF
 * and also gets the VNIC number this VF is associated to.
 * Returns 1 on success, 0 on failure (note: not 0/-errno).
 */
static int nicvf_check_pf_ready(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_READY;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		netdev_err(nic->netdev,
			   "PF didn't respond to READY msg\n");
		return 0;
	}

	return 1;
}

/* Cache BGX statistics delivered by the PF into the per-VF stats copy */
static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
	if (bgx->rx)
		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
	else
		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
}

/* Mailbox interrupt handler: read the 2x64-bit mailbox, dispatch on the
 * message type and set pf_acked/pf_nacked so nicvf_send_msg_to_pf() can
 * complete.  Runs from the MISC interrupt handler.
 */
static void nicvf_handle_mbx_intr(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	u64 *mbx_data;
	u64 mbx_addr;
	int i;

	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
	mbx_data = (u64 *)&mbx;

	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
		*mbx_data = nicvf_reg_read(nic, mbx_addr);
		mbx_data++;
		mbx_addr += sizeof(u64);
	}

	netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
	switch (mbx.msg.msg) {
	case NIC_MBOX_MSG_READY:
		nic->pf_acked = true;
		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
		nic->node = mbx.nic_cfg.node_id;
		/* Don't overwrite a MAC the admin is in the middle of setting */
		if (!nic->set_mac_pending)
			ether_addr_copy(nic->netdev->dev_addr,
					mbx.nic_cfg.mac_addr);
		nic->sqs_mode = mbx.nic_cfg.sqs_mode;
		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
		nic->link_up = false;
		nic->duplex = 0;
		nic->speed = 0;
		break;
	case NIC_MBOX_MSG_ACK:
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_NACK:
		nic->pf_nacked = true;
		break;
	case NIC_MBOX_MSG_RSS_SIZE:
		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_STATS:
		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
		nic->pf_acked = true;
		nic->link_up = mbx.link_status.link_up;
		nic->duplex = mbx.link_status.duplex;
		nic->speed = mbx.link_status.speed;
		nic->mac_type = mbx.link_status.mac_type;
		if (nic->link_up) {
			netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
				    nic->speed,
				    nic->duplex == DUPLEX_FULL ?
				    "Full" : "Half");
			netif_carrier_on(nic->netdev);
			netif_tx_start_all_queues(nic->netdev);
		} else {
			netdev_info(nic->netdev, "Link is Down\n");
			netif_carrier_off(nic->netdev);
			netif_tx_stop_all_queues(nic->netdev);
		}
		break;
	case NIC_MBOX_MSG_ALLOC_SQS:
		nic->sqs_count = mbx.sqs_alloc.qs_count;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_SNICVF_PTR:
		/* Primary VF: make note of secondary VF's pointer
		 * to be used while packet transmission.
		 */
		nic->snicvf[mbx.nicvf.sqs_id] =
			(struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PNICVF_PTR:
		/* Secondary VF/Qset: make note of primary VF's pointer
		 * to be used while packet reception, to handover packet
		 * to primary VF's netdev.
		 */
		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PFC:
		nic->pfc.autoneg = mbx.pfc.autoneg;
		nic->pfc.fc_rx = mbx.pfc.fc_rx;
		nic->pfc.fc_tx = mbx.pfc.fc_tx;
		nic->pf_acked = true;
		break;
	default:
		netdev_err(nic->netdev,
			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
		break;
	}
	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
}

/* Ask the PF to program this VF's current MAC address into hardware */
static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
{
	union nic_mbx mbx = {};

	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
	mbx.mac.vf_id = nic->vf_id;
	ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);

	return nicvf_send_msg_to_pf(nic, &mbx);
}

/* Configure channel/packet interface (CPI) classification via the PF */
static void nicvf_config_cpi(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
	mbx.cpi_cfg.vf_id = nic->vf_id;
	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;

	nicvf_send_msg_to_pf(nic, &mbx);
}

/* Query the RSS indirection table size from the PF; the reply is stored
 * in nic->rss_info.rss_size by the mailbox interrupt handler.
 */
static void nicvf_get_rss_size(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
	mbx.rss_size.vf_id = nic->vf_id;
	nicvf_send_msg_to_pf(nic, &mbx);
}

/* Push the RSS indirection table to the PF.  The table may not fit in a
 * single mailbox message, so it is sent in chunks of
 * RSS_IND_TBL_LEN_PER_MBX_MSG entries; continuation chunks use
 * NIC_MBOX_MSG_RSS_CFG_CONT.
 */
void nicvf_config_rss(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	struct nicvf_rss_info *rss = &nic->rss_info;
	int ind_tbl_len = rss->rss_size;
	int i, nextq = 0;

	mbx.rss_cfg.vf_id = nic->vf_id;
	mbx.rss_cfg.hash_bits = rss->hash_bits;
	while (ind_tbl_len) {
		mbx.rss_cfg.tbl_offset = nextq;
		mbx.rss_cfg.tbl_len = min(ind_tbl_len,
					  RSS_IND_TBL_LEN_PER_MBX_MSG);
		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
			  NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;

		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];

		nicvf_send_msg_to_pf(nic, &mbx);

		ind_tbl_len -= mbx.rss_cfg.tbl_len;
	}
}

/* Write the RSS hash key into the VNIC's key registers */
void nicvf_set_rss_key(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
	int idx;

	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
		nicvf_reg_write(nic, key_addr, rss->key[idx]);
		key_addr += sizeof(u64);
	}
}

/* Initialize RSS: key, hash config and default indirection table.
 * RSS is disabled when a CPI algorithm other than CPI_ALG_NONE is in use.
 * Returns 1 when RSS was enabled, 0 otherwise.
 */
static int nicvf_rss_init(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	nicvf_get_rss_size(nic);

	if (cpi_alg != CPI_ALG_NONE) {
		rss->enable = false;
		rss->hash_bits = 0;
		return 0;
	}

	rss->enable = true;

	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
	nicvf_set_rss_key(nic);

	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);

	rss->hash_bits =  ilog2(rounddown_pow_of_two(rss->rss_size));

	for (idx = 0; idx < rss->rss_size; idx++)
		rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
							       nic->rx_queues);
	nicvf_config_rss(nic);
	return 1;
}

/* Request PF to allocate additional Qsets */
static void nicvf_request_sqs(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	int sqs;
	int sqs_count = nic->sqs_count;
	int rx_queues = 0, tx_queues = 0;

	/* Only primary VF should request */
	if (nic->sqs_mode || !nic->sqs_count)
		return;

	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
	mbx.sqs_alloc.vf_id = nic->vf_id;
	mbx.sqs_alloc.qs_count = nic->sqs_count;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		/* No response from PF */
		nic->sqs_count = 0;
		return;
	}

	/* Return if no Secondary Qsets available */
	if (!nic->sqs_count)
		return;

	/* Queues beyond the primary Qset's capacity spill into the SQsets */
	if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
		rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;

	tx_queues = nic->tx_queues + nic->xdp_tx_queues;
	if (tx_queues > MAX_SND_QUEUES_PER_QS)
		tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;

	/* Set no of Rx/Tx queues in each of the SQsets */
	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
		mbx.nicvf.vf_id = nic->vf_id;
		mbx.nicvf.sqs_id = sqs;
		nicvf_send_msg_to_pf(nic, &mbx);

		nic->snicvf[sqs]->sqs_id = sqs;
		if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
			rx_queues -= MAX_RCV_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
			rx_queues = 0;
		}

		if (tx_queues > MAX_SND_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
			tx_queues -= MAX_SND_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
			tx_queues = 0;
		}

		/* One CQ serves both an RQ and an SQ; size for the larger */
		nic->snicvf[sqs]->qs->cq_cnt =
		max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);

		/* Initialize secondary Qset's queues and its interrupts */
		nicvf_open(nic->snicvf[sqs]->netdev);
	}

	/* Update stack with actual Rx/Tx queue count allocated */
	if (sqs_count != nic->sqs_count)
		nicvf_set_real_num_queues(nic->netdev,
					  nic->tx_queues, nic->rx_queues);
}

/* Send this Qset's nicvf pointer to PF.
 * PF in turn sends primary VF's nicvf struct to secondary Qsets/VFs
 * so that packets received by these Qsets can use primary VF's netdev
 */
static void nicvf_send_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
	mbx.nicvf.sqs_mode = nic->sqs_mode;
	mbx.nicvf.nicvf = (u64)nic;
	nicvf_send_msg_to_pf(nic, &mbx);
}

/* Ask the PF for the primary VF's nicvf pointer; the reply is stored in
 * nic->pnicvf by the mailbox handler.
 */
static void nicvf_get_primary_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
	nicvf_send_msg_to_pf(nic, &mbx);
}

/* Tell the stack how many Tx/Rx queues are actually usable */
int nicvf_set_real_num_queues(struct net_device *netdev,
			      int tx_queues, int rx_queues)
{
	int err = 0;

	err = netif_set_real_num_tx_queues(netdev, tx_queues);
	if (err) {
		netdev_err(netdev,
			   "Failed to set no of Tx queues: %d\n", tx_queues);
		return err;
	}

	err = netif_set_real_num_rx_queues(netdev, rx_queues);
	if (err)
		netdev_err(netdev,
			   "Failed to set no of Rx queues: %d\n", rx_queues);
	return err;
}

/* Enable the Qset and set up its queues for data transfer */
static int nicvf_init_resources(struct nicvf *nic)
{
	int err;

	/* Enable Qset */
	nicvf_qset_config(nic, true);

	/* Initialize queues and HW for data transfer */
	err = nicvf_config_data_transfer(nic, true);
	if (err) {
		netdev_err(nic->netdev,
			   "Failed to alloc/config VF's QSet resources\n");
		return err;
	}

	return 0;
}

static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
{
	/* Check if it's a recycled page, if not unmap the DMA mapping.
	 * Recycled page holds an extra reference.
	 */
	if (page_ref_count(page) == 1) {
		dma_addr &= PAGE_MASK;
		dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
				     RCV_FRAG_LEN + XDP_HEADROOM,
				     DMA_FROM_DEVICE,
				     DMA_ATTR_SKIP_CPU_SYNC);
	}
}

/* Run the XDP program on a received packet.
 * Returns true when the packet was consumed by XDP (TX/REDIRECT/DROP),
 * false when it should continue up the stack (*skb is then set for
 * XDP_PASS, or left NULL on failure).
 */
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
				struct rcv_queue *rq, struct sk_buff **skb)
{
	struct xdp_buff xdp;
	struct page *page;
	struct nicvf_xdp_tx *xdp_tx = NULL;
	u32 action;
	u16 len, err, offset = 0;
	u64 dma_addr, cpu_addr;
	void *orig_data;

	/* Retrieve packet buffer's DMA address and length.
	 * These live at fixed 64-bit-word offsets within the Rx CQE
	 * (hardware descriptor layout).
	 */
	len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
	dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));

	cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
	if (!cpu_addr)
		return false;
	cpu_addr = (u64)phys_to_virt(cpu_addr);
	page = virt_to_page((void *)cpu_addr);

	xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
	xdp.data = (void *)cpu_addr;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + len;
	xdp.rxq = &rq->xdp_rxq;
	orig_data = xdp.data;

	rcu_read_lock();
	action = bpf_prog_run_xdp(prog, &xdp);
	rcu_read_unlock();

	/* Check if XDP program has changed headers */
	if (orig_data != xdp.data) {
		len = xdp.data_end - xdp.data;
		offset = orig_data - xdp.data;
		dma_addr -= offset;
	}

	switch (action) {
	case XDP_PASS:
		nicvf_unmap_page(nic, page, dma_addr);

		/* Build SKB and pass on packet to network stack */
		*skb = build_skb(xdp.data,
				 RCV_FRAG_LEN - cqe_rx->align_pad + offset);
		if (!*skb)
			put_page(page);
		else
			skb_put(*skb, len);
		return false;
	case XDP_TX:
		nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
		return true;
	case XDP_REDIRECT:
		/* Save DMA address for use while transmitting */
		xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
		xdp_tx->dma_addr = dma_addr;
		xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

		err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
		if (!err)
			return true;

		/* Free the page on error */
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		break;
	default:
		bpf_warn_invalid_xdp_action(action);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(nic->netdev, prog, action);
		/* fall through */
	case XDP_DROP:
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		return true;
	}
	return false;
}

/* Tx completion handler: release buffers/SKBs for a completed send and
 * accumulate freed subdescriptor and byte/packet counts for the caller.
 */
static void nicvf_snd_pkt_handler(struct net_device *netdev,
				  struct cqe_send_t *cqe_tx,
				  int budget, int *subdesc_cnt,
				  unsigned int *tx_pkts, unsigned int *tx_bytes)
{
	struct sk_buff *skb = NULL;
	struct page *page;
	struct nicvf *nic = netdev_priv(netdev);
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;
	struct sq_hdr_subdesc *tso_sqe;

	sq = &nic->qs->sq[cqe_tx->sq_idx];

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
		return;

	/* Check for errors */
	if (cqe_tx->send_status)
		nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);

	/* Is this a XDP designated Tx queue */
	if (sq->is_xdp) {
		page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
		/* Check if it's recycled page or else unmap DMA mapping */
		if (page && (page_ref_count(page) == 1))
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);

		/* Release page reference for recycling */
		if (page)
			put_page(page);
		sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		return;
	}

	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
	if (skb) {
		/* Check for dummy descriptor used for HW TSO offload on 88xx */
		if (hdr->dont_send) {
			/* Get actual TSO descriptors and free them */
			tso_sqe =
			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
						 tso_sqe->subdesc_cnt);
			*subdesc_cnt += tso_sqe->subdesc_cnt + 1;
		} else {
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);
		}
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		prefetch(skb);
		(*tx_pkts)++;
		*tx_bytes += skb->len;
		napi_consume_skb(skb, budget);
		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
	} else {
		/* In case of SW TSO on 88xx, only last segment will have
		 * a SKB attached, so just free SQEs here.
		 */
		if (!nic->hw_tso)
			*subdesc_cnt += hdr->subdesc_cnt + 1;
	}
}

/* Populate skb->hash from the CQE's RSS tag, if RXHASH is enabled */
static inline void nicvf_set_rxhash(struct net_device *netdev,
				    struct cqe_rx_t *cqe_rx,
				    struct sk_buff *skb)
{
	u8 hash_type;
	u32 hash;

	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (cqe_rx->rss_alg) {
	case RSS_ALG_TCP_IP:
	case RSS_ALG_UDP_IP:
		hash_type = PKT_HASH_TYPE_L4;
		hash = cqe_rx->rss_tag;
		break;
	case RSS_ALG_IP:
		hash_type = PKT_HASH_TYPE_L3;
		hash = cqe_rx->rss_tag;
		break;
	default:
		hash_type = PKT_HASH_TYPE_NONE;
		hash = 0;
	}

	skb_set_hash(skb, hash, hash_type);
}

/* Rx completion handler: run XDP (if attached), build/fetch the SKB and
 * hand it to the network stack via the primary VF's netdev.
 */
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
				  struct napi_struct *napi,
				  struct cqe_rx_t *cqe_rx,
				  struct snd_queue *sq, struct rcv_queue *rq)
{
	struct sk_buff *skb = NULL;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf *snic = nic;
	int err = 0;
	int rq_idx;

	rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

	if (nic->sqs_mode) {
		/* Use primary VF's 'nicvf' struct */
		nic = nic->pnicvf;
		netdev = nic->netdev;
	}

	/* Check for errors */
	if (cqe_rx->err_level || cqe_rx->err_opcode) {
		err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
		if (err && !cqe_rx->rb_cnt)
			return;
	}

/* For XDP, ignore pkts spanning multiple pages */ 727 if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { 728 /* Packet consumed by XDP */ 729 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb)) 730 return; 731 } else { 732 skb = nicvf_get_rcv_skb(snic, cqe_rx, 733 nic->xdp_prog ? true : false); 734 } 735 736 if (!skb) 737 return; 738 739 if (netif_msg_pktdata(nic)) { 740 netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len); 741 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, 742 skb->data, skb->len, true); 743 } 744 745 /* If error packet, drop it here */ 746 if (err) { 747 dev_kfree_skb_any(skb); 748 return; 749 } 750 751 nicvf_set_rxhash(netdev, cqe_rx, skb); 752 753 skb_record_rx_queue(skb, rq_idx); 754 if (netdev->hw_features & NETIF_F_RXCSUM) { 755 /* HW by default verifies TCP/UDP/SCTP checksums */ 756 skb->ip_summed = CHECKSUM_UNNECESSARY; 757 } else { 758 skb_checksum_none_assert(skb); 759 } 760 761 skb->protocol = eth_type_trans(skb, netdev); 762 763 /* Check for stripped VLAN */ 764 if (cqe_rx->vlan_found && cqe_rx->vlan_stripped) 765 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 766 ntohs((__force __be16)cqe_rx->vlan_tci)); 767 768 if (napi && (netdev->features & NETIF_F_GRO)) 769 napi_gro_receive(napi, skb); 770 else 771 netif_receive_skb(skb); 772 } 773 774 static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, 775 struct napi_struct *napi, int budget) 776 { 777 int processed_cqe, work_done = 0, tx_done = 0; 778 int cqe_count, cqe_head; 779 int subdesc_cnt = 0; 780 struct nicvf *nic = netdev_priv(netdev); 781 struct queue_set *qs = nic->qs; 782 struct cmp_queue *cq = &qs->cq[cq_idx]; 783 struct cqe_rx_t *cq_desc; 784 struct netdev_queue *txq; 785 struct snd_queue *sq = &qs->sq[cq_idx]; 786 struct rcv_queue *rq = &qs->rq[cq_idx]; 787 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; 788 789 spin_lock_bh(&cq->lock); 790 loop: 791 processed_cqe = 0; 792 /* Get no of valid CQ entries to process */ 793 cqe_count = 
nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); 794 cqe_count &= CQ_CQE_COUNT; 795 if (!cqe_count) 796 goto done; 797 798 /* Get head of the valid CQ entries */ 799 cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; 800 cqe_head &= 0xFFFF; 801 802 while (processed_cqe < cqe_count) { 803 /* Get the CQ descriptor */ 804 cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); 805 cqe_head++; 806 cqe_head &= (cq->dmem.q_len - 1); 807 /* Initiate prefetch for next descriptor */ 808 prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); 809 810 if ((work_done >= budget) && napi && 811 (cq_desc->cqe_type != CQE_TYPE_SEND)) { 812 break; 813 } 814 815 switch (cq_desc->cqe_type) { 816 case CQE_TYPE_RX: 817 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq); 818 work_done++; 819 break; 820 case CQE_TYPE_SEND: 821 nicvf_snd_pkt_handler(netdev, (void *)cq_desc, 822 budget, &subdesc_cnt, 823 &tx_pkts, &tx_bytes); 824 tx_done++; 825 break; 826 case CQE_TYPE_INVALID: 827 case CQE_TYPE_RX_SPLIT: 828 case CQE_TYPE_RX_TCP: 829 case CQE_TYPE_SEND_PTP: 830 /* Ignore for now */ 831 break; 832 } 833 processed_cqe++; 834 } 835 836 /* Ring doorbell to inform H/W to reuse processed CQEs */ 837 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, 838 cq_idx, processed_cqe); 839 840 if ((work_done < budget) && napi) 841 goto loop; 842 843 done: 844 /* Update SQ's descriptor free count */ 845 if (subdesc_cnt) 846 nicvf_put_sq_desc(sq, subdesc_cnt); 847 848 txq_idx = nicvf_netdev_qidx(nic, cq_idx); 849 /* Handle XDP TX queues */ 850 if (nic->pnicvf->xdp_prog) { 851 if (txq_idx < nic->pnicvf->xdp_tx_queues) { 852 nicvf_xdp_sq_doorbell(nic, sq, cq_idx); 853 goto out; 854 } 855 nic = nic->pnicvf; 856 txq_idx -= nic->pnicvf->xdp_tx_queues; 857 } 858 859 /* Wakeup TXQ if its stopped earlier due to SQ full */ 860 if (tx_done || 861 (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { 862 netdev = nic->pnicvf->netdev; 863 txq = netdev_get_tx_queue(netdev, 
txq_idx); 864 if (tx_pkts) 865 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 866 867 /* To read updated queue and carrier status */ 868 smp_mb(); 869 if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { 870 netif_tx_wake_queue(txq); 871 nic = nic->pnicvf; 872 this_cpu_inc(nic->drv_stats->txq_wake); 873 netif_warn(nic, tx_err, netdev, 874 "Transmit queue wakeup SQ%d\n", txq_idx); 875 } 876 } 877 878 out: 879 spin_unlock_bh(&cq->lock); 880 return work_done; 881 } 882 883 static int nicvf_poll(struct napi_struct *napi, int budget) 884 { 885 u64 cq_head; 886 int work_done = 0; 887 struct net_device *netdev = napi->dev; 888 struct nicvf *nic = netdev_priv(netdev); 889 struct nicvf_cq_poll *cq; 890 891 cq = container_of(napi, struct nicvf_cq_poll, napi); 892 work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); 893 894 if (work_done < budget) { 895 /* Slow packet rate, exit polling */ 896 napi_complete_done(napi, work_done); 897 /* Re-enable interrupts */ 898 cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, 899 cq->cq_idx); 900 nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 901 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, 902 cq->cq_idx, cq_head); 903 nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 904 } 905 return work_done; 906 } 907 908 /* Qset error interrupt handler 909 * 910 * As of now only CQ errors are handled 911 */ 912 static void nicvf_handle_qs_err(unsigned long data) 913 { 914 struct nicvf *nic = (struct nicvf *)data; 915 struct queue_set *qs = nic->qs; 916 int qidx; 917 u64 status; 918 919 netif_tx_disable(nic->netdev); 920 921 /* Check if it is CQ err */ 922 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 923 status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, 924 qidx); 925 if (!(status & CQ_ERR_MASK)) 926 continue; 927 /* Process already queued CQEs and reconfig CQ */ 928 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 929 nicvf_sq_disable(nic, qidx); 930 nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 
0); 931 nicvf_cmp_queue_config(nic, qs, qidx, true); 932 nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); 933 nicvf_sq_enable(nic, &qs->sq[qidx], qidx); 934 935 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 936 } 937 938 netif_tx_start_all_queues(nic->netdev); 939 /* Re-enable Qset error interrupt */ 940 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 941 } 942 943 static void nicvf_dump_intr_status(struct nicvf *nic) 944 { 945 netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n", 946 nicvf_reg_read(nic, NIC_VF_INT)); 947 } 948 949 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) 950 { 951 struct nicvf *nic = (struct nicvf *)nicvf_irq; 952 u64 intr; 953 954 nicvf_dump_intr_status(nic); 955 956 intr = nicvf_reg_read(nic, NIC_VF_INT); 957 /* Check for spurious interrupt */ 958 if (!(intr & NICVF_INTR_MBOX_MASK)) 959 return IRQ_HANDLED; 960 961 nicvf_handle_mbx_intr(nic); 962 963 return IRQ_HANDLED; 964 } 965 966 static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) 967 { 968 struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq; 969 struct nicvf *nic = cq_poll->nicvf; 970 int qidx = cq_poll->cq_idx; 971 972 nicvf_dump_intr_status(nic); 973 974 /* Disable interrupts */ 975 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 976 977 /* Schedule NAPI */ 978 napi_schedule_irqoff(&cq_poll->napi); 979 980 /* Clear interrupt */ 981 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 982 983 return IRQ_HANDLED; 984 } 985 986 static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq) 987 { 988 struct nicvf *nic = (struct nicvf *)nicvf_irq; 989 u8 qidx; 990 991 992 nicvf_dump_intr_status(nic); 993 994 /* Disable RBDR interrupt and schedule softirq */ 995 for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) { 996 if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) 997 continue; 998 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 999 tasklet_hi_schedule(&nic->rbdr_task); 1000 /* Clear interrupt */ 1001 nicvf_clear_intr(nic, 
NICVF_INTR_RBDR, qidx); 1002 } 1003 1004 return IRQ_HANDLED; 1005 } 1006 1007 static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) 1008 { 1009 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1010 1011 nicvf_dump_intr_status(nic); 1012 1013 /* Disable Qset err interrupt and schedule softirq */ 1014 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1015 tasklet_hi_schedule(&nic->qs_err_task); 1016 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1017 1018 return IRQ_HANDLED; 1019 } 1020 1021 static void nicvf_set_irq_affinity(struct nicvf *nic) 1022 { 1023 int vec, cpu; 1024 1025 for (vec = 0; vec < nic->num_vec; vec++) { 1026 if (!nic->irq_allocated[vec]) 1027 continue; 1028 1029 if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) 1030 return; 1031 /* CQ interrupts */ 1032 if (vec < NICVF_INTR_ID_SQ) 1033 /* Leave CPU0 for RBDR and other interrupts */ 1034 cpu = nicvf_netdev_qidx(nic, vec) + 1; 1035 else 1036 cpu = 0; 1037 1038 cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), 1039 nic->affinity_mask[vec]); 1040 irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), 1041 nic->affinity_mask[vec]); 1042 } 1043 } 1044 1045 static int nicvf_register_interrupts(struct nicvf *nic) 1046 { 1047 int irq, ret = 0; 1048 1049 for_each_cq_irq(irq) 1050 sprintf(nic->irq_name[irq], "%s-rxtx-%d", 1051 nic->pnicvf->netdev->name, 1052 nicvf_netdev_qidx(nic, irq)); 1053 1054 for_each_sq_irq(irq) 1055 sprintf(nic->irq_name[irq], "%s-sq-%d", 1056 nic->pnicvf->netdev->name, 1057 nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); 1058 1059 for_each_rbdr_irq(irq) 1060 sprintf(nic->irq_name[irq], "%s-rbdr-%d", 1061 nic->pnicvf->netdev->name, 1062 nic->sqs_mode ? 
(nic->sqs_id + 1) : 0); 1063 1064 /* Register CQ interrupts */ 1065 for (irq = 0; irq < nic->qs->cq_cnt; irq++) { 1066 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1067 nicvf_intr_handler, 1068 0, nic->irq_name[irq], nic->napi[irq]); 1069 if (ret) 1070 goto err; 1071 nic->irq_allocated[irq] = true; 1072 } 1073 1074 /* Register RBDR interrupt */ 1075 for (irq = NICVF_INTR_ID_RBDR; 1076 irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { 1077 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1078 nicvf_rbdr_intr_handler, 1079 0, nic->irq_name[irq], nic); 1080 if (ret) 1081 goto err; 1082 nic->irq_allocated[irq] = true; 1083 } 1084 1085 /* Register QS error interrupt */ 1086 sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", 1087 nic->pnicvf->netdev->name, 1088 nic->sqs_mode ? (nic->sqs_id + 1) : 0); 1089 irq = NICVF_INTR_ID_QS_ERR; 1090 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1091 nicvf_qs_err_intr_handler, 1092 0, nic->irq_name[irq], nic); 1093 if (ret) 1094 goto err; 1095 1096 nic->irq_allocated[irq] = true; 1097 1098 /* Set IRQ affinities */ 1099 nicvf_set_irq_affinity(nic); 1100 1101 err: 1102 if (ret) 1103 netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq); 1104 1105 return ret; 1106 } 1107 1108 static void nicvf_unregister_interrupts(struct nicvf *nic) 1109 { 1110 struct pci_dev *pdev = nic->pdev; 1111 int irq; 1112 1113 /* Free registered interrupts */ 1114 for (irq = 0; irq < nic->num_vec; irq++) { 1115 if (!nic->irq_allocated[irq]) 1116 continue; 1117 1118 irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); 1119 free_cpumask_var(nic->affinity_mask[irq]); 1120 1121 if (irq < NICVF_INTR_ID_SQ) 1122 free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); 1123 else 1124 free_irq(pci_irq_vector(pdev, irq), nic); 1125 1126 nic->irq_allocated[irq] = false; 1127 } 1128 1129 /* Disable MSI-X */ 1130 pci_free_irq_vectors(pdev); 1131 nic->num_vec = 0; 1132 } 1133 1134 /* Initialize MSIX vectors and register MISC interrupt. 
 * Send READY message to PF to check if its alive
 */
static int nicvf_register_misc_interrupt(struct nicvf *nic)
{
	int ret = 0;
	int irq = NICVF_INTR_ID_MISC;

	/* Return if mailbox interrupt is already registered */
	if (nic->pdev->msix_enabled)
		return 0;

	/* Enable MSI-X */
	nic->num_vec = pci_msix_vec_count(nic->pdev);
	ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		netdev_err(nic->netdev,
			   "Req for #%d msix vectors failed\n", nic->num_vec);
		/* NOTE(review): a bare positive '1' is returned here (and
		 * below) and can propagate out of ndo_open via nicvf_open();
		 * a negative errno would be more conventional — confirm
		 * callers before changing.
		 */
		return 1;
	}

	sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
	/* Register Misc interrupt */
	ret = request_irq(pci_irq_vector(nic->pdev, irq),
			  nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);

	if (ret)
		return ret;
	nic->irq_allocated[irq] = true;

	/* Enable mailbox interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);

	/* Check if VF is able to communicate with PF */
	if (!nicvf_check_pf_ready(nic)) {
		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
		nicvf_unregister_interrupts(nic);
		return 1;
	}

	return 0;
}

/* ndo_start_xmit: queue an skb on the (possibly secondary Qset's) send
 * queue. Returns NETDEV_TX_BUSY with the queue stopped when the SQ has
 * no room, NETDEV_TX_OK otherwise (including the drop cases).
 */
static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qid = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
	struct nicvf *snic;
	struct snd_queue *sq;
	int tmp;

	/* Check for minimum packet length */
	if (skb->len <= ETH_HLEN) {
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* In XDP case, initial HW tx queues are used for XDP,
	 * but stack's queue mapping starts at '0', so skip the
	 * Tx queues attached to Rx queues for XDP.
	 */
	if (nic->xdp_prog)
		qid += nic->xdp_tx_queues;

	snic = nic;
	/* Get secondary Qset's SQ structure */
	if (qid >= MAX_SND_QUEUES_PER_QS) {
		tmp = qid / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[tmp - 1];
		if (!snic) {
			netdev_warn(nic->netdev,
				    "Secondary Qset#%d's ptr not initialized\n",
				    tmp - 1);
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		qid = qid % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qid];
	if (!netif_tx_queue_stopped(txq) &&
	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
		netif_tx_stop_queue(txq);

		/* Barrier, so that stop_queue visible to other cpus */
		smp_mb();

		/* Check again, in case another cpu freed descriptors */
		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
			netif_tx_wake_queue(txq);
		} else {
			this_cpu_inc(nic->drv_stats->txq_stop);
			netif_warn(nic, tx_err, netdev,
				   "Transmit ring full, stopping SQ%d\n", qid);
		}
		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}

/* Release all per-CQ NAPI contexts allocated in nicvf_open() */
static inline void nicvf_free_cq_poll(struct nicvf *nic)
{
	struct nicvf_cq_poll *cq_poll;
	int qidx;

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		nic->napi[qidx] = NULL;
		kfree(cq_poll);
	}
}

/* ndo_stop: notify the PF, quiesce IRQs/NAPI/tasklets, free queue
 * resources and MSI-X vectors. Secondary Qsets are torn down first by
 * recursing into their netdevs.
 */
int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);
	netif_tx_stop_all_queues(nic->netdev);
	nic->link_up = false;

	/* Teardown secondary qsets first */
	if (!nic->sqs_mode) {
		for (qidx = 0; qidx < nic->sqs_count; qidx++) {
			if (!nic->snicvf[qidx])
				continue;
			nicvf_stop(nic->snicvf[qidx]->netdev);
			nic->snicvf[qidx] = NULL;
		}
	}

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(pci_irq_vector(nic->pdev, irq));

	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}

	netif_tx_disable(netdev);

	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	nicvf_free_cq_poll(nic);

	/* Clear multiqset info */
	nic->pnicvf = nic;

	return 0;
}

/* Ask the PF to program the max receive frame size for this VF */
static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
	union nic_mbx mbx = {};

	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
	mbx.frs.max_frs = mtu;
	mbx.frs.vf_id = nic->vf_id;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

/* ndo_open: allocate NAPI contexts, set up MAC/CPI/RSS via the PF
 * mailbox, register IRQs, init queues and send CFG_DONE to the PF.
 */
int nicvf_open(struct net_device *netdev)
{
	int cpu, err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		cq_poll->nicvf = nic;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Apply a MAC address set while the mailbox IRQ was down */
	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
		     (unsigned long)nic);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
		     (unsigned long)nic);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	if (!nic->sqs_mode)
		nicvf_config_cpi(nic);

	nicvf_request_sqs(nic);
	if (nic->sqs_mode)
		nicvf_get_primary_vf_struct(nic);

	/* Configure receive side scaling and MTU */
	if (!nic->sqs_mode) {
		nicvf_rss_init(nic);
		err = nicvf_update_hw_max_frs(nic, netdev->mtu);
		if (err)
			goto cleanup;

		/* Clear percpu stats */
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
			       sizeof(struct nicvf_drv_stats));
	}

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue interrupt */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Enable RBDR threshold interrupt */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);

	/* Send VF config done msg to PF */
	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
	nicvf_write_to_mbx(nic, &mbx);

	return 0;
cleanup:
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
	nicvf_unregister_interrupts(nic);
	tasklet_kill(&nic->qs_err_task);
	tasklet_kill(&nic->rbdr_task);
napi_del:
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}
	nicvf_free_cq_poll(nic);
	return err;
}

/* ndo_change_mtu: update the netdev MTU and, if the interface is up,
 * ask the PF to program the new max frame size; the old MTU is
 * restored on failure.
 */
static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nicvf *nic = netdev_priv(netdev);
	int orig_mtu = netdev->mtu;

	netdev->mtu = new_mtu;

	if (!netif_running(netdev))
		return 0;

	if (nicvf_update_hw_max_frs(nic, new_mtu)) {
		netdev->mtu = orig_mtu;
		return -EINVAL;
	}

	return 0;
}

/* ndo_set_mac_address: program the MAC in HW; if MSI-X (and hence the
 * mailbox IRQ) is not up yet, defer until nicvf_open().
 */
static int nicvf_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *addr = p;
	struct nicvf *nic = netdev_priv(netdev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

	if (nic->pdev->msix_enabled) {
		if (nicvf_hw_set_mac_addr(nic, netdev))
			return -EBUSY;
	} else {
		nic->set_mac_pending = true;
	}

	return 0;
}

/* Refresh BGX LMAC Rx/Tx stats by polling the PF one counter index at
 * a time over the mailbox; bails out on the first mailbox failure.
 */
void nicvf_update_lmac_stats(struct nicvf *nic)
{
	int stat = 0;
	union nic_mbx mbx = {};

	if (!netif_running(nic->netdev))
		return;

	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
	mbx.bgx_stats.vf_id = nic->vf_id;
	/* Rx stats */
	mbx.bgx_stats.rx = 1;
	while (stat < BGX_RX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}

	stat = 0;

	/* Tx stats */
	mbx.bgx_stats.rx = 0;
	while (stat < BGX_TX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}
}

/* Snapshot hardware Rx/Tx counters into nic->hw_stats and refresh the
 * per-queue RQ/SQ stats.
 */
void nicvf_update_stats(struct nicvf *nic)
{
	int qidx, cpu;
	u64 tmp_stats = 0;
	struct nicvf_hw_stats *stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats;
	struct queue_set *qs = nic->qs;

#define GET_RX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
	stats->rx_drop_red = GET_RX_STATS(RX_RED);
	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	/* On T88 pass 2.0, the dummy SQE added for TSO notification
	 * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
	 * pointed by dummy SQE and results in tx_drops counter being
	 * incremented. Subtracting it from tx_tso counter will give
	 * exact tx_drops counter.
	 */
	if (nic->t88 && nic->hw_tso) {
		for_each_possible_cpu(cpu) {
			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
			tmp_stats += drv_stats->tx_tso;
		}
		stats->tx_drops = tmp_stats - stats->tx_drops;
	}
	stats->tx_frames = stats->tx_ucast_frames +
			   stats->tx_bcast_frames +
			   stats->tx_mcast_frames;
	stats->rx_frames = stats->rx_ucast_frames +
			   stats->rx_bcast_frames +
			   stats->rx_mcast_frames;
	stats->rx_drops = stats->rx_drop_red +
			  stats->rx_drop_overrun;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}

/* ndo_get_stats64: refresh HW counters and copy them into the stack's
 * rtnl_link_stats64 structure.
 */
static void nicvf_get_stats64(struct net_device *netdev,
			      struct rtnl_link_stats64 *stats)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;

	nicvf_update_stats(nic);

	stats->rx_bytes = hw_stats->rx_bytes;
	stats->rx_packets = hw_stats->rx_frames;
	stats->rx_dropped = hw_stats->rx_drops;
	stats->multicast = hw_stats->rx_mcast_frames;

	stats->tx_bytes = hw_stats->tx_bytes;
	stats->tx_packets = hw_stats->tx_frames;
	stats->tx_dropped = hw_stats->tx_drops;

}

/* ndo_tx_timeout: count the event and schedule an interface reset */
static void nicvf_tx_timeout(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");

	this_cpu_inc(nic->drv_stats->tx_timeout);
	schedule_work(&nic->reset_task);
}

/* Worker for nic->reset_task: bounce the interface if it is running */
static void nicvf_reset_task(struct work_struct *work)
{
	struct nicvf *nic;

	nic = container_of(work, struct nicvf, reset_task);

	if (!netif_running(nic->netdev))
		return;

	nicvf_stop(nic->netdev);
	nicvf_open(nic->netdev);
	netif_trans_update(nic->netdev);
}

/* Tell the PF to enable/disable internal loopback for this VF */
static int nicvf_config_loopback(struct nicvf *nic,
				 netdev_features_t features)
{
	union nic_mbx mbx = {};

	mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
	mbx.lbk.vf_id = nic->vf_id;
	mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

/* ndo_fix_features: drop NETIF_F_LOOPBACK while the device is up if
 * loopback is not supported on this VF.
 */
static netdev_features_t nicvf_fix_features(struct net_device *netdev,
					    netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);

	if ((features & NETIF_F_LOOPBACK) &&
	    netif_running(netdev) && !nic->loopback_supported)
		features &= ~NETIF_F_LOOPBACK;

	return features;
}

/* ndo_set_features: apply VLAN-stripping and loopback changes */
static int nicvf_set_features(struct net_device *netdev,
			      netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);
	netdev_features_t changed = features ^ netdev->features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		nicvf_config_vlan_stripping(nic, features);

	if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
		return nicvf_config_loopback(nic, features);

	return 0;
}

/* Recompute queue counts (and how many secondary Qsets are needed)
 * when an XDP program is attached or detached.
 */
static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached)
{
	u8 cq_count, txq_count;

	/* Set XDP Tx queue count same as Rx
	 * queue count */
	if (!bpf_attached)
		nic->xdp_tx_queues = 0;
	else
		nic->xdp_tx_queues = nic->rx_queues;

	/* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets
	 * needs to be allocated, check how many.
	 */
	txq_count = nic->xdp_tx_queues + nic->tx_queues;
	cq_count = max(nic->rx_queues, txq_count);
	if (cq_count > MAX_CMP_QUEUES_PER_QS) {
		nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS);
		nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
	} else {
		nic->sqs_count = 0;
	}

	/* Set primary Qset's resources */
	nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
	nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
	nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt);

	/* Update stack */
	nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues);
}

/* Attach/detach an XDP program. The interface is restarted when it was
 * running so queue resources can be resized for XDP Tx queues.
 */
static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
{
	struct net_device *dev = nic->netdev;
	bool if_up = netif_running(nic->netdev);
	struct bpf_prog *old_prog;
	bool bpf_attached = false;

	/* For now just support only the usual MTU sized frames */
	if (prog && (dev->mtu > 1500)) {
		netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	/* ALL SQs attached to CQs i.e same as RQs, are treated as
	 * XDP Tx queues and more Tx queues are allocated for
	 * network stack to send pkts out.
	 *
	 * No of Tx queues are either same as Rx queues or whatever
	 * is left in max no of queues possible.
1724 */ 1725 if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { 1726 netdev_warn(dev, 1727 "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", 1728 nic->max_queues); 1729 return -ENOMEM; 1730 } 1731 1732 if (if_up) 1733 nicvf_stop(nic->netdev); 1734 1735 old_prog = xchg(&nic->xdp_prog, prog); 1736 /* Detach old prog, if any */ 1737 if (old_prog) 1738 bpf_prog_put(old_prog); 1739 1740 if (nic->xdp_prog) { 1741 /* Attach BPF program */ 1742 nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); 1743 if (!IS_ERR(nic->xdp_prog)) 1744 bpf_attached = true; 1745 } 1746 1747 /* Calculate Tx queues needed for XDP and network stack */ 1748 nicvf_set_xdp_queues(nic, bpf_attached); 1749 1750 if (if_up) { 1751 /* Reinitialize interface, clean slate */ 1752 nicvf_open(nic->netdev); 1753 netif_trans_update(nic->netdev); 1754 } 1755 1756 return 0; 1757 } 1758 1759 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) 1760 { 1761 struct nicvf *nic = netdev_priv(netdev); 1762 1763 /* To avoid checks while retrieving buffer address from CQE_RX, 1764 * do not support XDP for T88 pass1.x silicons which are anyway 1765 * not in use widely. 1766 */ 1767 if (pass1_silicon(nic->pdev)) 1768 return -EOPNOTSUPP; 1769 1770 switch (xdp->command) { 1771 case XDP_SETUP_PROG: 1772 return nicvf_xdp_setup(nic, xdp->prog); 1773 case XDP_QUERY_PROG: 1774 xdp->prog_attached = !!nic->xdp_prog; 1775 xdp->prog_id = nic->xdp_prog ? 
nic->xdp_prog->aux->id : 0; 1776 return 0; 1777 default: 1778 return -EINVAL; 1779 } 1780 } 1781 1782 static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp) 1783 { 1784 struct nicvf *nic = netdev_priv(netdev); 1785 struct nicvf *snic = nic; 1786 struct nicvf_xdp_tx *xdp_tx; 1787 struct snd_queue *sq; 1788 struct page *page; 1789 int err, qidx; 1790 1791 if (!netif_running(netdev) || !nic->xdp_prog) 1792 return -EINVAL; 1793 1794 page = virt_to_page(xdp->data); 1795 xdp_tx = (struct nicvf_xdp_tx *)page_address(page); 1796 qidx = xdp_tx->qidx; 1797 1798 if (xdp_tx->qidx >= nic->xdp_tx_queues) 1799 return -EINVAL; 1800 1801 /* Get secondary Qset's info */ 1802 if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) { 1803 qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS; 1804 snic = (struct nicvf *)nic->snicvf[qidx - 1]; 1805 if (!snic) 1806 return -EINVAL; 1807 qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS; 1808 } 1809 1810 sq = &snic->qs->sq[qidx]; 1811 err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data, 1812 xdp_tx->dma_addr, 1813 xdp->data_end - xdp->data); 1814 if (err) 1815 return -ENOMEM; 1816 1817 nicvf_xdp_sq_doorbell(snic, sq, qidx); 1818 return 0; 1819 } 1820 1821 static void nicvf_xdp_flush(struct net_device *dev) 1822 { 1823 return; 1824 } 1825 1826 static const struct net_device_ops nicvf_netdev_ops = { 1827 .ndo_open = nicvf_open, 1828 .ndo_stop = nicvf_stop, 1829 .ndo_start_xmit = nicvf_xmit, 1830 .ndo_change_mtu = nicvf_change_mtu, 1831 .ndo_set_mac_address = nicvf_set_mac_address, 1832 .ndo_get_stats64 = nicvf_get_stats64, 1833 .ndo_tx_timeout = nicvf_tx_timeout, 1834 .ndo_fix_features = nicvf_fix_features, 1835 .ndo_set_features = nicvf_set_features, 1836 .ndo_bpf = nicvf_xdp, 1837 .ndo_xdp_xmit = nicvf_xdp_xmit, 1838 .ndo_xdp_flush = nicvf_xdp_flush, 1839 }; 1840 1841 static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1842 { 1843 struct device *dev = &pdev->dev; 1844 struct net_device *netdev; 1845 struct nicvf *nic; 
1846 int err, qcount; 1847 u16 sdevid; 1848 1849 err = pci_enable_device(pdev); 1850 if (err) { 1851 dev_err(dev, "Failed to enable PCI device\n"); 1852 return err; 1853 } 1854 1855 err = pci_request_regions(pdev, DRV_NAME); 1856 if (err) { 1857 dev_err(dev, "PCI request regions failed 0x%x\n", err); 1858 goto err_disable_device; 1859 } 1860 1861 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); 1862 if (err) { 1863 dev_err(dev, "Unable to get usable DMA configuration\n"); 1864 goto err_release_regions; 1865 } 1866 1867 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); 1868 if (err) { 1869 dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); 1870 goto err_release_regions; 1871 } 1872 1873 qcount = netif_get_num_default_rss_queues(); 1874 1875 /* Restrict multiqset support only for host bound VFs */ 1876 if (pdev->is_virtfn) { 1877 /* Set max number of queues per VF */ 1878 qcount = min_t(int, num_online_cpus(), 1879 (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); 1880 } 1881 1882 netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); 1883 if (!netdev) { 1884 err = -ENOMEM; 1885 goto err_release_regions; 1886 } 1887 1888 pci_set_drvdata(pdev, netdev); 1889 1890 SET_NETDEV_DEV(netdev, &pdev->dev); 1891 1892 nic = netdev_priv(netdev); 1893 nic->netdev = netdev; 1894 nic->pdev = pdev; 1895 nic->pnicvf = nic; 1896 nic->max_queues = qcount; 1897 /* If no of CPUs are too low, there won't be any queues left 1898 * for XDP_TX, hence double it. 
1899 */ 1900 if (!nic->t88) 1901 nic->max_queues *= 2; 1902 1903 /* MAP VF's configuration registers */ 1904 nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); 1905 if (!nic->reg_base) { 1906 dev_err(dev, "Cannot map config register space, aborting\n"); 1907 err = -ENOMEM; 1908 goto err_free_netdev; 1909 } 1910 1911 nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats); 1912 if (!nic->drv_stats) { 1913 err = -ENOMEM; 1914 goto err_free_netdev; 1915 } 1916 1917 err = nicvf_set_qset_resources(nic); 1918 if (err) 1919 goto err_free_netdev; 1920 1921 /* Check if PF is alive and get MAC address for this VF */ 1922 err = nicvf_register_misc_interrupt(nic); 1923 if (err) 1924 goto err_free_netdev; 1925 1926 nicvf_send_vf_struct(nic); 1927 1928 if (!pass1_silicon(nic->pdev)) 1929 nic->hw_tso = true; 1930 1931 /* Get iommu domain for iova to physical addr conversion */ 1932 nic->iommu_domain = iommu_get_domain_for_dev(dev); 1933 1934 pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); 1935 if (sdevid == 0xA134) 1936 nic->t88 = true; 1937 1938 /* Check if this VF is in QS only mode */ 1939 if (nic->sqs_mode) 1940 return 0; 1941 1942 err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues); 1943 if (err) 1944 goto err_unregister_interrupts; 1945 1946 netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG | 1947 NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 | 1948 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 1949 NETIF_F_HW_VLAN_CTAG_RX); 1950 1951 netdev->hw_features |= NETIF_F_RXHASH; 1952 1953 netdev->features |= netdev->hw_features; 1954 netdev->hw_features |= NETIF_F_LOOPBACK; 1955 1956 netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | 1957 NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; 1958 1959 netdev->netdev_ops = &nicvf_netdev_ops; 1960 netdev->watchdog_timeo = NICVF_TX_TIMEOUT; 1961 1962 /* MTU range: 64 - 9200 */ 1963 netdev->min_mtu = NIC_HW_MIN_FRS; 1964 netdev->max_mtu = NIC_HW_MAX_FRS; 1965 1966 INIT_WORK(&nic->reset_task, 
nicvf_reset_task); 1967 1968 err = register_netdev(netdev); 1969 if (err) { 1970 dev_err(dev, "Failed to register netdevice\n"); 1971 goto err_unregister_interrupts; 1972 } 1973 1974 nic->msg_enable = debug; 1975 1976 nicvf_set_ethtool_ops(netdev); 1977 1978 return 0; 1979 1980 err_unregister_interrupts: 1981 nicvf_unregister_interrupts(nic); 1982 err_free_netdev: 1983 pci_set_drvdata(pdev, NULL); 1984 if (nic->drv_stats) 1985 free_percpu(nic->drv_stats); 1986 free_netdev(netdev); 1987 err_release_regions: 1988 pci_release_regions(pdev); 1989 err_disable_device: 1990 pci_disable_device(pdev); 1991 return err; 1992 } 1993 1994 static void nicvf_remove(struct pci_dev *pdev) 1995 { 1996 struct net_device *netdev = pci_get_drvdata(pdev); 1997 struct nicvf *nic; 1998 struct net_device *pnetdev; 1999 2000 if (!netdev) 2001 return; 2002 2003 nic = netdev_priv(netdev); 2004 pnetdev = nic->pnicvf->netdev; 2005 2006 /* Check if this Qset is assigned to different VF. 2007 * If yes, clean primary and all secondary Qsets. 
	 */
	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
		unregister_netdev(pnetdev);
	nicvf_unregister_interrupts(nic);
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	free_netdev(netdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

/* PCI shutdown: same teardown as remove */
static void nicvf_shutdown(struct pci_dev *pdev)
{
	nicvf_remove(pdev);
}

static struct pci_driver nicvf_driver = {
	.name = DRV_NAME,
	.id_table = nicvf_id_table,
	.probe = nicvf_probe,
	.remove = nicvf_remove,
	.shutdown = nicvf_shutdown,
};

/* Module entry: print the banner and register the PCI driver */
static int __init nicvf_init_module(void)
{
	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);

	return pci_register_driver(&nicvf_driver);
}

static void __exit nicvf_cleanup_module(void)
{
	pci_unregister_driver(&nicvf_driver);
}

module_init(nicvf_init_module);
module_exit(nicvf_cleanup_module);