1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Cavium, Inc. 4 */ 5 6 #include <linux/module.h> 7 #include <linux/interrupt.h> 8 #include <linux/pci.h> 9 #include <linux/netdevice.h> 10 #include <linux/if_vlan.h> 11 #include <linux/etherdevice.h> 12 #include <linux/ethtool.h> 13 #include <linux/log2.h> 14 #include <linux/prefetch.h> 15 #include <linux/irq.h> 16 #include <linux/iommu.h> 17 #include <linux/bpf.h> 18 #include <linux/bpf_trace.h> 19 #include <linux/filter.h> 20 #include <linux/net_tstamp.h> 21 #include <linux/workqueue.h> 22 23 #include "nic_reg.h" 24 #include "nic.h" 25 #include "nicvf_queues.h" 26 #include "thunder_bgx.h" 27 #include "../common/cavium_ptp.h" 28 29 #define DRV_NAME "nicvf" 30 #define DRV_VERSION "1.0" 31 32 /* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs 33 * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed 34 * this value, keeping headroom for the 14 byte Ethernet header and two 35 * VLAN tags (for QinQ) 36 */ 37 #define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) 38 39 /* Supported devices */ 40 static const struct pci_device_id nicvf_id_table[] = { 41 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 42 PCI_DEVICE_ID_THUNDER_NIC_VF, 43 PCI_VENDOR_ID_CAVIUM, 44 PCI_SUBSYS_DEVID_88XX_NIC_VF) }, 45 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 46 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, 47 PCI_VENDOR_ID_CAVIUM, 48 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) }, 49 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 50 PCI_DEVICE_ID_THUNDER_NIC_VF, 51 PCI_VENDOR_ID_CAVIUM, 52 PCI_SUBSYS_DEVID_81XX_NIC_VF) }, 53 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 54 PCI_DEVICE_ID_THUNDER_NIC_VF, 55 PCI_VENDOR_ID_CAVIUM, 56 PCI_SUBSYS_DEVID_83XX_NIC_VF) }, 57 { 0, } /* end of table */ 58 }; 59 60 MODULE_AUTHOR("Sunil Goutham"); 61 MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver"); 62 MODULE_LICENSE("GPL v2"); 63 MODULE_VERSION(DRV_VERSION); 64 MODULE_DEVICE_TABLE(pci, nicvf_id_table); 65 66 static int debug = 0x00; 67 module_param(debug, int, 0644); 68 MODULE_PARM_DESC(debug, "Debug message level bitmap"); 69 70 static int cpi_alg = CPI_ALG_NONE; 71 module_param(cpi_alg, int, 0444); 72 MODULE_PARM_DESC(cpi_alg, 73 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); 74 75 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) 76 { 77 if (nic->sqs_mode) 78 return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS); 79 else 80 return qidx; 81 } 82 83 /* The Cavium ThunderX network controller can *only* be found in SoCs 84 * containing the ThunderX ARM64 CPU implementation. All accesses to the device 85 * registers on this platform are implicitly strongly ordered with respect 86 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use 87 * with no memory barriers in this driver. The readq()/writeq() functions add 88 * explicit ordering operation which in this case are redundant, and only 89 * add overhead. 
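 * For example, the CQ doorbell update in nicvf_cq_intr_handler() goes
 * through nicvf_queue_reg_write() (i.e. writeq_relaxed()) with no
 * additional barrier around it.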
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
        writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
        return readq_relaxed(nic->reg_base + offset);
}

void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
                           u64 qidx, u64 val)
{
        void __iomem *addr = nic->reg_base + offset;

        writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
        void __iomem *addr = nic->reg_base + offset;

        return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
        u64 *msg = (u64 *)mbx;

        nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
        nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}

int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
        unsigned long timeout;
        int ret = 0;

        mutex_lock(&nic->rx_mode_mtx);

        nic->pf_acked = false;
        nic->pf_nacked = false;

        nicvf_write_to_mbx(nic, mbx);

        timeout = jiffies + msecs_to_jiffies(NIC_MBOX_MSG_TIMEOUT);
        /* Wait for the message to be ACKed, timeout 2sec */
        while (!nic->pf_acked) {
                if (nic->pf_nacked) {
                        netdev_err(nic->netdev,
                                   "PF NACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
                        ret = -EINVAL;
                        break;
                }
                usleep_range(8000, 10000);
                if (nic->pf_acked)
                        break;
                if (time_after(jiffies, timeout)) {
                        netdev_err(nic->netdev,
                                   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
                        ret = -EBUSY;
                        break;
                }
        }
        mutex_unlock(&nic->rx_mode_mtx);
        return ret;
}

/* Checks if the VF is able to communicate with the PF
 * and also gets the VNIC number this VF is associated with.
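 * (The PF answers with NIC_MBOX_MSG_READY; nicvf_handle_mbx_intr()
 * then records vf_id, node id, sqs_mode and the MAC address from
 * that reply.)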
166 */ 167 static int nicvf_check_pf_ready(struct nicvf *nic) 168 { 169 union nic_mbx mbx = {}; 170 171 mbx.msg.msg = NIC_MBOX_MSG_READY; 172 if (nicvf_send_msg_to_pf(nic, &mbx)) { 173 netdev_err(nic->netdev, 174 "PF didn't respond to READY msg\n"); 175 return 0; 176 } 177 178 return 1; 179 } 180 181 static void nicvf_send_cfg_done(struct nicvf *nic) 182 { 183 union nic_mbx mbx = {}; 184 185 mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; 186 if (nicvf_send_msg_to_pf(nic, &mbx)) { 187 netdev_err(nic->netdev, 188 "PF didn't respond to CFG DONE msg\n"); 189 } 190 } 191 192 static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) 193 { 194 if (bgx->rx) 195 nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats; 196 else 197 nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats; 198 } 199 200 static void nicvf_handle_mbx_intr(struct nicvf *nic) 201 { 202 union nic_mbx mbx = {}; 203 u64 *mbx_data; 204 u64 mbx_addr; 205 int i; 206 207 mbx_addr = NIC_VF_PF_MAILBOX_0_1; 208 mbx_data = (u64 *)&mbx; 209 210 for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { 211 *mbx_data = nicvf_reg_read(nic, mbx_addr); 212 mbx_data++; 213 mbx_addr += sizeof(u64); 214 } 215 216 netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg); 217 switch (mbx.msg.msg) { 218 case NIC_MBOX_MSG_READY: 219 nic->pf_acked = true; 220 nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; 221 nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; 222 nic->node = mbx.nic_cfg.node_id; 223 if (!nic->set_mac_pending) 224 ether_addr_copy(nic->netdev->dev_addr, 225 mbx.nic_cfg.mac_addr); 226 nic->sqs_mode = mbx.nic_cfg.sqs_mode; 227 nic->loopback_supported = mbx.nic_cfg.loopback_supported; 228 nic->link_up = false; 229 nic->duplex = 0; 230 nic->speed = 0; 231 break; 232 case NIC_MBOX_MSG_ACK: 233 nic->pf_acked = true; 234 break; 235 case NIC_MBOX_MSG_NACK: 236 nic->pf_nacked = true; 237 break; 238 case NIC_MBOX_MSG_RSS_SIZE: 239 nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; 240 nic->pf_acked = true; 241 break; 242 case NIC_MBOX_MSG_BGX_STATS: 243 nicvf_read_bgx_stats(nic, &mbx.bgx_stats); 244 nic->pf_acked = true; 245 break; 246 case NIC_MBOX_MSG_BGX_LINK_CHANGE: 247 nic->pf_acked = true; 248 if (nic->link_up != mbx.link_status.link_up) { 249 nic->link_up = mbx.link_status.link_up; 250 nic->duplex = mbx.link_status.duplex; 251 nic->speed = mbx.link_status.speed; 252 nic->mac_type = mbx.link_status.mac_type; 253 if (nic->link_up) { 254 netdev_info(nic->netdev, 255 "Link is Up %d Mbps %s duplex\n", 256 nic->speed, 257 nic->duplex == DUPLEX_FULL ? 258 "Full" : "Half"); 259 netif_carrier_on(nic->netdev); 260 netif_tx_start_all_queues(nic->netdev); 261 } else { 262 netdev_info(nic->netdev, "Link is Down\n"); 263 netif_carrier_off(nic->netdev); 264 netif_tx_stop_all_queues(nic->netdev); 265 } 266 } 267 break; 268 case NIC_MBOX_MSG_ALLOC_SQS: 269 nic->sqs_count = mbx.sqs_alloc.qs_count; 270 nic->pf_acked = true; 271 break; 272 case NIC_MBOX_MSG_SNICVF_PTR: 273 /* Primary VF: make note of secondary VF's pointer 274 * to be used while packet transmission. 275 */ 276 nic->snicvf[mbx.nicvf.sqs_id] = 277 (struct nicvf *)mbx.nicvf.nicvf; 278 nic->pf_acked = true; 279 break; 280 case NIC_MBOX_MSG_PNICVF_PTR: 281 /* Secondary VF/Qset: make note of primary VF's pointer 282 * to be used while packet reception, to handover packet 283 * to primary VF's netdev. 
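 * (nicvf_rcv_pkt_handler() switches to pnicvf->netdev when running
 * on a secondary Qset, so received packets are delivered through
 * the primary VF's netdev.)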
284 */ 285 nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf; 286 nic->pf_acked = true; 287 break; 288 case NIC_MBOX_MSG_PFC: 289 nic->pfc.autoneg = mbx.pfc.autoneg; 290 nic->pfc.fc_rx = mbx.pfc.fc_rx; 291 nic->pfc.fc_tx = mbx.pfc.fc_tx; 292 nic->pf_acked = true; 293 break; 294 default: 295 netdev_err(nic->netdev, 296 "Invalid message from PF, msg 0x%x\n", mbx.msg.msg); 297 break; 298 } 299 nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0); 300 } 301 302 static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev) 303 { 304 union nic_mbx mbx = {}; 305 306 mbx.mac.msg = NIC_MBOX_MSG_SET_MAC; 307 mbx.mac.vf_id = nic->vf_id; 308 ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr); 309 310 return nicvf_send_msg_to_pf(nic, &mbx); 311 } 312 313 static void nicvf_config_cpi(struct nicvf *nic) 314 { 315 union nic_mbx mbx = {}; 316 317 mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG; 318 mbx.cpi_cfg.vf_id = nic->vf_id; 319 mbx.cpi_cfg.cpi_alg = nic->cpi_alg; 320 mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt; 321 322 nicvf_send_msg_to_pf(nic, &mbx); 323 } 324 325 static void nicvf_get_rss_size(struct nicvf *nic) 326 { 327 union nic_mbx mbx = {}; 328 329 mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; 330 mbx.rss_size.vf_id = nic->vf_id; 331 nicvf_send_msg_to_pf(nic, &mbx); 332 } 333 334 void nicvf_config_rss(struct nicvf *nic) 335 { 336 union nic_mbx mbx = {}; 337 struct nicvf_rss_info *rss = &nic->rss_info; 338 int ind_tbl_len = rss->rss_size; 339 int i, nextq = 0; 340 341 mbx.rss_cfg.vf_id = nic->vf_id; 342 mbx.rss_cfg.hash_bits = rss->hash_bits; 343 while (ind_tbl_len) { 344 mbx.rss_cfg.tbl_offset = nextq; 345 mbx.rss_cfg.tbl_len = min(ind_tbl_len, 346 RSS_IND_TBL_LEN_PER_MBX_MSG); 347 mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ? 348 NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; 349 350 for (i = 0; i < mbx.rss_cfg.tbl_len; i++) 351 mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++]; 352 353 nicvf_send_msg_to_pf(nic, &mbx); 354 355 ind_tbl_len -= mbx.rss_cfg.tbl_len; 356 } 357 } 358 359 void nicvf_set_rss_key(struct nicvf *nic) 360 { 361 struct nicvf_rss_info *rss = &nic->rss_info; 362 u64 key_addr = NIC_VNIC_RSS_KEY_0_4; 363 int idx; 364 365 for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { 366 nicvf_reg_write(nic, key_addr, rss->key[idx]); 367 key_addr += sizeof(u64); 368 } 369 } 370 371 static int nicvf_rss_init(struct nicvf *nic) 372 { 373 struct nicvf_rss_info *rss = &nic->rss_info; 374 int idx; 375 376 nicvf_get_rss_size(nic); 377 378 if (cpi_alg != CPI_ALG_NONE) { 379 rss->enable = false; 380 rss->hash_bits = 0; 381 return 0; 382 } 383 384 rss->enable = true; 385 386 netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64)); 387 nicvf_set_rss_key(nic); 388 389 rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; 390 nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg); 391 392 rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size)); 393 394 for (idx = 0; idx < rss->rss_size; idx++) 395 rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, 396 nic->rx_queues); 397 nicvf_config_rss(nic); 398 return 1; 399 } 400 401 /* Request PF to allocate additional Qsets */ 402 static void nicvf_request_sqs(struct nicvf *nic) 403 { 404 union nic_mbx mbx = {}; 405 int sqs; 406 int sqs_count = nic->sqs_count; 407 int rx_queues = 0, tx_queues = 0; 408 409 /* Only primary VF should request */ 410 if (nic->sqs_mode || !nic->sqs_count) 411 return; 412 413 mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; 414 mbx.sqs_alloc.vf_id = nic->vf_id; 415 mbx.sqs_alloc.qs_count = nic->sqs_count; 416 if (nicvf_send_msg_to_pf(nic, 
&mbx)) { 417 /* No response from PF */ 418 nic->sqs_count = 0; 419 return; 420 } 421 422 /* Return if no Secondary Qsets available */ 423 if (!nic->sqs_count) 424 return; 425 426 if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) 427 rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; 428 429 tx_queues = nic->tx_queues + nic->xdp_tx_queues; 430 if (tx_queues > MAX_SND_QUEUES_PER_QS) 431 tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS; 432 433 /* Set no of Rx/Tx queues in each of the SQsets */ 434 for (sqs = 0; sqs < nic->sqs_count; sqs++) { 435 mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR; 436 mbx.nicvf.vf_id = nic->vf_id; 437 mbx.nicvf.sqs_id = sqs; 438 nicvf_send_msg_to_pf(nic, &mbx); 439 440 nic->snicvf[sqs]->sqs_id = sqs; 441 if (rx_queues > MAX_RCV_QUEUES_PER_QS) { 442 nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS; 443 rx_queues -= MAX_RCV_QUEUES_PER_QS; 444 } else { 445 nic->snicvf[sqs]->qs->rq_cnt = rx_queues; 446 rx_queues = 0; 447 } 448 449 if (tx_queues > MAX_SND_QUEUES_PER_QS) { 450 nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS; 451 tx_queues -= MAX_SND_QUEUES_PER_QS; 452 } else { 453 nic->snicvf[sqs]->qs->sq_cnt = tx_queues; 454 tx_queues = 0; 455 } 456 457 nic->snicvf[sqs]->qs->cq_cnt = 458 max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt); 459 460 /* Initialize secondary Qset's queues and its interrupts */ 461 nicvf_open(nic->snicvf[sqs]->netdev); 462 } 463 464 /* Update stack with actual Rx/Tx queue count allocated */ 465 if (sqs_count != nic->sqs_count) 466 nicvf_set_real_num_queues(nic->netdev, 467 nic->tx_queues, nic->rx_queues); 468 } 469 470 /* Send this Qset's nicvf pointer to PF. 471 * PF inturn sends primary VF's nicvf struct to secondary Qsets/VFs 472 * so that packets received by these Qsets can use primary VF's netdev 473 */ 474 static void nicvf_send_vf_struct(struct nicvf *nic) 475 { 476 union nic_mbx mbx = {}; 477 478 mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR; 479 mbx.nicvf.sqs_mode = nic->sqs_mode; 480 mbx.nicvf.nicvf = (u64)nic; 481 nicvf_send_msg_to_pf(nic, &mbx); 482 } 483 484 static void nicvf_get_primary_vf_struct(struct nicvf *nic) 485 { 486 union nic_mbx mbx = {}; 487 488 mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR; 489 nicvf_send_msg_to_pf(nic, &mbx); 490 } 491 492 int nicvf_set_real_num_queues(struct net_device *netdev, 493 int tx_queues, int rx_queues) 494 { 495 int err = 0; 496 497 err = netif_set_real_num_tx_queues(netdev, tx_queues); 498 if (err) { 499 netdev_err(netdev, 500 "Failed to set no of Tx queues: %d\n", tx_queues); 501 return err; 502 } 503 504 err = netif_set_real_num_rx_queues(netdev, rx_queues); 505 if (err) 506 netdev_err(netdev, 507 "Failed to set no of Rx queues: %d\n", rx_queues); 508 return err; 509 } 510 511 static int nicvf_init_resources(struct nicvf *nic) 512 { 513 int err; 514 515 /* Enable Qset */ 516 nicvf_qset_config(nic, true); 517 518 /* Initialize queues and HW for data transfer */ 519 err = nicvf_config_data_transfer(nic, true); 520 if (err) { 521 netdev_err(nic->netdev, 522 "Failed to alloc/config VF's QSet resources\n"); 523 return err; 524 } 525 526 return 0; 527 } 528 529 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, 530 struct cqe_rx_t *cqe_rx, struct snd_queue *sq, 531 struct rcv_queue *rq, struct sk_buff **skb) 532 { 533 struct xdp_buff xdp; 534 struct page *page; 535 u32 action; 536 u16 len, offset = 0; 537 u64 dma_addr, cpu_addr; 538 void *orig_data; 539 540 /* Retrieve packet buffer's DMA address and length */ 541 len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); 542 
dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64)))); 543 544 cpu_addr = nicvf_iova_to_phys(nic, dma_addr); 545 if (!cpu_addr) 546 return false; 547 cpu_addr = (u64)phys_to_virt(cpu_addr); 548 page = virt_to_page((void *)cpu_addr); 549 550 xdp.data_hard_start = page_address(page); 551 xdp.data = (void *)cpu_addr; 552 xdp_set_data_meta_invalid(&xdp); 553 xdp.data_end = xdp.data + len; 554 xdp.rxq = &rq->xdp_rxq; 555 orig_data = xdp.data; 556 557 rcu_read_lock(); 558 action = bpf_prog_run_xdp(prog, &xdp); 559 rcu_read_unlock(); 560 561 len = xdp.data_end - xdp.data; 562 /* Check if XDP program has changed headers */ 563 if (orig_data != xdp.data) { 564 offset = orig_data - xdp.data; 565 dma_addr -= offset; 566 } 567 568 switch (action) { 569 case XDP_PASS: 570 /* Check if it's a recycled page, if not 571 * unmap the DMA mapping. 572 * 573 * Recycled page holds an extra reference. 574 */ 575 if (page_ref_count(page) == 1) { 576 dma_addr &= PAGE_MASK; 577 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, 578 RCV_FRAG_LEN + XDP_PACKET_HEADROOM, 579 DMA_FROM_DEVICE, 580 DMA_ATTR_SKIP_CPU_SYNC); 581 } 582 583 /* Build SKB and pass on packet to network stack */ 584 *skb = build_skb(xdp.data, 585 RCV_FRAG_LEN - cqe_rx->align_pad + offset); 586 if (!*skb) 587 put_page(page); 588 else 589 skb_put(*skb, len); 590 return false; 591 case XDP_TX: 592 nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); 593 return true; 594 default: 595 bpf_warn_invalid_xdp_action(action); 596 /* fall through */ 597 case XDP_ABORTED: 598 trace_xdp_exception(nic->netdev, prog, action); 599 /* fall through */ 600 case XDP_DROP: 601 /* Check if it's a recycled page, if not 602 * unmap the DMA mapping. 603 * 604 * Recycled page holds an extra reference. 605 */ 606 if (page_ref_count(page) == 1) { 607 dma_addr &= PAGE_MASK; 608 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, 609 RCV_FRAG_LEN + XDP_PACKET_HEADROOM, 610 DMA_FROM_DEVICE, 611 DMA_ATTR_SKIP_CPU_SYNC); 612 } 613 put_page(page); 614 return true; 615 } 616 return false; 617 } 618 619 static void nicvf_snd_ptp_handler(struct net_device *netdev, 620 struct cqe_send_t *cqe_tx) 621 { 622 struct nicvf *nic = netdev_priv(netdev); 623 struct skb_shared_hwtstamps ts; 624 u64 ns; 625 626 nic = nic->pnicvf; 627 628 /* Sync for 'ptp_skb' */ 629 smp_rmb(); 630 631 /* New timestamp request can be queued now */ 632 atomic_set(&nic->tx_ptp_skbs, 0); 633 634 /* Check for timestamp requested skb */ 635 if (!nic->ptp_skb) 636 return; 637 638 /* Check if timestamping is timedout, which is set to 10us */ 639 if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT || 640 cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT) 641 goto no_tstamp; 642 643 /* Get the timestamp */ 644 memset(&ts, 0, sizeof(ts)); 645 ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp); 646 ts.hwtstamp = ns_to_ktime(ns); 647 skb_tstamp_tx(nic->ptp_skb, &ts); 648 649 no_tstamp: 650 /* Free the original skb */ 651 dev_kfree_skb_any(nic->ptp_skb); 652 nic->ptp_skb = NULL; 653 /* Sync 'ptp_skb' */ 654 smp_wmb(); 655 } 656 657 static void nicvf_snd_pkt_handler(struct net_device *netdev, 658 struct cqe_send_t *cqe_tx, 659 int budget, int *subdesc_cnt, 660 unsigned int *tx_pkts, unsigned int *tx_bytes) 661 { 662 struct sk_buff *skb = NULL; 663 struct page *page; 664 struct nicvf *nic = netdev_priv(netdev); 665 struct snd_queue *sq; 666 struct sq_hdr_subdesc *hdr; 667 struct sq_hdr_subdesc *tso_sqe; 668 669 sq = &nic->qs->sq[cqe_tx->sq_idx]; 670 671 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, 
cqe_tx->sqe_ptr); 672 if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) 673 return; 674 675 /* Check for errors */ 676 if (cqe_tx->send_status) 677 nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); 678 679 /* Is this a XDP designated Tx queue */ 680 if (sq->is_xdp) { 681 page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; 682 /* Check if it's recycled page or else unmap DMA mapping */ 683 if (page && (page_ref_count(page) == 1)) 684 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 685 hdr->subdesc_cnt); 686 687 /* Release page reference for recycling */ 688 if (page) 689 put_page(page); 690 sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; 691 *subdesc_cnt += hdr->subdesc_cnt + 1; 692 return; 693 } 694 695 skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; 696 if (skb) { 697 /* Check for dummy descriptor used for HW TSO offload on 88xx */ 698 if (hdr->dont_send) { 699 /* Get actual TSO descriptors and free them */ 700 tso_sqe = 701 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); 702 nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, 703 tso_sqe->subdesc_cnt); 704 *subdesc_cnt += tso_sqe->subdesc_cnt + 1; 705 } else { 706 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 707 hdr->subdesc_cnt); 708 } 709 *subdesc_cnt += hdr->subdesc_cnt + 1; 710 prefetch(skb); 711 (*tx_pkts)++; 712 *tx_bytes += skb->len; 713 /* If timestamp is requested for this skb, don't free it */ 714 if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && 715 !nic->pnicvf->ptp_skb) 716 nic->pnicvf->ptp_skb = skb; 717 else 718 napi_consume_skb(skb, budget); 719 sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; 720 } else { 721 /* In case of SW TSO on 88xx, only last segment will have 722 * a SKB attached, so just free SQEs here. 723 */ 724 if (!nic->hw_tso) 725 *subdesc_cnt += hdr->subdesc_cnt + 1; 726 } 727 } 728 729 static inline void nicvf_set_rxhash(struct net_device *netdev, 730 struct cqe_rx_t *cqe_rx, 731 struct sk_buff *skb) 732 { 733 u8 hash_type; 734 u32 hash; 735 736 if (!(netdev->features & NETIF_F_RXHASH)) 737 return; 738 739 switch (cqe_rx->rss_alg) { 740 case RSS_ALG_TCP_IP: 741 case RSS_ALG_UDP_IP: 742 hash_type = PKT_HASH_TYPE_L4; 743 hash = cqe_rx->rss_tag; 744 break; 745 case RSS_ALG_IP: 746 hash_type = PKT_HASH_TYPE_L3; 747 hash = cqe_rx->rss_tag; 748 break; 749 default: 750 hash_type = PKT_HASH_TYPE_NONE; 751 hash = 0; 752 } 753 754 skb_set_hash(skb, hash, hash_type); 755 } 756 757 static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb) 758 { 759 u64 ns; 760 761 if (!nic->ptp_clock || !nic->hw_rx_tstamp) 762 return; 763 764 /* The first 8 bytes is the timestamp */ 765 ns = cavium_ptp_tstamp2time(nic->ptp_clock, 766 be64_to_cpu(*(__be64 *)skb->data)); 767 skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns); 768 769 __skb_pull(skb, 8); 770 } 771 772 static void nicvf_rcv_pkt_handler(struct net_device *netdev, 773 struct napi_struct *napi, 774 struct cqe_rx_t *cqe_rx, 775 struct snd_queue *sq, struct rcv_queue *rq) 776 { 777 struct sk_buff *skb = NULL; 778 struct nicvf *nic = netdev_priv(netdev); 779 struct nicvf *snic = nic; 780 int err = 0; 781 int rq_idx; 782 783 rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx); 784 785 if (nic->sqs_mode) { 786 /* Use primary VF's 'nicvf' struct */ 787 nic = nic->pnicvf; 788 netdev = nic->netdev; 789 } 790 791 /* Check for errors */ 792 if (cqe_rx->err_level || cqe_rx->err_opcode) { 793 err = nicvf_check_cqe_rx_errs(nic, cqe_rx); 794 if (err && !cqe_rx->rb_cnt) 795 return; 796 } 797 798 /* For XDP, ignore pkts spanning multiple pages */ 799 if (nic->xdp_prog && 
(cqe_rx->rb_cnt == 1)) { 800 /* Packet consumed by XDP */ 801 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb)) 802 return; 803 } else { 804 skb = nicvf_get_rcv_skb(snic, cqe_rx, 805 nic->xdp_prog ? true : false); 806 } 807 808 if (!skb) 809 return; 810 811 if (netif_msg_pktdata(nic)) { 812 netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len); 813 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, 814 skb->data, skb->len, true); 815 } 816 817 /* If error packet, drop it here */ 818 if (err) { 819 dev_kfree_skb_any(skb); 820 return; 821 } 822 823 nicvf_set_rxtstamp(nic, skb); 824 nicvf_set_rxhash(netdev, cqe_rx, skb); 825 826 skb_record_rx_queue(skb, rq_idx); 827 if (netdev->hw_features & NETIF_F_RXCSUM) { 828 /* HW by default verifies TCP/UDP/SCTP checksums */ 829 skb->ip_summed = CHECKSUM_UNNECESSARY; 830 } else { 831 skb_checksum_none_assert(skb); 832 } 833 834 skb->protocol = eth_type_trans(skb, netdev); 835 836 /* Check for stripped VLAN */ 837 if (cqe_rx->vlan_found && cqe_rx->vlan_stripped) 838 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 839 ntohs((__force __be16)cqe_rx->vlan_tci)); 840 841 if (napi && (netdev->features & NETIF_F_GRO)) 842 napi_gro_receive(napi, skb); 843 else 844 netif_receive_skb(skb); 845 } 846 847 static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, 848 struct napi_struct *napi, int budget) 849 { 850 int processed_cqe, work_done = 0, tx_done = 0; 851 int cqe_count, cqe_head; 852 int subdesc_cnt = 0; 853 struct nicvf *nic = netdev_priv(netdev); 854 struct queue_set *qs = nic->qs; 855 struct cmp_queue *cq = &qs->cq[cq_idx]; 856 struct cqe_rx_t *cq_desc; 857 struct netdev_queue *txq; 858 struct snd_queue *sq = &qs->sq[cq_idx]; 859 struct rcv_queue *rq = &qs->rq[cq_idx]; 860 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; 861 862 spin_lock_bh(&cq->lock); 863 loop: 864 processed_cqe = 0; 865 /* Get no of valid CQ entries to process */ 866 cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); 867 cqe_count &= CQ_CQE_COUNT; 868 if (!cqe_count) 869 goto done; 870 871 /* Get head of the valid CQ entries */ 872 cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; 873 cqe_head &= 0xFFFF; 874 875 while (processed_cqe < cqe_count) { 876 /* Get the CQ descriptor */ 877 cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); 878 cqe_head++; 879 cqe_head &= (cq->dmem.q_len - 1); 880 /* Initiate prefetch for next descriptor */ 881 prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); 882 883 if ((work_done >= budget) && napi && 884 (cq_desc->cqe_type != CQE_TYPE_SEND)) { 885 break; 886 } 887 888 switch (cq_desc->cqe_type) { 889 case CQE_TYPE_RX: 890 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq); 891 work_done++; 892 break; 893 case CQE_TYPE_SEND: 894 nicvf_snd_pkt_handler(netdev, (void *)cq_desc, 895 budget, &subdesc_cnt, 896 &tx_pkts, &tx_bytes); 897 tx_done++; 898 break; 899 case CQE_TYPE_SEND_PTP: 900 nicvf_snd_ptp_handler(netdev, (void *)cq_desc); 901 break; 902 case CQE_TYPE_INVALID: 903 case CQE_TYPE_RX_SPLIT: 904 case CQE_TYPE_RX_TCP: 905 /* Ignore for now */ 906 break; 907 } 908 processed_cqe++; 909 } 910 911 /* Ring doorbell to inform H/W to reuse processed CQEs */ 912 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, 913 cq_idx, processed_cqe); 914 915 if ((work_done < budget) && napi) 916 goto loop; 917 918 done: 919 /* Update SQ's descriptor free count */ 920 if (subdesc_cnt) 921 nicvf_put_sq_desc(sq, subdesc_cnt); 922 923 txq_idx = nicvf_netdev_qidx(nic, cq_idx); 924 /* 
Handle XDP TX queues */ 925 if (nic->pnicvf->xdp_prog) { 926 if (txq_idx < nic->pnicvf->xdp_tx_queues) { 927 nicvf_xdp_sq_doorbell(nic, sq, cq_idx); 928 goto out; 929 } 930 nic = nic->pnicvf; 931 txq_idx -= nic->pnicvf->xdp_tx_queues; 932 } 933 934 /* Wakeup TXQ if its stopped earlier due to SQ full */ 935 if (tx_done || 936 (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { 937 netdev = nic->pnicvf->netdev; 938 txq = netdev_get_tx_queue(netdev, txq_idx); 939 if (tx_pkts) 940 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 941 942 /* To read updated queue and carrier status */ 943 smp_mb(); 944 if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { 945 netif_tx_wake_queue(txq); 946 nic = nic->pnicvf; 947 this_cpu_inc(nic->drv_stats->txq_wake); 948 netif_warn(nic, tx_err, netdev, 949 "Transmit queue wakeup SQ%d\n", txq_idx); 950 } 951 } 952 953 out: 954 spin_unlock_bh(&cq->lock); 955 return work_done; 956 } 957 958 static int nicvf_poll(struct napi_struct *napi, int budget) 959 { 960 u64 cq_head; 961 int work_done = 0; 962 struct net_device *netdev = napi->dev; 963 struct nicvf *nic = netdev_priv(netdev); 964 struct nicvf_cq_poll *cq; 965 966 cq = container_of(napi, struct nicvf_cq_poll, napi); 967 work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); 968 969 if (work_done < budget) { 970 /* Slow packet rate, exit polling */ 971 napi_complete_done(napi, work_done); 972 /* Re-enable interrupts */ 973 cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, 974 cq->cq_idx); 975 nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 976 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, 977 cq->cq_idx, cq_head); 978 nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 979 } 980 return work_done; 981 } 982 983 /* Qset error interrupt handler 984 * 985 * As of now only CQ errors are handled 986 */ 987 static void nicvf_handle_qs_err(unsigned long data) 988 { 989 struct nicvf *nic = (struct nicvf *)data; 990 struct queue_set *qs = nic->qs; 991 int qidx; 992 u64 status; 993 994 netif_tx_disable(nic->netdev); 995 996 /* Check if it is CQ err */ 997 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 998 status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, 999 qidx); 1000 if (!(status & CQ_ERR_MASK)) 1001 continue; 1002 /* Process already queued CQEs and reconfig CQ */ 1003 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1004 nicvf_sq_disable(nic, qidx); 1005 nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0); 1006 nicvf_cmp_queue_config(nic, qs, qidx, true); 1007 nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); 1008 nicvf_sq_enable(nic, &qs->sq[qidx], qidx); 1009 1010 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 1011 } 1012 1013 netif_tx_start_all_queues(nic->netdev); 1014 /* Re-enable Qset error interrupt */ 1015 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 1016 } 1017 1018 static void nicvf_dump_intr_status(struct nicvf *nic) 1019 { 1020 netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n", 1021 nicvf_reg_read(nic, NIC_VF_INT)); 1022 } 1023 1024 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) 1025 { 1026 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1027 u64 intr; 1028 1029 nicvf_dump_intr_status(nic); 1030 1031 intr = nicvf_reg_read(nic, NIC_VF_INT); 1032 /* Check for spurious interrupt */ 1033 if (!(intr & NICVF_INTR_MBOX_MASK)) 1034 return IRQ_HANDLED; 1035 1036 nicvf_handle_mbx_intr(nic); 1037 1038 return IRQ_HANDLED; 1039 } 1040 1041 static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) 1042 { 1043 struct nicvf_cq_poll 
*cq_poll = (struct nicvf_cq_poll *)cq_irq; 1044 struct nicvf *nic = cq_poll->nicvf; 1045 int qidx = cq_poll->cq_idx; 1046 1047 nicvf_dump_intr_status(nic); 1048 1049 /* Disable interrupts */ 1050 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1051 1052 /* Schedule NAPI */ 1053 napi_schedule_irqoff(&cq_poll->napi); 1054 1055 /* Clear interrupt */ 1056 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 1057 1058 return IRQ_HANDLED; 1059 } 1060 1061 static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq) 1062 { 1063 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1064 u8 qidx; 1065 1066 1067 nicvf_dump_intr_status(nic); 1068 1069 /* Disable RBDR interrupt and schedule softirq */ 1070 for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) { 1071 if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) 1072 continue; 1073 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 1074 tasklet_hi_schedule(&nic->rbdr_task); 1075 /* Clear interrupt */ 1076 nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); 1077 } 1078 1079 return IRQ_HANDLED; 1080 } 1081 1082 static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) 1083 { 1084 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1085 1086 nicvf_dump_intr_status(nic); 1087 1088 /* Disable Qset err interrupt and schedule softirq */ 1089 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1090 tasklet_hi_schedule(&nic->qs_err_task); 1091 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1092 1093 return IRQ_HANDLED; 1094 } 1095 1096 static void nicvf_set_irq_affinity(struct nicvf *nic) 1097 { 1098 int vec, cpu; 1099 1100 for (vec = 0; vec < nic->num_vec; vec++) { 1101 if (!nic->irq_allocated[vec]) 1102 continue; 1103 1104 if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) 1105 return; 1106 /* CQ interrupts */ 1107 if (vec < NICVF_INTR_ID_SQ) 1108 /* Leave CPU0 for RBDR and other interrupts */ 1109 cpu = nicvf_netdev_qidx(nic, vec) + 1; 1110 else 1111 cpu = 0; 1112 1113 cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), 1114 nic->affinity_mask[vec]); 1115 irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), 1116 nic->affinity_mask[vec]); 1117 } 1118 } 1119 1120 static int nicvf_register_interrupts(struct nicvf *nic) 1121 { 1122 int irq, ret = 0; 1123 1124 for_each_cq_irq(irq) 1125 sprintf(nic->irq_name[irq], "%s-rxtx-%d", 1126 nic->pnicvf->netdev->name, 1127 nicvf_netdev_qidx(nic, irq)); 1128 1129 for_each_sq_irq(irq) 1130 sprintf(nic->irq_name[irq], "%s-sq-%d", 1131 nic->pnicvf->netdev->name, 1132 nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); 1133 1134 for_each_rbdr_irq(irq) 1135 sprintf(nic->irq_name[irq], "%s-rbdr-%d", 1136 nic->pnicvf->netdev->name, 1137 nic->sqs_mode ? (nic->sqs_id + 1) : 0); 1138 1139 /* Register CQ interrupts */ 1140 for (irq = 0; irq < nic->qs->cq_cnt; irq++) { 1141 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1142 nicvf_intr_handler, 1143 0, nic->irq_name[irq], nic->napi[irq]); 1144 if (ret) 1145 goto err; 1146 nic->irq_allocated[irq] = true; 1147 } 1148 1149 /* Register RBDR interrupt */ 1150 for (irq = NICVF_INTR_ID_RBDR; 1151 irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { 1152 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1153 nicvf_rbdr_intr_handler, 1154 0, nic->irq_name[irq], nic); 1155 if (ret) 1156 goto err; 1157 nic->irq_allocated[irq] = true; 1158 } 1159 1160 /* Register QS error interrupt */ 1161 sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", 1162 nic->pnicvf->netdev->name, 1163 nic->sqs_mode ? 
(nic->sqs_id + 1) : 0); 1164 irq = NICVF_INTR_ID_QS_ERR; 1165 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1166 nicvf_qs_err_intr_handler, 1167 0, nic->irq_name[irq], nic); 1168 if (ret) 1169 goto err; 1170 1171 nic->irq_allocated[irq] = true; 1172 1173 /* Set IRQ affinities */ 1174 nicvf_set_irq_affinity(nic); 1175 1176 err: 1177 if (ret) 1178 netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq); 1179 1180 return ret; 1181 } 1182 1183 static void nicvf_unregister_interrupts(struct nicvf *nic) 1184 { 1185 struct pci_dev *pdev = nic->pdev; 1186 int irq; 1187 1188 /* Free registered interrupts */ 1189 for (irq = 0; irq < nic->num_vec; irq++) { 1190 if (!nic->irq_allocated[irq]) 1191 continue; 1192 1193 irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); 1194 free_cpumask_var(nic->affinity_mask[irq]); 1195 1196 if (irq < NICVF_INTR_ID_SQ) 1197 free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); 1198 else 1199 free_irq(pci_irq_vector(pdev, irq), nic); 1200 1201 nic->irq_allocated[irq] = false; 1202 } 1203 1204 /* Disable MSI-X */ 1205 pci_free_irq_vectors(pdev); 1206 nic->num_vec = 0; 1207 } 1208 1209 /* Initialize MSIX vectors and register MISC interrupt. 1210 * Send READY message to PF to check if its alive 1211 */ 1212 static int nicvf_register_misc_interrupt(struct nicvf *nic) 1213 { 1214 int ret = 0; 1215 int irq = NICVF_INTR_ID_MISC; 1216 1217 /* Return if mailbox interrupt is already registered */ 1218 if (nic->pdev->msix_enabled) 1219 return 0; 1220 1221 /* Enable MSI-X */ 1222 nic->num_vec = pci_msix_vec_count(nic->pdev); 1223 ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, 1224 PCI_IRQ_MSIX); 1225 if (ret < 0) { 1226 netdev_err(nic->netdev, 1227 "Req for #%d msix vectors failed\n", nic->num_vec); 1228 return 1; 1229 } 1230 1231 sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); 1232 /* Register Misc interrupt */ 1233 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1234 nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); 1235 1236 if (ret) 1237 return ret; 1238 nic->irq_allocated[irq] = true; 1239 1240 /* Enable mailbox interrupt */ 1241 nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0); 1242 1243 /* Check if VF is able to communicate with PF */ 1244 if (!nicvf_check_pf_ready(nic)) { 1245 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1246 nicvf_unregister_interrupts(nic); 1247 return 1; 1248 } 1249 1250 return 0; 1251 } 1252 1253 static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) 1254 { 1255 struct nicvf *nic = netdev_priv(netdev); 1256 int qid = skb_get_queue_mapping(skb); 1257 struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); 1258 struct nicvf *snic; 1259 struct snd_queue *sq; 1260 int tmp; 1261 1262 /* Check for minimum packet length */ 1263 if (skb->len <= ETH_HLEN) { 1264 dev_kfree_skb(skb); 1265 return NETDEV_TX_OK; 1266 } 1267 1268 /* In XDP case, initial HW tx queues are used for XDP, 1269 * but stack's queue mapping starts at '0', so skip the 1270 * Tx queues attached to Rx queues for XDP. 
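         * For example (a rough illustration): with 4 Rx queues and an
         * XDP program attached, xdp_tx_queues is 4, so the stack's Tx
         * queue 0 is actually transmitted on hardware SQ4.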
1271 */ 1272 if (nic->xdp_prog) 1273 qid += nic->xdp_tx_queues; 1274 1275 snic = nic; 1276 /* Get secondary Qset's SQ structure */ 1277 if (qid >= MAX_SND_QUEUES_PER_QS) { 1278 tmp = qid / MAX_SND_QUEUES_PER_QS; 1279 snic = (struct nicvf *)nic->snicvf[tmp - 1]; 1280 if (!snic) { 1281 netdev_warn(nic->netdev, 1282 "Secondary Qset#%d's ptr not initialized\n", 1283 tmp - 1); 1284 dev_kfree_skb(skb); 1285 return NETDEV_TX_OK; 1286 } 1287 qid = qid % MAX_SND_QUEUES_PER_QS; 1288 } 1289 1290 sq = &snic->qs->sq[qid]; 1291 if (!netif_tx_queue_stopped(txq) && 1292 !nicvf_sq_append_skb(snic, sq, skb, qid)) { 1293 netif_tx_stop_queue(txq); 1294 1295 /* Barrier, so that stop_queue visible to other cpus */ 1296 smp_mb(); 1297 1298 /* Check again, incase another cpu freed descriptors */ 1299 if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) { 1300 netif_tx_wake_queue(txq); 1301 } else { 1302 this_cpu_inc(nic->drv_stats->txq_stop); 1303 netif_warn(nic, tx_err, netdev, 1304 "Transmit ring full, stopping SQ%d\n", qid); 1305 } 1306 return NETDEV_TX_BUSY; 1307 } 1308 1309 return NETDEV_TX_OK; 1310 } 1311 1312 static inline void nicvf_free_cq_poll(struct nicvf *nic) 1313 { 1314 struct nicvf_cq_poll *cq_poll; 1315 int qidx; 1316 1317 for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { 1318 cq_poll = nic->napi[qidx]; 1319 if (!cq_poll) 1320 continue; 1321 nic->napi[qidx] = NULL; 1322 kfree(cq_poll); 1323 } 1324 } 1325 1326 int nicvf_stop(struct net_device *netdev) 1327 { 1328 int irq, qidx; 1329 struct nicvf *nic = netdev_priv(netdev); 1330 struct queue_set *qs = nic->qs; 1331 struct nicvf_cq_poll *cq_poll = NULL; 1332 union nic_mbx mbx = {}; 1333 1334 /* wait till all queued set_rx_mode tasks completes */ 1335 if (nic->nicvf_rx_mode_wq) { 1336 cancel_delayed_work_sync(&nic->link_change_work); 1337 drain_workqueue(nic->nicvf_rx_mode_wq); 1338 } 1339 1340 mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; 1341 nicvf_send_msg_to_pf(nic, &mbx); 1342 1343 netif_carrier_off(netdev); 1344 netif_tx_stop_all_queues(nic->netdev); 1345 nic->link_up = false; 1346 1347 /* Teardown secondary qsets first */ 1348 if (!nic->sqs_mode) { 1349 for (qidx = 0; qidx < nic->sqs_count; qidx++) { 1350 if (!nic->snicvf[qidx]) 1351 continue; 1352 nicvf_stop(nic->snicvf[qidx]->netdev); 1353 nic->snicvf[qidx] = NULL; 1354 } 1355 } 1356 1357 /* Disable RBDR & QS error interrupts */ 1358 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { 1359 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 1360 nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); 1361 } 1362 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1363 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1364 1365 /* Wait for pending IRQ handlers to finish */ 1366 for (irq = 0; irq < nic->num_vec; irq++) 1367 synchronize_irq(pci_irq_vector(nic->pdev, irq)); 1368 1369 tasklet_kill(&nic->rbdr_task); 1370 tasklet_kill(&nic->qs_err_task); 1371 if (nic->rb_work_scheduled) 1372 cancel_delayed_work_sync(&nic->rbdr_work); 1373 1374 for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { 1375 cq_poll = nic->napi[qidx]; 1376 if (!cq_poll) 1377 continue; 1378 napi_synchronize(&cq_poll->napi); 1379 /* CQ intr is enabled while napi_complete, 1380 * so disable it now 1381 */ 1382 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1383 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 1384 napi_disable(&cq_poll->napi); 1385 netif_napi_del(&cq_poll->napi); 1386 } 1387 1388 netif_tx_disable(netdev); 1389 1390 for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) 1391 netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); 1392 1393 /* Free 
resources */
        nicvf_config_data_transfer(nic, false);

        /* Disable HW Qset */
        nicvf_qset_config(nic, false);

        /* Disable mailbox interrupt */
        nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

        nicvf_unregister_interrupts(nic);

        nicvf_free_cq_poll(nic);

        /* Free any pending SKB saved to receive timestamp */
        if (nic->ptp_skb) {
                dev_kfree_skb_any(nic->ptp_skb);
                nic->ptp_skb = NULL;
        }

        /* Clear multiqset info */
        nic->pnicvf = nic;

        return 0;
}

static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
{
        union nic_mbx mbx = {};

        mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
        mbx.ptp.enable = enable;

        return nicvf_send_msg_to_pf(nic, &mbx);
}

static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
        union nic_mbx mbx = {};

        mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
        mbx.frs.max_frs = mtu;
        mbx.frs.vf_id = nic->vf_id;

        return nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_link_status_check_task(struct work_struct *work_arg)
{
        struct nicvf *nic = container_of(work_arg,
                                         struct nicvf,
                                         link_change_work.work);
        union nic_mbx mbx = {};

        mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
        nicvf_send_msg_to_pf(nic, &mbx);
        queue_delayed_work(nic->nicvf_rx_mode_wq,
                           &nic->link_change_work, 2 * HZ);
}

int nicvf_open(struct net_device *netdev)
{
        int cpu, err, qidx;
        struct nicvf *nic = netdev_priv(netdev);
        struct queue_set *qs = nic->qs;
        struct nicvf_cq_poll *cq_poll = NULL;

        /* Wait till all queued set_rx_mode tasks complete, if any */
        if (nic->nicvf_rx_mode_wq)
                drain_workqueue(nic->nicvf_rx_mode_wq);

        netif_carrier_off(netdev);

        err = nicvf_register_misc_interrupt(nic);
        if (err)
                return err;

        /* Register NAPI handler for processing CQEs */
        for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
                cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
                if (!cq_poll) {
                        err = -ENOMEM;
                        goto napi_del;
                }
                cq_poll->cq_idx = qidx;
                cq_poll->nicvf = nic;
                netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
                               NAPI_POLL_WEIGHT);
                napi_enable(&cq_poll->napi);
                nic->napi[qidx] = cq_poll;
        }

        /* Check if we got a MAC address from the PF, else generate a random MAC */
        if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
                eth_hw_addr_random(netdev);
                nicvf_hw_set_mac_addr(nic, netdev);
        }

        if (nic->set_mac_pending) {
                nic->set_mac_pending = false;
                nicvf_hw_set_mac_addr(nic, netdev);
        }

        /* Init tasklet for handling Qset err interrupt */
        tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
                     (unsigned long)nic);

        /* Init RBDR tasklet which will refill RBDR */
        tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
                     (unsigned long)nic);
        INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

        /* Configure CPI algorithm */
        nic->cpi_alg = cpi_alg;
        if (!nic->sqs_mode)
                nicvf_config_cpi(nic);

        nicvf_request_sqs(nic);
        if (nic->sqs_mode)
                nicvf_get_primary_vf_struct(nic);

        /* Configure PTP timestamp */
        if (nic->ptp_clock)
                nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
        atomic_set(&nic->tx_ptp_skbs, 0);
        nic->ptp_skb = NULL;

        /* Configure receive side scaling and MTU */
        if (!nic->sqs_mode) {
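                /* Primary Qset only: RSS and the HW max frame size are
                 * programmed here once; secondary Qsets get their queue
                 * counts via nicvf_request_sqs() above.
                 */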
1520 nicvf_rss_init(nic); 1521 err = nicvf_update_hw_max_frs(nic, netdev->mtu); 1522 if (err) 1523 goto cleanup; 1524 1525 /* Clear percpu stats */ 1526 for_each_possible_cpu(cpu) 1527 memset(per_cpu_ptr(nic->drv_stats, cpu), 0, 1528 sizeof(struct nicvf_drv_stats)); 1529 } 1530 1531 err = nicvf_register_interrupts(nic); 1532 if (err) 1533 goto cleanup; 1534 1535 /* Initialize the queues */ 1536 err = nicvf_init_resources(nic); 1537 if (err) 1538 goto cleanup; 1539 1540 /* Make sure queue initialization is written */ 1541 wmb(); 1542 1543 nicvf_reg_write(nic, NIC_VF_INT, -1); 1544 /* Enable Qset err interrupt */ 1545 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 1546 1547 /* Enable completion queue interrupt */ 1548 for (qidx = 0; qidx < qs->cq_cnt; qidx++) 1549 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 1550 1551 /* Enable RBDR threshold interrupt */ 1552 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) 1553 nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); 1554 1555 /* Send VF config done msg to PF */ 1556 nicvf_send_cfg_done(nic); 1557 1558 if (nic->nicvf_rx_mode_wq) { 1559 INIT_DELAYED_WORK(&nic->link_change_work, 1560 nicvf_link_status_check_task); 1561 queue_delayed_work(nic->nicvf_rx_mode_wq, 1562 &nic->link_change_work, 0); 1563 } 1564 1565 return 0; 1566 cleanup: 1567 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1568 nicvf_unregister_interrupts(nic); 1569 tasklet_kill(&nic->qs_err_task); 1570 tasklet_kill(&nic->rbdr_task); 1571 napi_del: 1572 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 1573 cq_poll = nic->napi[qidx]; 1574 if (!cq_poll) 1575 continue; 1576 napi_disable(&cq_poll->napi); 1577 netif_napi_del(&cq_poll->napi); 1578 } 1579 nicvf_free_cq_poll(nic); 1580 return err; 1581 } 1582 1583 static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) 1584 { 1585 struct nicvf *nic = netdev_priv(netdev); 1586 int orig_mtu = netdev->mtu; 1587 1588 /* For now just support only the usual MTU sized frames, 1589 * plus some headroom for VLAN, QinQ. 
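         * (With the 1530 byte limit noted above, MAX_XDP_MTU works out
         * to 1530 - ETH_HLEN (14) - 2 * VLAN_HLEN (2 * 4) = 1508 bytes.)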
         */
        if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) {
                netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
                            netdev->mtu);
                return -EINVAL;
        }

        netdev->mtu = new_mtu;

        if (!netif_running(netdev))
                return 0;

        if (nicvf_update_hw_max_frs(nic, new_mtu)) {
                netdev->mtu = orig_mtu;
                return -EINVAL;
        }

        return 0;
}

static int nicvf_set_mac_address(struct net_device *netdev, void *p)
{
        struct sockaddr *addr = p;
        struct nicvf *nic = netdev_priv(netdev);

        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;

        memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

        if (nic->pdev->msix_enabled) {
                if (nicvf_hw_set_mac_addr(nic, netdev))
                        return -EBUSY;
        } else {
                nic->set_mac_pending = true;
        }

        return 0;
}

void nicvf_update_lmac_stats(struct nicvf *nic)
{
        int stat = 0;
        union nic_mbx mbx = {};

        if (!netif_running(nic->netdev))
                return;

        mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
        mbx.bgx_stats.vf_id = nic->vf_id;
        /* Rx stats */
        mbx.bgx_stats.rx = 1;
        while (stat < BGX_RX_STATS_COUNT) {
                mbx.bgx_stats.idx = stat;
                if (nicvf_send_msg_to_pf(nic, &mbx))
                        return;
                stat++;
        }

        stat = 0;

        /* Tx stats */
        mbx.bgx_stats.rx = 0;
        while (stat < BGX_TX_STATS_COUNT) {
                mbx.bgx_stats.idx = stat;
                if (nicvf_send_msg_to_pf(nic, &mbx))
                        return;
                stat++;
        }
}

void nicvf_update_stats(struct nicvf *nic)
{
        int qidx, cpu;
        u64 tmp_stats = 0;
        struct nicvf_hw_stats *stats = &nic->hw_stats;
        struct nicvf_drv_stats *drv_stats;
        struct queue_set *qs = nic->qs;

#define GET_RX_STATS(reg) \
        nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
        nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

        stats->rx_bytes = GET_RX_STATS(RX_OCTS);
        stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
        stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
        stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
        stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
        stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
        stats->rx_drop_red = GET_RX_STATS(RX_RED);
        stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
        stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
        stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
        stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
        stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
        stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
        stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

        stats->tx_bytes = GET_TX_STATS(TX_OCTS);
        stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
        stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
        stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
        stats->tx_drops = GET_TX_STATS(TX_DROP);

        /* On T88 pass 2.0, the dummy SQE added for TSO notification
         * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
         * to by the dummy SQE and the tx_drops counter gets incremented.
         * Subtracting it from the tx_tso counter gives the exact
         * tx_drops count.
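         *
         * Rough illustration: if 1000 TSO packets were sent (per-cpu
         * tx_tso totals 1000) and each one cost exactly one dummy-SQE
         * drop, the HW TX_DROP counter also reads 1000 and the adjusted
         * tx_drops below becomes 0.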
1700 */ 1701 if (nic->t88 && nic->hw_tso) { 1702 for_each_possible_cpu(cpu) { 1703 drv_stats = per_cpu_ptr(nic->drv_stats, cpu); 1704 tmp_stats += drv_stats->tx_tso; 1705 } 1706 stats->tx_drops = tmp_stats - stats->tx_drops; 1707 } 1708 stats->tx_frames = stats->tx_ucast_frames + 1709 stats->tx_bcast_frames + 1710 stats->tx_mcast_frames; 1711 stats->rx_frames = stats->rx_ucast_frames + 1712 stats->rx_bcast_frames + 1713 stats->rx_mcast_frames; 1714 stats->rx_drops = stats->rx_drop_red + 1715 stats->rx_drop_overrun; 1716 1717 /* Update RQ and SQ stats */ 1718 for (qidx = 0; qidx < qs->rq_cnt; qidx++) 1719 nicvf_update_rq_stats(nic, qidx); 1720 for (qidx = 0; qidx < qs->sq_cnt; qidx++) 1721 nicvf_update_sq_stats(nic, qidx); 1722 } 1723 1724 static void nicvf_get_stats64(struct net_device *netdev, 1725 struct rtnl_link_stats64 *stats) 1726 { 1727 struct nicvf *nic = netdev_priv(netdev); 1728 struct nicvf_hw_stats *hw_stats = &nic->hw_stats; 1729 1730 nicvf_update_stats(nic); 1731 1732 stats->rx_bytes = hw_stats->rx_bytes; 1733 stats->rx_packets = hw_stats->rx_frames; 1734 stats->rx_dropped = hw_stats->rx_drops; 1735 stats->multicast = hw_stats->rx_mcast_frames; 1736 1737 stats->tx_bytes = hw_stats->tx_bytes; 1738 stats->tx_packets = hw_stats->tx_frames; 1739 stats->tx_dropped = hw_stats->tx_drops; 1740 1741 } 1742 1743 static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue) 1744 { 1745 struct nicvf *nic = netdev_priv(dev); 1746 1747 netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n"); 1748 1749 this_cpu_inc(nic->drv_stats->tx_timeout); 1750 schedule_work(&nic->reset_task); 1751 } 1752 1753 static void nicvf_reset_task(struct work_struct *work) 1754 { 1755 struct nicvf *nic; 1756 1757 nic = container_of(work, struct nicvf, reset_task); 1758 1759 if (!netif_running(nic->netdev)) 1760 return; 1761 1762 nicvf_stop(nic->netdev); 1763 nicvf_open(nic->netdev); 1764 netif_trans_update(nic->netdev); 1765 } 1766 1767 static int nicvf_config_loopback(struct nicvf *nic, 1768 netdev_features_t features) 1769 { 1770 union nic_mbx mbx = {}; 1771 1772 mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK; 1773 mbx.lbk.vf_id = nic->vf_id; 1774 mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0; 1775 1776 return nicvf_send_msg_to_pf(nic, &mbx); 1777 } 1778 1779 static netdev_features_t nicvf_fix_features(struct net_device *netdev, 1780 netdev_features_t features) 1781 { 1782 struct nicvf *nic = netdev_priv(netdev); 1783 1784 if ((features & NETIF_F_LOOPBACK) && 1785 netif_running(netdev) && !nic->loopback_supported) 1786 features &= ~NETIF_F_LOOPBACK; 1787 1788 return features; 1789 } 1790 1791 static int nicvf_set_features(struct net_device *netdev, 1792 netdev_features_t features) 1793 { 1794 struct nicvf *nic = netdev_priv(netdev); 1795 netdev_features_t changed = features ^ netdev->features; 1796 1797 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 1798 nicvf_config_vlan_stripping(nic, features); 1799 1800 if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) 1801 return nicvf_config_loopback(nic, features); 1802 1803 return 0; 1804 } 1805 1806 static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) 1807 { 1808 u8 cq_count, txq_count; 1809 1810 /* Set XDP Tx queue count same as Rx queue count */ 1811 if (!bpf_attached) 1812 nic->xdp_tx_queues = 0; 1813 else 1814 nic->xdp_tx_queues = nic->rx_queues; 1815 1816 /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets 1817 * needs to be allocated, check how many. 
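         * Rough illustration (assuming MAX_CMP_QUEUES_PER_QS is 8): with
         * 12 Rx queues and 12 stack Tx queues under XDP, txq_count is 24,
         * cq_count is 24 and sqs_count works out to 24 / 8 - 1 = 2
         * additional Qsets.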
1818 */ 1819 txq_count = nic->xdp_tx_queues + nic->tx_queues; 1820 cq_count = max(nic->rx_queues, txq_count); 1821 if (cq_count > MAX_CMP_QUEUES_PER_QS) { 1822 nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); 1823 nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; 1824 } else { 1825 nic->sqs_count = 0; 1826 } 1827 1828 /* Set primary Qset's resources */ 1829 nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); 1830 nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); 1831 nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); 1832 1833 /* Update stack */ 1834 nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); 1835 } 1836 1837 static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) 1838 { 1839 struct net_device *dev = nic->netdev; 1840 bool if_up = netif_running(nic->netdev); 1841 struct bpf_prog *old_prog; 1842 bool bpf_attached = false; 1843 int ret = 0; 1844 1845 /* For now just support only the usual MTU sized frames, 1846 * plus some headroom for VLAN, QinQ. 1847 */ 1848 if (prog && dev->mtu > MAX_XDP_MTU) { 1849 netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", 1850 dev->mtu); 1851 return -EOPNOTSUPP; 1852 } 1853 1854 /* ALL SQs attached to CQs i.e same as RQs, are treated as 1855 * XDP Tx queues and more Tx queues are allocated for 1856 * network stack to send pkts out. 1857 * 1858 * No of Tx queues are either same as Rx queues or whatever 1859 * is left in max no of queues possible. 1860 */ 1861 if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { 1862 netdev_warn(dev, 1863 "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", 1864 nic->max_queues); 1865 return -ENOMEM; 1866 } 1867 1868 if (if_up) 1869 nicvf_stop(nic->netdev); 1870 1871 old_prog = xchg(&nic->xdp_prog, prog); 1872 /* Detach old prog, if any */ 1873 if (old_prog) 1874 bpf_prog_put(old_prog); 1875 1876 if (nic->xdp_prog) { 1877 /* Attach BPF program */ 1878 bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); 1879 bpf_attached = true; 1880 } 1881 1882 /* Calculate Tx queues needed for XDP and network stack */ 1883 nicvf_set_xdp_queues(nic, bpf_attached); 1884 1885 if (if_up) { 1886 /* Reinitialize interface, clean slate */ 1887 nicvf_open(nic->netdev); 1888 netif_trans_update(nic->netdev); 1889 } 1890 1891 return ret; 1892 } 1893 1894 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) 1895 { 1896 struct nicvf *nic = netdev_priv(netdev); 1897 1898 /* To avoid checks while retrieving buffer address from CQE_RX, 1899 * do not support XDP for T88 pass1.x silicons which are anyway 1900 * not in use widely. 1901 */ 1902 if (pass1_silicon(nic->pdev)) 1903 return -EOPNOTSUPP; 1904 1905 switch (xdp->command) { 1906 case XDP_SETUP_PROG: 1907 return nicvf_xdp_setup(nic, xdp->prog); 1908 case XDP_QUERY_PROG: 1909 xdp->prog_id = nic->xdp_prog ? 
nic->xdp_prog->aux->id : 0;
                return 0;
        default:
                return -EINVAL;
        }
}

static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
{
        struct hwtstamp_config config;
        struct nicvf *nic = netdev_priv(netdev);

        if (!nic->ptp_clock)
                return -ENODEV;

        if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
                return -EFAULT;

        /* reserved for future extensions */
        if (config.flags)
                return -EINVAL;

        switch (config.tx_type) {
        case HWTSTAMP_TX_OFF:
        case HWTSTAMP_TX_ON:
                break;
        default:
                return -ERANGE;
        }

        switch (config.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                nic->hw_rx_tstamp = false;
                break;
        case HWTSTAMP_FILTER_ALL:
        case HWTSTAMP_FILTER_SOME:
        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
        case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
        case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
        case HWTSTAMP_FILTER_PTP_V2_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
                nic->hw_rx_tstamp = true;
                config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
        default:
                return -ERANGE;
        }

        if (netif_running(netdev))
                nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);

        if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
                return -EFAULT;

        return 0;
}

static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
{
        switch (cmd) {
        case SIOCSHWTSTAMP:
                return nicvf_config_hwtstamp(netdev, req);
        default:
                return -EOPNOTSUPP;
        }
}

static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
                                     struct nicvf *nic)
{
        union nic_mbx mbx = {};
        int idx;

        /* From inside a VM we have only 128 bits of mailbox space per
         * message to the host's PF, so send all mc addrs one by one,
         * starting with a flush command in case the kernel requests
         * specific MAC filtering.
         */

        /* flush DMAC filters and reset RX mode */
        mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
        if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
                goto free_mc;

        if (mode & BGX_XCAST_MCAST_FILTER) {
                /* Once filtering is enabled, we need to signal to the PF
                 * to add its own LMAC to the filter so that it keeps
                 * accepting packets destined to it.
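                 * (The mac field is left as 0 below; the assumption is
                 * that the PF treats a zero address in ADD_MCAST as
                 * "add my own LMAC".)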

static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
				     struct nicvf *nic)
{
	union nic_mbx mbx = {};
	int idx;

	/* The VF -> PF mailbox is only 128 bits wide, so send all multicast
	 * addresses one by one, starting with a flush command in case the
	 * kernel requests specific MAC filtering.
	 */

	/* flush DMAC filters and reset RX mode */
	mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
	if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
		goto free_mc;

	if (mode & BGX_XCAST_MCAST_FILTER) {
		/* Once filtering is enabled, signal the PF to add its own
		 * LMAC to the filter so that packets destined to it are
		 * still accepted.
		 */
		mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
		mbx.xcast.mac = 0;
		if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
			goto free_mc;
	}

	/* check if we have any specific MACs to be added to PF DMAC filter */
	if (mc_addrs) {
		/* now go through kernel list of MACs and add them one by one */
		for (idx = 0; idx < mc_addrs->count; idx++) {
			mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
			mbx.xcast.mac = mc_addrs->mc[idx];
			if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
				goto free_mc;
		}
	}

	/* and finally set the RX mode for the PF accordingly */
	mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
	mbx.xcast.mode = mode;

	nicvf_send_msg_to_pf(nic, &mbx);
free_mc:
	kfree(mc_addrs);
}

static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
{
	struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
						  work);
	struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
	u8 mode;
	struct xcast_addr_list *mc;

	if (!vf_work)
		return;

	/* Save the message data locally to prevent it from being
	 * overwritten by the next ndo_set_rx_mode() call.
	 */
	spin_lock(&nic->rx_mode_wq_lock);
	mode = vf_work->mode;
	mc = vf_work->mc;
	vf_work->mc = NULL;
	spin_unlock(&nic->rx_mode_wq_lock);

	__nicvf_set_rx_mode_task(mode, mc, nic);
}

static void nicvf_set_rx_mode(struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct netdev_hw_addr *ha;
	struct xcast_addr_list *mc_list = NULL;
	u8 mode = 0;

	if (netdev->flags & IFF_PROMISC) {
		mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT;
	} else {
		if (netdev->flags & IFF_BROADCAST)
			mode |= BGX_XCAST_BCAST_ACCEPT;

		if (netdev->flags & IFF_ALLMULTI) {
			mode |= BGX_XCAST_MCAST_ACCEPT;
		} else if (netdev->flags & IFF_MULTICAST) {
			mode |= BGX_XCAST_MCAST_FILTER;
			/* copy the multicast addresses for the PF DMAC filter */
			if (netdev_mc_count(netdev)) {
				mc_list = kmalloc(offsetof(typeof(*mc_list),
							   mc[netdev_mc_count(netdev)]),
						  GFP_ATOMIC);
				if (unlikely(!mc_list))
					return;
				mc_list->count = 0;
				netdev_hw_addr_list_for_each(ha, &netdev->mc) {
					mc_list->mc[mc_list->count] =
						ether_addr_to_u64(ha->addr);
					mc_list->count++;
				}
			}
		}
	}
	spin_lock(&nic->rx_mode_wq_lock);
	kfree(nic->rx_mode_work.mc);
	nic->rx_mode_work.mc = mc_list;
	nic->rx_mode_work.mode = mode;
	queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
	spin_unlock(&nic->rx_mode_wq_lock);
}
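
/* Illustrative summary, not additional driver logic: when ndo_set_rx_mode
 * requests filtered multicast, the deferred work above ends up sending the
 * PF the following mailbox sequence, one message at a time:
 *
 *	NIC_MBOX_MSG_RESET_XCAST		(flush DMAC filters)
 *	NIC_MBOX_MSG_ADD_MCAST, mac = 0		(have the PF re-add its own LMAC)
 *	NIC_MBOX_MSG_ADD_MCAST, mac = mc[i]	(one per multicast list entry)
 *	NIC_MBOX_MSG_SET_XCAST, mode		(commit the new RX mode)
 */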

static const struct net_device_ops nicvf_netdev_ops = {
	.ndo_open		= nicvf_open,
	.ndo_stop		= nicvf_stop,
	.ndo_start_xmit		= nicvf_xmit,
	.ndo_change_mtu		= nicvf_change_mtu,
	.ndo_set_mac_address	= nicvf_set_mac_address,
	.ndo_get_stats64	= nicvf_get_stats64,
	.ndo_tx_timeout		= nicvf_tx_timeout,
	.ndo_fix_features	= nicvf_fix_features,
	.ndo_set_features	= nicvf_set_features,
	.ndo_bpf		= nicvf_xdp,
	.ndo_do_ioctl		= nicvf_ioctl,
	.ndo_set_rx_mode	= nicvf_set_rx_mode,
};

static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct net_device *netdev;
	struct nicvf *nic;
	int err, qcount;
	u16 sdevid;
	struct cavium_ptp *ptp_clock;

	ptp_clock = cavium_ptp_get();
	if (IS_ERR(ptp_clock)) {
		if (PTR_ERR(ptp_clock) == -ENODEV)
			/* In a virtualized environment we proceed without PTP */
			ptp_clock = NULL;
		else
			return PTR_ERR(ptp_clock);
	}

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(dev, "Failed to enable PCI device\n");
		return err;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(dev, "PCI request regions failed 0x%x\n", err);
		goto err_disable_device;
	}

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "Unable to get usable DMA configuration\n");
		goto err_release_regions;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n");
		goto err_release_regions;
	}

	qcount = netif_get_num_default_rss_queues();

	/* Restrict multiqset support only to host-bound VFs */
	if (pdev->is_virtfn) {
		/* Set max number of queues per VF */
		qcount = min_t(int, num_online_cpus(),
			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
	}

	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
	if (!netdev) {
		err = -ENOMEM;
		goto err_release_regions;
	}

	pci_set_drvdata(pdev, netdev);

	SET_NETDEV_DEV(netdev, &pdev->dev);

	nic = netdev_priv(netdev);
	nic->netdev = netdev;
	nic->pdev = pdev;
	nic->pnicvf = nic;
	nic->max_queues = qcount;
	/* If the number of CPUs is too low, there won't be any queues left
	 * for XDP_TX, hence double it.
	 */
	if (!nic->t88)
		nic->max_queues *= 2;
	nic->ptp_clock = ptp_clock;

	/* Map VF's configuration registers */
	nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
	if (!nic->reg_base) {
		dev_err(dev, "Cannot map config register space, aborting\n");
		err = -ENOMEM;
		goto err_free_netdev;
	}

	nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
	if (!nic->drv_stats) {
		err = -ENOMEM;
		goto err_free_netdev;
	}

	err = nicvf_set_qset_resources(nic);
	if (err)
		goto err_free_netdev;

	/* Check if PF is alive and get MAC address for this VF */
	err = nicvf_register_misc_interrupt(nic);
	if (err)
		goto err_free_netdev;

	nicvf_send_vf_struct(nic);

	if (!pass1_silicon(nic->pdev))
		nic->hw_tso = true;

	/* Get the IOMMU domain for IOVA to physical address conversion */
	nic->iommu_domain = iommu_get_domain_for_dev(dev);

	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
	if (sdevid == 0xA134)
		nic->t88 = true;

	/* Check if this VF is in QS only mode */
	if (nic->sqs_mode)
		return 0;

	err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
	if (err)
		goto err_unregister_interrupts;

	netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG |
			       NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 |
			       NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			       NETIF_F_HW_VLAN_CTAG_RX);

	netdev->hw_features |= NETIF_F_RXHASH;

	netdev->features |= netdev->hw_features;
	netdev->hw_features |= NETIF_F_LOOPBACK;

	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM |
				NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;

	netdev->netdev_ops = &nicvf_netdev_ops;
	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;

	/* MTU range: 64 - 9200 */
	netdev->min_mtu = NIC_HW_MIN_FRS;
	netdev->max_mtu = NIC_HW_MAX_FRS;

	INIT_WORK(&nic->reset_task, nicvf_reset_task);

	nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
							WQ_MEM_RECLAIM,
							nic->vf_id);
	if (!nic->nicvf_rx_mode_wq) {
		err = -ENOMEM;
		dev_err(dev, "Failed to allocate work queue\n");
		goto err_unregister_interrupts;
	}

	INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
	spin_lock_init(&nic->rx_mode_wq_lock);
	mutex_init(&nic->rx_mode_mtx);

	err = register_netdev(netdev);
	if (err) {
		dev_err(dev, "Failed to register netdevice\n");
		goto err_unregister_interrupts;
	}

	nic->msg_enable = debug;

	nicvf_set_ethtool_ops(netdev);

	return 0;

err_unregister_interrupts:
	nicvf_unregister_interrupts(nic);
err_free_netdev:
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	free_netdev(netdev);
err_release_regions:
	pci_release_regions(pdev);
err_disable_device:
	pci_disable_device(pdev);
	return err;
}

static void nicvf_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nicvf *nic;
	struct net_device *pnetdev;

	if (!netdev)
		return;

	nic = netdev_priv(netdev);
	pnetdev = nic->pnicvf->netdev;

	/* Check if this Qset is assigned to a different VF.
	 * If yes, clean up the primary and all secondary Qsets.
	 */
	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
		unregister_netdev(pnetdev);
	if (nic->nicvf_rx_mode_wq) {
		destroy_workqueue(nic->nicvf_rx_mode_wq);
		nic->nicvf_rx_mode_wq = NULL;
	}
	nicvf_unregister_interrupts(nic);
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	cavium_ptp_put(nic->ptp_clock);
	free_netdev(netdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void nicvf_shutdown(struct pci_dev *pdev)
{
	nicvf_remove(pdev);
}

static struct pci_driver nicvf_driver = {
	.name		= DRV_NAME,
	.id_table	= nicvf_id_table,
	.probe		= nicvf_probe,
	.remove		= nicvf_remove,
	.shutdown	= nicvf_shutdown,
};

static int __init nicvf_init_module(void)
{
	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);

	return pci_register_driver(&nicvf_driver);
}

static void __exit nicvf_cleanup_module(void)
{
	pci_unregister_driver(&nicvf_driver);
}

module_init(nicvf_init_module);
module_exit(nicvf_cleanup_module);
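
/* Illustrative only, not part of the driver logic: once this module is
 * loaded, the PCI core matches the IDs in nicvf_id_table and calls
 * nicvf_probe() for each VF.  A VF can also be bound by hand through
 * sysfs, e.g. (the PCI address below is a placeholder):
 *
 *	echo 0002:01:00.1 > /sys/bus/pci/drivers/nicvf/bind
 */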