/*
 * Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <linux/net_tstamp.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"
#include "../common/cavium_ptp.h"

#define DRV_NAME	"nicvf"
#define DRV_VERSION	"1.0"

/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
	{ 0, }  /* end of table */
};

MODULE_AUTHOR("Sunil Goutham");
MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, nicvf_id_table);

static int debug = 0x00;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");

static int cpi_alg = CPI_ALG_NONE;
module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
		 "CPI algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

struct nicvf_xdp_tx {
	u64 dma_addr;
	u8  qidx;
};

static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
	if (nic->sqs_mode)
		return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
	else
		return qidx;
}

/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation. All accesses to the device
 * registers on this platform are implicitly strongly ordered with respect
 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
 * with no memory barriers in this driver. The readq()/writeq() functions add
 * explicit ordering operations which are redundant in this case and only
 * add overhead.
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
	writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
	return readq_relaxed(nic->reg_base + offset);
}

void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
			   u64 qidx, u64 val)
{
	void __iomem *addr = nic->reg_base + offset;

	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
	void __iomem *addr = nic->reg_base + offset;

	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
	u64 *msg = (u64 *)mbx;

	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}

int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
	int timeout = NIC_MBOX_MSG_TIMEOUT;
	int sleep = 10;

	nic->pf_acked = false;
	nic->pf_nacked = false;

	nicvf_write_to_mbx(nic, mbx);

	/* Wait for the previous message to be acked, timeout 2sec */
	while (!nic->pf_acked) {
		if (nic->pf_nacked) {
			netdev_err(nic->netdev,
				   "PF NACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EINVAL;
		}
		msleep(sleep);
		if (nic->pf_acked)
			break;
		timeout -= sleep;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EBUSY;
		}
	}
	return 0;
}

/* Checks if VF is able to communicate with PF
 * and also gets the VNIC number this VF is associated to.
161 */ 162 static int nicvf_check_pf_ready(struct nicvf *nic) 163 { 164 union nic_mbx mbx = {}; 165 166 mbx.msg.msg = NIC_MBOX_MSG_READY; 167 if (nicvf_send_msg_to_pf(nic, &mbx)) { 168 netdev_err(nic->netdev, 169 "PF didn't respond to READY msg\n"); 170 return 0; 171 } 172 173 return 1; 174 } 175 176 static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) 177 { 178 if (bgx->rx) 179 nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats; 180 else 181 nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats; 182 } 183 184 static void nicvf_handle_mbx_intr(struct nicvf *nic) 185 { 186 union nic_mbx mbx = {}; 187 u64 *mbx_data; 188 u64 mbx_addr; 189 int i; 190 191 mbx_addr = NIC_VF_PF_MAILBOX_0_1; 192 mbx_data = (u64 *)&mbx; 193 194 for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { 195 *mbx_data = nicvf_reg_read(nic, mbx_addr); 196 mbx_data++; 197 mbx_addr += sizeof(u64); 198 } 199 200 netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg); 201 switch (mbx.msg.msg) { 202 case NIC_MBOX_MSG_READY: 203 nic->pf_acked = true; 204 nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; 205 nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; 206 nic->node = mbx.nic_cfg.node_id; 207 if (!nic->set_mac_pending) 208 ether_addr_copy(nic->netdev->dev_addr, 209 mbx.nic_cfg.mac_addr); 210 nic->sqs_mode = mbx.nic_cfg.sqs_mode; 211 nic->loopback_supported = mbx.nic_cfg.loopback_supported; 212 nic->link_up = false; 213 nic->duplex = 0; 214 nic->speed = 0; 215 break; 216 case NIC_MBOX_MSG_ACK: 217 nic->pf_acked = true; 218 break; 219 case NIC_MBOX_MSG_NACK: 220 nic->pf_nacked = true; 221 break; 222 case NIC_MBOX_MSG_RSS_SIZE: 223 nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; 224 nic->pf_acked = true; 225 break; 226 case NIC_MBOX_MSG_BGX_STATS: 227 nicvf_read_bgx_stats(nic, &mbx.bgx_stats); 228 nic->pf_acked = true; 229 break; 230 case NIC_MBOX_MSG_BGX_LINK_CHANGE: 231 nic->pf_acked = true; 232 nic->link_up = mbx.link_status.link_up; 233 nic->duplex = mbx.link_status.duplex; 234 nic->speed = mbx.link_status.speed; 235 nic->mac_type = mbx.link_status.mac_type; 236 if (nic->link_up) { 237 netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n", 238 nic->speed, 239 nic->duplex == DUPLEX_FULL ? 240 "Full" : "Half"); 241 netif_carrier_on(nic->netdev); 242 netif_tx_start_all_queues(nic->netdev); 243 } else { 244 netdev_info(nic->netdev, "Link is Down\n"); 245 netif_carrier_off(nic->netdev); 246 netif_tx_stop_all_queues(nic->netdev); 247 } 248 break; 249 case NIC_MBOX_MSG_ALLOC_SQS: 250 nic->sqs_count = mbx.sqs_alloc.qs_count; 251 nic->pf_acked = true; 252 break; 253 case NIC_MBOX_MSG_SNICVF_PTR: 254 /* Primary VF: make note of secondary VF's pointer 255 * to be used while packet transmission. 256 */ 257 nic->snicvf[mbx.nicvf.sqs_id] = 258 (struct nicvf *)mbx.nicvf.nicvf; 259 nic->pf_acked = true; 260 break; 261 case NIC_MBOX_MSG_PNICVF_PTR: 262 /* Secondary VF/Qset: make note of primary VF's pointer 263 * to be used while packet reception, to handover packet 264 * to primary VF's netdev. 
265 */ 266 nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf; 267 nic->pf_acked = true; 268 break; 269 case NIC_MBOX_MSG_PFC: 270 nic->pfc.autoneg = mbx.pfc.autoneg; 271 nic->pfc.fc_rx = mbx.pfc.fc_rx; 272 nic->pfc.fc_tx = mbx.pfc.fc_tx; 273 nic->pf_acked = true; 274 break; 275 default: 276 netdev_err(nic->netdev, 277 "Invalid message from PF, msg 0x%x\n", mbx.msg.msg); 278 break; 279 } 280 nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0); 281 } 282 283 static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev) 284 { 285 union nic_mbx mbx = {}; 286 287 mbx.mac.msg = NIC_MBOX_MSG_SET_MAC; 288 mbx.mac.vf_id = nic->vf_id; 289 ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr); 290 291 return nicvf_send_msg_to_pf(nic, &mbx); 292 } 293 294 static void nicvf_config_cpi(struct nicvf *nic) 295 { 296 union nic_mbx mbx = {}; 297 298 mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG; 299 mbx.cpi_cfg.vf_id = nic->vf_id; 300 mbx.cpi_cfg.cpi_alg = nic->cpi_alg; 301 mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt; 302 303 nicvf_send_msg_to_pf(nic, &mbx); 304 } 305 306 static void nicvf_get_rss_size(struct nicvf *nic) 307 { 308 union nic_mbx mbx = {}; 309 310 mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; 311 mbx.rss_size.vf_id = nic->vf_id; 312 nicvf_send_msg_to_pf(nic, &mbx); 313 } 314 315 void nicvf_config_rss(struct nicvf *nic) 316 { 317 union nic_mbx mbx = {}; 318 struct nicvf_rss_info *rss = &nic->rss_info; 319 int ind_tbl_len = rss->rss_size; 320 int i, nextq = 0; 321 322 mbx.rss_cfg.vf_id = nic->vf_id; 323 mbx.rss_cfg.hash_bits = rss->hash_bits; 324 while (ind_tbl_len) { 325 mbx.rss_cfg.tbl_offset = nextq; 326 mbx.rss_cfg.tbl_len = min(ind_tbl_len, 327 RSS_IND_TBL_LEN_PER_MBX_MSG); 328 mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ? 329 NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; 330 331 for (i = 0; i < mbx.rss_cfg.tbl_len; i++) 332 mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++]; 333 334 nicvf_send_msg_to_pf(nic, &mbx); 335 336 ind_tbl_len -= mbx.rss_cfg.tbl_len; 337 } 338 } 339 340 void nicvf_set_rss_key(struct nicvf *nic) 341 { 342 struct nicvf_rss_info *rss = &nic->rss_info; 343 u64 key_addr = NIC_VNIC_RSS_KEY_0_4; 344 int idx; 345 346 for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { 347 nicvf_reg_write(nic, key_addr, rss->key[idx]); 348 key_addr += sizeof(u64); 349 } 350 } 351 352 static int nicvf_rss_init(struct nicvf *nic) 353 { 354 struct nicvf_rss_info *rss = &nic->rss_info; 355 int idx; 356 357 nicvf_get_rss_size(nic); 358 359 if (cpi_alg != CPI_ALG_NONE) { 360 rss->enable = false; 361 rss->hash_bits = 0; 362 return 0; 363 } 364 365 rss->enable = true; 366 367 netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64)); 368 nicvf_set_rss_key(nic); 369 370 rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; 371 nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg); 372 373 rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size)); 374 375 for (idx = 0; idx < rss->rss_size; idx++) 376 rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, 377 nic->rx_queues); 378 nicvf_config_rss(nic); 379 return 1; 380 } 381 382 /* Request PF to allocate additional Qsets */ 383 static void nicvf_request_sqs(struct nicvf *nic) 384 { 385 union nic_mbx mbx = {}; 386 int sqs; 387 int sqs_count = nic->sqs_count; 388 int rx_queues = 0, tx_queues = 0; 389 390 /* Only primary VF should request */ 391 if (nic->sqs_mode || !nic->sqs_count) 392 return; 393 394 mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; 395 mbx.sqs_alloc.vf_id = nic->vf_id; 396 mbx.sqs_alloc.qs_count = nic->sqs_count; 397 if (nicvf_send_msg_to_pf(nic, 
				 &mbx)) {
		/* No response from PF */
		nic->sqs_count = 0;
		return;
	}

	/* Return if no Secondary Qsets available */
	if (!nic->sqs_count)
		return;

	if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
		rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;

	tx_queues = nic->tx_queues + nic->xdp_tx_queues;
	if (tx_queues > MAX_SND_QUEUES_PER_QS)
		tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;

	/* Set no of Rx/Tx queues in each of the SQsets */
	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
		mbx.nicvf.vf_id = nic->vf_id;
		mbx.nicvf.sqs_id = sqs;
		nicvf_send_msg_to_pf(nic, &mbx);

		nic->snicvf[sqs]->sqs_id = sqs;
		if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
			rx_queues -= MAX_RCV_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
			rx_queues = 0;
		}

		if (tx_queues > MAX_SND_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
			tx_queues -= MAX_SND_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
			tx_queues = 0;
		}

		nic->snicvf[sqs]->qs->cq_cnt =
		    max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt);

		/* Initialize secondary Qset's queues and its interrupts */
		nicvf_open(nic->snicvf[sqs]->netdev);
	}

	/* Update stack with actual Rx/Tx queue count allocated */
	if (sqs_count != nic->sqs_count)
		nicvf_set_real_num_queues(nic->netdev,
					  nic->tx_queues, nic->rx_queues);
}

/* Send this Qset's nicvf pointer to PF.
 * PF in turn sends primary VF's nicvf struct to secondary Qsets/VFs
 * so that packets received by these Qsets can use primary VF's netdev
 */
static void nicvf_send_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
	mbx.nicvf.sqs_mode = nic->sqs_mode;
	mbx.nicvf.nicvf = (u64)nic;
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_primary_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
	nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_set_real_num_queues(struct net_device *netdev,
			      int tx_queues, int rx_queues)
{
	int err = 0;

	err = netif_set_real_num_tx_queues(netdev, tx_queues);
	if (err) {
		netdev_err(netdev,
			   "Failed to set no of Tx queues: %d\n", tx_queues);
		return err;
	}

	err = netif_set_real_num_rx_queues(netdev, rx_queues);
	if (err)
		netdev_err(netdev,
			   "Failed to set no of Rx queues: %d\n", rx_queues);
	return err;
}

static int nicvf_init_resources(struct nicvf *nic)
{
	int err;

	/* Enable Qset */
	nicvf_qset_config(nic, true);

	/* Initialize queues and HW for data transfer */
	err = nicvf_config_data_transfer(nic, true);
	if (err) {
		netdev_err(nic->netdev,
			   "Failed to alloc/config VF's QSet resources\n");
		return err;
	}

	return 0;
}

static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
{
	/* Check if it's a recycled page, if not unmap the DMA mapping.
	 * Recycled page holds an extra reference.
	 */
	if (page_ref_count(page) == 1) {
		dma_addr &= PAGE_MASK;
		dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
				     RCV_FRAG_LEN + XDP_HEADROOM,
				     DMA_FROM_DEVICE,
				     DMA_ATTR_SKIP_CPU_SYNC);
	}
}

static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
				struct rcv_queue *rq, struct sk_buff **skb)
{
	struct xdp_buff xdp;
	struct page *page;
	struct nicvf_xdp_tx *xdp_tx = NULL;
	u32 action;
	u16 len, err, offset = 0;
	u64 dma_addr, cpu_addr;
	void *orig_data;

	/* Retrieve packet buffer's DMA address and length */
	len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
	dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));

	cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
	if (!cpu_addr)
		return false;
	cpu_addr = (u64)phys_to_virt(cpu_addr);
	page = virt_to_page((void *)cpu_addr);

	xdp.data_hard_start = page_address(page) + RCV_BUF_HEADROOM;
	xdp.data = (void *)cpu_addr;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + len;
	xdp.rxq = &rq->xdp_rxq;
	orig_data = xdp.data;

	rcu_read_lock();
	action = bpf_prog_run_xdp(prog, &xdp);
	rcu_read_unlock();

	/* Check if XDP program has changed headers */
	if (orig_data != xdp.data) {
		len = xdp.data_end - xdp.data;
		offset = orig_data - xdp.data;
		dma_addr -= offset;
	}

	switch (action) {
	case XDP_PASS:
		nicvf_unmap_page(nic, page, dma_addr);

		/* Build SKB and pass on packet to network stack */
		*skb = build_skb(xdp.data,
				 RCV_FRAG_LEN - cqe_rx->align_pad + offset);
		if (!*skb)
			put_page(page);
		else
			skb_put(*skb, len);
		return false;
	case XDP_TX:
		nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
		return true;
	case XDP_REDIRECT:
		/* Save DMA address for use while transmitting */
		xdp_tx = (struct nicvf_xdp_tx *)page_address(page);
		xdp_tx->dma_addr = dma_addr;
		xdp_tx->qidx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

		err = xdp_do_redirect(nic->pnicvf->netdev, &xdp, prog);
		if (!err)
			return true;

		/* Free the page on error */
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		break;
	default:
		bpf_warn_invalid_xdp_action(action);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(nic->netdev, prog, action);
		/* fall through */
	case XDP_DROP:
		nicvf_unmap_page(nic, page, dma_addr);
		put_page(page);
		return true;
	}
	return false;
}

static void nicvf_snd_ptp_handler(struct net_device *netdev,
				  struct cqe_send_t *cqe_tx)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct skb_shared_hwtstamps ts;
	u64 ns;

	nic = nic->pnicvf;

	/* Sync for 'ptp_skb' */
	smp_rmb();

	/* New timestamp request can be queued now */
	atomic_set(&nic->tx_ptp_skbs, 0);

	/* Check for timestamp requested skb */
	if (!nic->ptp_skb)
		return;

	/* Check if timestamping timed out, the timeout is set to 10us */
	if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT ||
	    cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT)
		goto no_tstamp;

	/* Get the timestamp */
	memset(&ts, 0, sizeof(ts));
	ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp);
	ts.hwtstamp = ns_to_ktime(ns);
	skb_tstamp_tx(nic->ptp_skb, &ts);

no_tstamp:
	/* Free the original skb */
dev_kfree_skb_any(nic->ptp_skb); 640 nic->ptp_skb = NULL; 641 /* Sync 'ptp_skb' */ 642 smp_wmb(); 643 } 644 645 static void nicvf_snd_pkt_handler(struct net_device *netdev, 646 struct cqe_send_t *cqe_tx, 647 int budget, int *subdesc_cnt, 648 unsigned int *tx_pkts, unsigned int *tx_bytes) 649 { 650 struct sk_buff *skb = NULL; 651 struct page *page; 652 struct nicvf *nic = netdev_priv(netdev); 653 struct snd_queue *sq; 654 struct sq_hdr_subdesc *hdr; 655 struct sq_hdr_subdesc *tso_sqe; 656 657 sq = &nic->qs->sq[cqe_tx->sq_idx]; 658 659 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr); 660 if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) 661 return; 662 663 /* Check for errors */ 664 if (cqe_tx->send_status) 665 nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); 666 667 /* Is this a XDP designated Tx queue */ 668 if (sq->is_xdp) { 669 page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; 670 /* Check if it's recycled page or else unmap DMA mapping */ 671 if (page && (page_ref_count(page) == 1)) 672 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 673 hdr->subdesc_cnt); 674 675 /* Release page reference for recycling */ 676 if (page) 677 put_page(page); 678 sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; 679 *subdesc_cnt += hdr->subdesc_cnt + 1; 680 return; 681 } 682 683 skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; 684 if (skb) { 685 /* Check for dummy descriptor used for HW TSO offload on 88xx */ 686 if (hdr->dont_send) { 687 /* Get actual TSO descriptors and free them */ 688 tso_sqe = 689 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); 690 nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, 691 tso_sqe->subdesc_cnt); 692 *subdesc_cnt += tso_sqe->subdesc_cnt + 1; 693 } else { 694 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 695 hdr->subdesc_cnt); 696 } 697 *subdesc_cnt += hdr->subdesc_cnt + 1; 698 prefetch(skb); 699 (*tx_pkts)++; 700 *tx_bytes += skb->len; 701 /* If timestamp is requested for this skb, don't free it */ 702 if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && 703 !nic->pnicvf->ptp_skb) 704 nic->pnicvf->ptp_skb = skb; 705 else 706 napi_consume_skb(skb, budget); 707 sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; 708 } else { 709 /* In case of SW TSO on 88xx, only last segment will have 710 * a SKB attached, so just free SQEs here. 
711 */ 712 if (!nic->hw_tso) 713 *subdesc_cnt += hdr->subdesc_cnt + 1; 714 } 715 } 716 717 static inline void nicvf_set_rxhash(struct net_device *netdev, 718 struct cqe_rx_t *cqe_rx, 719 struct sk_buff *skb) 720 { 721 u8 hash_type; 722 u32 hash; 723 724 if (!(netdev->features & NETIF_F_RXHASH)) 725 return; 726 727 switch (cqe_rx->rss_alg) { 728 case RSS_ALG_TCP_IP: 729 case RSS_ALG_UDP_IP: 730 hash_type = PKT_HASH_TYPE_L4; 731 hash = cqe_rx->rss_tag; 732 break; 733 case RSS_ALG_IP: 734 hash_type = PKT_HASH_TYPE_L3; 735 hash = cqe_rx->rss_tag; 736 break; 737 default: 738 hash_type = PKT_HASH_TYPE_NONE; 739 hash = 0; 740 } 741 742 skb_set_hash(skb, hash, hash_type); 743 } 744 745 static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb) 746 { 747 u64 ns; 748 749 if (!nic->ptp_clock || !nic->hw_rx_tstamp) 750 return; 751 752 /* The first 8 bytes is the timestamp */ 753 ns = cavium_ptp_tstamp2time(nic->ptp_clock, 754 be64_to_cpu(*(__be64 *)skb->data)); 755 skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns); 756 757 __skb_pull(skb, 8); 758 } 759 760 static void nicvf_rcv_pkt_handler(struct net_device *netdev, 761 struct napi_struct *napi, 762 struct cqe_rx_t *cqe_rx, 763 struct snd_queue *sq, struct rcv_queue *rq) 764 { 765 struct sk_buff *skb = NULL; 766 struct nicvf *nic = netdev_priv(netdev); 767 struct nicvf *snic = nic; 768 int err = 0; 769 int rq_idx; 770 771 rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx); 772 773 if (nic->sqs_mode) { 774 /* Use primary VF's 'nicvf' struct */ 775 nic = nic->pnicvf; 776 netdev = nic->netdev; 777 } 778 779 /* Check for errors */ 780 if (cqe_rx->err_level || cqe_rx->err_opcode) { 781 err = nicvf_check_cqe_rx_errs(nic, cqe_rx); 782 if (err && !cqe_rx->rb_cnt) 783 return; 784 } 785 786 /* For XDP, ignore pkts spanning multiple pages */ 787 if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { 788 /* Packet consumed by XDP */ 789 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb)) 790 return; 791 } else { 792 skb = nicvf_get_rcv_skb(snic, cqe_rx, 793 nic->xdp_prog ? 
true : false); 794 } 795 796 if (!skb) 797 return; 798 799 if (netif_msg_pktdata(nic)) { 800 netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len); 801 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, 802 skb->data, skb->len, true); 803 } 804 805 /* If error packet, drop it here */ 806 if (err) { 807 dev_kfree_skb_any(skb); 808 return; 809 } 810 811 nicvf_set_rxtstamp(nic, skb); 812 nicvf_set_rxhash(netdev, cqe_rx, skb); 813 814 skb_record_rx_queue(skb, rq_idx); 815 if (netdev->hw_features & NETIF_F_RXCSUM) { 816 /* HW by default verifies TCP/UDP/SCTP checksums */ 817 skb->ip_summed = CHECKSUM_UNNECESSARY; 818 } else { 819 skb_checksum_none_assert(skb); 820 } 821 822 skb->protocol = eth_type_trans(skb, netdev); 823 824 /* Check for stripped VLAN */ 825 if (cqe_rx->vlan_found && cqe_rx->vlan_stripped) 826 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 827 ntohs((__force __be16)cqe_rx->vlan_tci)); 828 829 if (napi && (netdev->features & NETIF_F_GRO)) 830 napi_gro_receive(napi, skb); 831 else 832 netif_receive_skb(skb); 833 } 834 835 static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, 836 struct napi_struct *napi, int budget) 837 { 838 int processed_cqe, work_done = 0, tx_done = 0; 839 int cqe_count, cqe_head; 840 int subdesc_cnt = 0; 841 struct nicvf *nic = netdev_priv(netdev); 842 struct queue_set *qs = nic->qs; 843 struct cmp_queue *cq = &qs->cq[cq_idx]; 844 struct cqe_rx_t *cq_desc; 845 struct netdev_queue *txq; 846 struct snd_queue *sq = &qs->sq[cq_idx]; 847 struct rcv_queue *rq = &qs->rq[cq_idx]; 848 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; 849 850 spin_lock_bh(&cq->lock); 851 loop: 852 processed_cqe = 0; 853 /* Get no of valid CQ entries to process */ 854 cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); 855 cqe_count &= CQ_CQE_COUNT; 856 if (!cqe_count) 857 goto done; 858 859 /* Get head of the valid CQ entries */ 860 cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; 861 cqe_head &= 0xFFFF; 862 863 while (processed_cqe < cqe_count) { 864 /* Get the CQ descriptor */ 865 cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); 866 cqe_head++; 867 cqe_head &= (cq->dmem.q_len - 1); 868 /* Initiate prefetch for next descriptor */ 869 prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); 870 871 if ((work_done >= budget) && napi && 872 (cq_desc->cqe_type != CQE_TYPE_SEND)) { 873 break; 874 } 875 876 switch (cq_desc->cqe_type) { 877 case CQE_TYPE_RX: 878 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq); 879 work_done++; 880 break; 881 case CQE_TYPE_SEND: 882 nicvf_snd_pkt_handler(netdev, (void *)cq_desc, 883 budget, &subdesc_cnt, 884 &tx_pkts, &tx_bytes); 885 tx_done++; 886 break; 887 case CQE_TYPE_SEND_PTP: 888 nicvf_snd_ptp_handler(netdev, (void *)cq_desc); 889 break; 890 case CQE_TYPE_INVALID: 891 case CQE_TYPE_RX_SPLIT: 892 case CQE_TYPE_RX_TCP: 893 /* Ignore for now */ 894 break; 895 } 896 processed_cqe++; 897 } 898 899 /* Ring doorbell to inform H/W to reuse processed CQEs */ 900 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, 901 cq_idx, processed_cqe); 902 903 if ((work_done < budget) && napi) 904 goto loop; 905 906 done: 907 /* Update SQ's descriptor free count */ 908 if (subdesc_cnt) 909 nicvf_put_sq_desc(sq, subdesc_cnt); 910 911 txq_idx = nicvf_netdev_qidx(nic, cq_idx); 912 /* Handle XDP TX queues */ 913 if (nic->pnicvf->xdp_prog) { 914 if (txq_idx < nic->pnicvf->xdp_tx_queues) { 915 nicvf_xdp_sq_doorbell(nic, sq, cq_idx); 916 goto out; 917 } 918 nic = nic->pnicvf; 919 txq_idx -= 
nic->pnicvf->xdp_tx_queues; 920 } 921 922 /* Wakeup TXQ if its stopped earlier due to SQ full */ 923 if (tx_done || 924 (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { 925 netdev = nic->pnicvf->netdev; 926 txq = netdev_get_tx_queue(netdev, txq_idx); 927 if (tx_pkts) 928 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 929 930 /* To read updated queue and carrier status */ 931 smp_mb(); 932 if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { 933 netif_tx_wake_queue(txq); 934 nic = nic->pnicvf; 935 this_cpu_inc(nic->drv_stats->txq_wake); 936 netif_warn(nic, tx_err, netdev, 937 "Transmit queue wakeup SQ%d\n", txq_idx); 938 } 939 } 940 941 out: 942 spin_unlock_bh(&cq->lock); 943 return work_done; 944 } 945 946 static int nicvf_poll(struct napi_struct *napi, int budget) 947 { 948 u64 cq_head; 949 int work_done = 0; 950 struct net_device *netdev = napi->dev; 951 struct nicvf *nic = netdev_priv(netdev); 952 struct nicvf_cq_poll *cq; 953 954 cq = container_of(napi, struct nicvf_cq_poll, napi); 955 work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); 956 957 if (work_done < budget) { 958 /* Slow packet rate, exit polling */ 959 napi_complete_done(napi, work_done); 960 /* Re-enable interrupts */ 961 cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, 962 cq->cq_idx); 963 nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 964 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, 965 cq->cq_idx, cq_head); 966 nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 967 } 968 return work_done; 969 } 970 971 /* Qset error interrupt handler 972 * 973 * As of now only CQ errors are handled 974 */ 975 static void nicvf_handle_qs_err(unsigned long data) 976 { 977 struct nicvf *nic = (struct nicvf *)data; 978 struct queue_set *qs = nic->qs; 979 int qidx; 980 u64 status; 981 982 netif_tx_disable(nic->netdev); 983 984 /* Check if it is CQ err */ 985 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 986 status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, 987 qidx); 988 if (!(status & CQ_ERR_MASK)) 989 continue; 990 /* Process already queued CQEs and reconfig CQ */ 991 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 992 nicvf_sq_disable(nic, qidx); 993 nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0); 994 nicvf_cmp_queue_config(nic, qs, qidx, true); 995 nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); 996 nicvf_sq_enable(nic, &qs->sq[qidx], qidx); 997 998 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 999 } 1000 1001 netif_tx_start_all_queues(nic->netdev); 1002 /* Re-enable Qset error interrupt */ 1003 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 1004 } 1005 1006 static void nicvf_dump_intr_status(struct nicvf *nic) 1007 { 1008 netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n", 1009 nicvf_reg_read(nic, NIC_VF_INT)); 1010 } 1011 1012 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) 1013 { 1014 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1015 u64 intr; 1016 1017 nicvf_dump_intr_status(nic); 1018 1019 intr = nicvf_reg_read(nic, NIC_VF_INT); 1020 /* Check for spurious interrupt */ 1021 if (!(intr & NICVF_INTR_MBOX_MASK)) 1022 return IRQ_HANDLED; 1023 1024 nicvf_handle_mbx_intr(nic); 1025 1026 return IRQ_HANDLED; 1027 } 1028 1029 static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) 1030 { 1031 struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq; 1032 struct nicvf *nic = cq_poll->nicvf; 1033 int qidx = cq_poll->cq_idx; 1034 1035 nicvf_dump_intr_status(nic); 1036 1037 /* Disable interrupts */ 1038 nicvf_disable_intr(nic, 
NICVF_INTR_CQ, qidx); 1039 1040 /* Schedule NAPI */ 1041 napi_schedule_irqoff(&cq_poll->napi); 1042 1043 /* Clear interrupt */ 1044 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 1045 1046 return IRQ_HANDLED; 1047 } 1048 1049 static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq) 1050 { 1051 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1052 u8 qidx; 1053 1054 1055 nicvf_dump_intr_status(nic); 1056 1057 /* Disable RBDR interrupt and schedule softirq */ 1058 for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) { 1059 if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) 1060 continue; 1061 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 1062 tasklet_hi_schedule(&nic->rbdr_task); 1063 /* Clear interrupt */ 1064 nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); 1065 } 1066 1067 return IRQ_HANDLED; 1068 } 1069 1070 static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) 1071 { 1072 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1073 1074 nicvf_dump_intr_status(nic); 1075 1076 /* Disable Qset err interrupt and schedule softirq */ 1077 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1078 tasklet_hi_schedule(&nic->qs_err_task); 1079 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1080 1081 return IRQ_HANDLED; 1082 } 1083 1084 static void nicvf_set_irq_affinity(struct nicvf *nic) 1085 { 1086 int vec, cpu; 1087 1088 for (vec = 0; vec < nic->num_vec; vec++) { 1089 if (!nic->irq_allocated[vec]) 1090 continue; 1091 1092 if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) 1093 return; 1094 /* CQ interrupts */ 1095 if (vec < NICVF_INTR_ID_SQ) 1096 /* Leave CPU0 for RBDR and other interrupts */ 1097 cpu = nicvf_netdev_qidx(nic, vec) + 1; 1098 else 1099 cpu = 0; 1100 1101 cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), 1102 nic->affinity_mask[vec]); 1103 irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), 1104 nic->affinity_mask[vec]); 1105 } 1106 } 1107 1108 static int nicvf_register_interrupts(struct nicvf *nic) 1109 { 1110 int irq, ret = 0; 1111 1112 for_each_cq_irq(irq) 1113 sprintf(nic->irq_name[irq], "%s-rxtx-%d", 1114 nic->pnicvf->netdev->name, 1115 nicvf_netdev_qidx(nic, irq)); 1116 1117 for_each_sq_irq(irq) 1118 sprintf(nic->irq_name[irq], "%s-sq-%d", 1119 nic->pnicvf->netdev->name, 1120 nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); 1121 1122 for_each_rbdr_irq(irq) 1123 sprintf(nic->irq_name[irq], "%s-rbdr-%d", 1124 nic->pnicvf->netdev->name, 1125 nic->sqs_mode ? (nic->sqs_id + 1) : 0); 1126 1127 /* Register CQ interrupts */ 1128 for (irq = 0; irq < nic->qs->cq_cnt; irq++) { 1129 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1130 nicvf_intr_handler, 1131 0, nic->irq_name[irq], nic->napi[irq]); 1132 if (ret) 1133 goto err; 1134 nic->irq_allocated[irq] = true; 1135 } 1136 1137 /* Register RBDR interrupt */ 1138 for (irq = NICVF_INTR_ID_RBDR; 1139 irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { 1140 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1141 nicvf_rbdr_intr_handler, 1142 0, nic->irq_name[irq], nic); 1143 if (ret) 1144 goto err; 1145 nic->irq_allocated[irq] = true; 1146 } 1147 1148 /* Register QS error interrupt */ 1149 sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", 1150 nic->pnicvf->netdev->name, 1151 nic->sqs_mode ? 
(nic->sqs_id + 1) : 0); 1152 irq = NICVF_INTR_ID_QS_ERR; 1153 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1154 nicvf_qs_err_intr_handler, 1155 0, nic->irq_name[irq], nic); 1156 if (ret) 1157 goto err; 1158 1159 nic->irq_allocated[irq] = true; 1160 1161 /* Set IRQ affinities */ 1162 nicvf_set_irq_affinity(nic); 1163 1164 err: 1165 if (ret) 1166 netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq); 1167 1168 return ret; 1169 } 1170 1171 static void nicvf_unregister_interrupts(struct nicvf *nic) 1172 { 1173 struct pci_dev *pdev = nic->pdev; 1174 int irq; 1175 1176 /* Free registered interrupts */ 1177 for (irq = 0; irq < nic->num_vec; irq++) { 1178 if (!nic->irq_allocated[irq]) 1179 continue; 1180 1181 irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); 1182 free_cpumask_var(nic->affinity_mask[irq]); 1183 1184 if (irq < NICVF_INTR_ID_SQ) 1185 free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); 1186 else 1187 free_irq(pci_irq_vector(pdev, irq), nic); 1188 1189 nic->irq_allocated[irq] = false; 1190 } 1191 1192 /* Disable MSI-X */ 1193 pci_free_irq_vectors(pdev); 1194 nic->num_vec = 0; 1195 } 1196 1197 /* Initialize MSIX vectors and register MISC interrupt. 1198 * Send READY message to PF to check if its alive 1199 */ 1200 static int nicvf_register_misc_interrupt(struct nicvf *nic) 1201 { 1202 int ret = 0; 1203 int irq = NICVF_INTR_ID_MISC; 1204 1205 /* Return if mailbox interrupt is already registered */ 1206 if (nic->pdev->msix_enabled) 1207 return 0; 1208 1209 /* Enable MSI-X */ 1210 nic->num_vec = pci_msix_vec_count(nic->pdev); 1211 ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, 1212 PCI_IRQ_MSIX); 1213 if (ret < 0) { 1214 netdev_err(nic->netdev, 1215 "Req for #%d msix vectors failed\n", nic->num_vec); 1216 return 1; 1217 } 1218 1219 sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); 1220 /* Register Misc interrupt */ 1221 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1222 nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); 1223 1224 if (ret) 1225 return ret; 1226 nic->irq_allocated[irq] = true; 1227 1228 /* Enable mailbox interrupt */ 1229 nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0); 1230 1231 /* Check if VF is able to communicate with PF */ 1232 if (!nicvf_check_pf_ready(nic)) { 1233 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1234 nicvf_unregister_interrupts(nic); 1235 return 1; 1236 } 1237 1238 return 0; 1239 } 1240 1241 static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) 1242 { 1243 struct nicvf *nic = netdev_priv(netdev); 1244 int qid = skb_get_queue_mapping(skb); 1245 struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); 1246 struct nicvf *snic; 1247 struct snd_queue *sq; 1248 int tmp; 1249 1250 /* Check for minimum packet length */ 1251 if (skb->len <= ETH_HLEN) { 1252 dev_kfree_skb(skb); 1253 return NETDEV_TX_OK; 1254 } 1255 1256 /* In XDP case, initial HW tx queues are used for XDP, 1257 * but stack's queue mapping starts at '0', so skip the 1258 * Tx queues attached to Rx queues for XDP. 
	 */
	if (nic->xdp_prog)
		qid += nic->xdp_tx_queues;

	snic = nic;
	/* Get secondary Qset's SQ structure */
	if (qid >= MAX_SND_QUEUES_PER_QS) {
		tmp = qid / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[tmp - 1];
		if (!snic) {
			netdev_warn(nic->netdev,
				    "Secondary Qset#%d's ptr not initialized\n",
				    tmp - 1);
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		qid = qid % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qid];
	if (!netif_tx_queue_stopped(txq) &&
	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
		netif_tx_stop_queue(txq);

		/* Barrier, so that stop_queue is visible to other CPUs */
		smp_mb();

		/* Check again, in case another CPU freed descriptors */
		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
			netif_tx_wake_queue(txq);
		} else {
			this_cpu_inc(nic->drv_stats->txq_stop);
			netif_warn(nic, tx_err, netdev,
				   "Transmit ring full, stopping SQ%d\n", qid);
		}
		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}

static inline void nicvf_free_cq_poll(struct nicvf *nic)
{
	struct nicvf_cq_poll *cq_poll;
	int qidx;

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		nic->napi[qidx] = NULL;
		kfree(cq_poll);
	}
}

int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);
	netif_tx_stop_all_queues(nic->netdev);
	nic->link_up = false;

	/* Teardown secondary qsets first */
	if (!nic->sqs_mode) {
		for (qidx = 0; qidx < nic->sqs_count; qidx++) {
			if (!nic->snicvf[qidx])
				continue;
			nicvf_stop(nic->snicvf[qidx]->netdev);
			nic->snicvf[qidx] = NULL;
		}
	}

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(pci_irq_vector(nic->pdev, irq));

	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}

	netif_tx_disable(netdev);

	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* Disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	nicvf_free_cq_poll(nic);

	/* Free any pending SKB saved to receive timestamp */
	if (nic->ptp_skb) {
		dev_kfree_skb_any(nic->ptp_skb);
		nic->ptp_skb = NULL;
	}

	/* Clear multiqset info */
	nic->pnicvf = nic;

	return 0;
}

static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
{
	union nic_mbx mbx = {};

	mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
	mbx.ptp.enable = enable;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
	union nic_mbx mbx = {};

	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
	mbx.frs.max_frs = mtu;
	mbx.frs.vf_id = nic->vf_id;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_open(struct net_device *netdev)
{
	int cpu, err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		cq_poll->nicvf = nic;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
		     (unsigned long)nic);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
		     (unsigned long)nic);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	if (!nic->sqs_mode)
		nicvf_config_cpi(nic);

	nicvf_request_sqs(nic);
	if (nic->sqs_mode)
		nicvf_get_primary_vf_struct(nic);

	/* Configure PTP timestamp */
	if (nic->ptp_clock)
		nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
	atomic_set(&nic->tx_ptp_skbs, 0);
	nic->ptp_skb = NULL;

	/* Configure receive side scaling and MTU */
	if (!nic->sqs_mode) {
		nicvf_rss_init(nic);
		err = nicvf_update_hw_max_frs(nic, netdev->mtu);
		if (err)
			goto cleanup;

		/* Clear percpu stats */
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
			       sizeof(struct nicvf_drv_stats));
	}

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue
interrupt */ 1515 for (qidx = 0; qidx < qs->cq_cnt; qidx++) 1516 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 1517 1518 /* Enable RBDR threshold interrupt */ 1519 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) 1520 nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); 1521 1522 /* Send VF config done msg to PF */ 1523 mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; 1524 nicvf_write_to_mbx(nic, &mbx); 1525 1526 return 0; 1527 cleanup: 1528 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1529 nicvf_unregister_interrupts(nic); 1530 tasklet_kill(&nic->qs_err_task); 1531 tasklet_kill(&nic->rbdr_task); 1532 napi_del: 1533 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 1534 cq_poll = nic->napi[qidx]; 1535 if (!cq_poll) 1536 continue; 1537 napi_disable(&cq_poll->napi); 1538 netif_napi_del(&cq_poll->napi); 1539 } 1540 nicvf_free_cq_poll(nic); 1541 return err; 1542 } 1543 1544 static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) 1545 { 1546 struct nicvf *nic = netdev_priv(netdev); 1547 int orig_mtu = netdev->mtu; 1548 1549 netdev->mtu = new_mtu; 1550 1551 if (!netif_running(netdev)) 1552 return 0; 1553 1554 if (nicvf_update_hw_max_frs(nic, new_mtu)) { 1555 netdev->mtu = orig_mtu; 1556 return -EINVAL; 1557 } 1558 1559 return 0; 1560 } 1561 1562 static int nicvf_set_mac_address(struct net_device *netdev, void *p) 1563 { 1564 struct sockaddr *addr = p; 1565 struct nicvf *nic = netdev_priv(netdev); 1566 1567 if (!is_valid_ether_addr(addr->sa_data)) 1568 return -EADDRNOTAVAIL; 1569 1570 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 1571 1572 if (nic->pdev->msix_enabled) { 1573 if (nicvf_hw_set_mac_addr(nic, netdev)) 1574 return -EBUSY; 1575 } else { 1576 nic->set_mac_pending = true; 1577 } 1578 1579 return 0; 1580 } 1581 1582 void nicvf_update_lmac_stats(struct nicvf *nic) 1583 { 1584 int stat = 0; 1585 union nic_mbx mbx = {}; 1586 1587 if (!netif_running(nic->netdev)) 1588 return; 1589 1590 mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; 1591 mbx.bgx_stats.vf_id = nic->vf_id; 1592 /* Rx stats */ 1593 mbx.bgx_stats.rx = 1; 1594 while (stat < BGX_RX_STATS_COUNT) { 1595 mbx.bgx_stats.idx = stat; 1596 if (nicvf_send_msg_to_pf(nic, &mbx)) 1597 return; 1598 stat++; 1599 } 1600 1601 stat = 0; 1602 1603 /* Tx stats */ 1604 mbx.bgx_stats.rx = 0; 1605 while (stat < BGX_TX_STATS_COUNT) { 1606 mbx.bgx_stats.idx = stat; 1607 if (nicvf_send_msg_to_pf(nic, &mbx)) 1608 return; 1609 stat++; 1610 } 1611 } 1612 1613 void nicvf_update_stats(struct nicvf *nic) 1614 { 1615 int qidx, cpu; 1616 u64 tmp_stats = 0; 1617 struct nicvf_hw_stats *stats = &nic->hw_stats; 1618 struct nicvf_drv_stats *drv_stats; 1619 struct queue_set *qs = nic->qs; 1620 1621 #define GET_RX_STATS(reg) \ 1622 nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3)) 1623 #define GET_TX_STATS(reg) \ 1624 nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3)) 1625 1626 stats->rx_bytes = GET_RX_STATS(RX_OCTS); 1627 stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST); 1628 stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST); 1629 stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST); 1630 stats->rx_fcs_errors = GET_RX_STATS(RX_FCS); 1631 stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR); 1632 stats->rx_drop_red = GET_RX_STATS(RX_RED); 1633 stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS); 1634 stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN); 1635 stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS); 1636 stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST); 1637 stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST); 1638 stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); 1639 
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	/* On T88 pass 2.0, the dummy SQE added for TSO notification
	 * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
	 * to by the dummy SQE, which results in the tx_drops counter
	 * being incremented. Subtracting it from the tx_tso counter
	 * gives the exact tx_drops count.
	 */
	if (nic->t88 && nic->hw_tso) {
		for_each_possible_cpu(cpu) {
			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
			tmp_stats += drv_stats->tx_tso;
		}
		stats->tx_drops = tmp_stats - stats->tx_drops;
	}
	stats->tx_frames = stats->tx_ucast_frames +
			   stats->tx_bcast_frames +
			   stats->tx_mcast_frames;
	stats->rx_frames = stats->rx_ucast_frames +
			   stats->rx_bcast_frames +
			   stats->rx_mcast_frames;
	stats->rx_drops = stats->rx_drop_red +
			  stats->rx_drop_overrun;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}

static void nicvf_get_stats64(struct net_device *netdev,
			      struct rtnl_link_stats64 *stats)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;

	nicvf_update_stats(nic);

	stats->rx_bytes = hw_stats->rx_bytes;
	stats->rx_packets = hw_stats->rx_frames;
	stats->rx_dropped = hw_stats->rx_drops;
	stats->multicast = hw_stats->rx_mcast_frames;

	stats->tx_bytes = hw_stats->tx_bytes;
	stats->tx_packets = hw_stats->tx_frames;
	stats->tx_dropped = hw_stats->tx_drops;
}

static void nicvf_tx_timeout(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");

	this_cpu_inc(nic->drv_stats->tx_timeout);
	schedule_work(&nic->reset_task);
}

static void nicvf_reset_task(struct work_struct *work)
{
	struct nicvf *nic;

	nic = container_of(work, struct nicvf, reset_task);

	if (!netif_running(nic->netdev))
		return;

	nicvf_stop(nic->netdev);
	nicvf_open(nic->netdev);
	netif_trans_update(nic->netdev);
}

static int nicvf_config_loopback(struct nicvf *nic,
				 netdev_features_t features)
{
	union nic_mbx mbx = {};

	mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
	mbx.lbk.vf_id = nic->vf_id;
	mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static netdev_features_t nicvf_fix_features(struct net_device *netdev,
					    netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);

	if ((features & NETIF_F_LOOPBACK) &&
	    netif_running(netdev) && !nic->loopback_supported)
		features &= ~NETIF_F_LOOPBACK;

	return features;
}

static int nicvf_set_features(struct net_device *netdev,
			      netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);
	netdev_features_t changed = features ^ netdev->features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		nicvf_config_vlan_stripping(nic, features);

	if ((changed
& NETIF_F_LOOPBACK) && netif_running(netdev)) 1753 return nicvf_config_loopback(nic, features); 1754 1755 return 0; 1756 } 1757 1758 static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) 1759 { 1760 u8 cq_count, txq_count; 1761 1762 /* Set XDP Tx queue count same as Rx queue count */ 1763 if (!bpf_attached) 1764 nic->xdp_tx_queues = 0; 1765 else 1766 nic->xdp_tx_queues = nic->rx_queues; 1767 1768 /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets 1769 * needs to be allocated, check how many. 1770 */ 1771 txq_count = nic->xdp_tx_queues + nic->tx_queues; 1772 cq_count = max(nic->rx_queues, txq_count); 1773 if (cq_count > MAX_CMP_QUEUES_PER_QS) { 1774 nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); 1775 nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; 1776 } else { 1777 nic->sqs_count = 0; 1778 } 1779 1780 /* Set primary Qset's resources */ 1781 nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); 1782 nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); 1783 nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); 1784 1785 /* Update stack */ 1786 nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); 1787 } 1788 1789 static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) 1790 { 1791 struct net_device *dev = nic->netdev; 1792 bool if_up = netif_running(nic->netdev); 1793 struct bpf_prog *old_prog; 1794 bool bpf_attached = false; 1795 1796 /* For now just support only the usual MTU sized frames */ 1797 if (prog && (dev->mtu > 1500)) { 1798 netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", 1799 dev->mtu); 1800 return -EOPNOTSUPP; 1801 } 1802 1803 /* ALL SQs attached to CQs i.e same as RQs, are treated as 1804 * XDP Tx queues and more Tx queues are allocated for 1805 * network stack to send pkts out. 1806 * 1807 * No of Tx queues are either same as Rx queues or whatever 1808 * is left in max no of queues possible. 1809 */ 1810 if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { 1811 netdev_warn(dev, 1812 "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", 1813 nic->max_queues); 1814 return -ENOMEM; 1815 } 1816 1817 if (if_up) 1818 nicvf_stop(nic->netdev); 1819 1820 old_prog = xchg(&nic->xdp_prog, prog); 1821 /* Detach old prog, if any */ 1822 if (old_prog) 1823 bpf_prog_put(old_prog); 1824 1825 if (nic->xdp_prog) { 1826 /* Attach BPF program */ 1827 nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); 1828 if (!IS_ERR(nic->xdp_prog)) 1829 bpf_attached = true; 1830 } 1831 1832 /* Calculate Tx queues needed for XDP and network stack */ 1833 nicvf_set_xdp_queues(nic, bpf_attached); 1834 1835 if (if_up) { 1836 /* Reinitialize interface, clean slate */ 1837 nicvf_open(nic->netdev); 1838 netif_trans_update(nic->netdev); 1839 } 1840 1841 return 0; 1842 } 1843 1844 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) 1845 { 1846 struct nicvf *nic = netdev_priv(netdev); 1847 1848 /* To avoid checks while retrieving buffer address from CQE_RX, 1849 * do not support XDP for T88 pass1.x silicons which are anyway 1850 * not in use widely. 1851 */ 1852 if (pass1_silicon(nic->pdev)) 1853 return -EOPNOTSUPP; 1854 1855 switch (xdp->command) { 1856 case XDP_SETUP_PROG: 1857 return nicvf_xdp_setup(nic, xdp->prog); 1858 case XDP_QUERY_PROG: 1859 xdp->prog_attached = !!nic->xdp_prog; 1860 xdp->prog_id = nic->xdp_prog ? 
nic->xdp_prog->aux->id : 0; 1861 return 0; 1862 default: 1863 return -EINVAL; 1864 } 1865 } 1866 1867 static int nicvf_xdp_xmit(struct net_device *netdev, struct xdp_buff *xdp) 1868 { 1869 struct nicvf *nic = netdev_priv(netdev); 1870 struct nicvf *snic = nic; 1871 struct nicvf_xdp_tx *xdp_tx; 1872 struct snd_queue *sq; 1873 struct page *page; 1874 int err, qidx; 1875 1876 if (!netif_running(netdev) || !nic->xdp_prog) 1877 return -EINVAL; 1878 1879 page = virt_to_page(xdp->data); 1880 xdp_tx = (struct nicvf_xdp_tx *)page_address(page); 1881 qidx = xdp_tx->qidx; 1882 1883 if (xdp_tx->qidx >= nic->xdp_tx_queues) 1884 return -EINVAL; 1885 1886 /* Get secondary Qset's info */ 1887 if (xdp_tx->qidx >= MAX_SND_QUEUES_PER_QS) { 1888 qidx = xdp_tx->qidx / MAX_SND_QUEUES_PER_QS; 1889 snic = (struct nicvf *)nic->snicvf[qidx - 1]; 1890 if (!snic) 1891 return -EINVAL; 1892 qidx = xdp_tx->qidx % MAX_SND_QUEUES_PER_QS; 1893 } 1894 1895 sq = &snic->qs->sq[qidx]; 1896 err = nicvf_xdp_sq_append_pkt(snic, sq, (u64)xdp->data, 1897 xdp_tx->dma_addr, 1898 xdp->data_end - xdp->data); 1899 if (err) 1900 return -ENOMEM; 1901 1902 nicvf_xdp_sq_doorbell(snic, sq, qidx); 1903 return 0; 1904 } 1905 1906 static void nicvf_xdp_flush(struct net_device *dev) 1907 { 1908 return; 1909 } 1910 1911 static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) 1912 { 1913 struct hwtstamp_config config; 1914 struct nicvf *nic = netdev_priv(netdev); 1915 1916 if (!nic->ptp_clock) 1917 return -ENODEV; 1918 1919 if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) 1920 return -EFAULT; 1921 1922 /* reserved for future extensions */ 1923 if (config.flags) 1924 return -EINVAL; 1925 1926 switch (config.tx_type) { 1927 case HWTSTAMP_TX_OFF: 1928 case HWTSTAMP_TX_ON: 1929 break; 1930 default: 1931 return -ERANGE; 1932 } 1933 1934 switch (config.rx_filter) { 1935 case HWTSTAMP_FILTER_NONE: 1936 nic->hw_rx_tstamp = false; 1937 break; 1938 case HWTSTAMP_FILTER_ALL: 1939 case HWTSTAMP_FILTER_SOME: 1940 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 1941 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 1942 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 1943 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 1944 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 1945 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 1946 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 1947 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 1948 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 1949 case HWTSTAMP_FILTER_PTP_V2_EVENT: 1950 case HWTSTAMP_FILTER_PTP_V2_SYNC: 1951 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 1952 nic->hw_rx_tstamp = true; 1953 config.rx_filter = HWTSTAMP_FILTER_ALL; 1954 break; 1955 default: 1956 return -ERANGE; 1957 } 1958 1959 if (netif_running(netdev)) 1960 nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp); 1961 1962 if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) 1963 return -EFAULT; 1964 1965 return 0; 1966 } 1967 1968 static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) 1969 { 1970 switch (cmd) { 1971 case SIOCSHWTSTAMP: 1972 return nicvf_config_hwtstamp(netdev, req); 1973 default: 1974 return -EOPNOTSUPP; 1975 } 1976 } 1977 1978 static const struct net_device_ops nicvf_netdev_ops = { 1979 .ndo_open = nicvf_open, 1980 .ndo_stop = nicvf_stop, 1981 .ndo_start_xmit = nicvf_xmit, 1982 .ndo_change_mtu = nicvf_change_mtu, 1983 .ndo_set_mac_address = nicvf_set_mac_address, 1984 .ndo_get_stats64 = nicvf_get_stats64, 1985 .ndo_tx_timeout = nicvf_tx_timeout, 1986 .ndo_fix_features = nicvf_fix_features, 1987 .ndo_set_features = nicvf_set_features, 1988 .ndo_bpf = 
nicvf_xdp, 1989 .ndo_xdp_xmit = nicvf_xdp_xmit, 1990 .ndo_xdp_flush = nicvf_xdp_flush, 1991 .ndo_do_ioctl = nicvf_ioctl, 1992 }; 1993 1994 static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 1995 { 1996 struct device *dev = &pdev->dev; 1997 struct net_device *netdev; 1998 struct nicvf *nic; 1999 int err, qcount; 2000 u16 sdevid; 2001 struct cavium_ptp *ptp_clock; 2002 2003 ptp_clock = cavium_ptp_get(); 2004 if (IS_ERR(ptp_clock)) { 2005 if (PTR_ERR(ptp_clock) == -ENODEV) 2006 /* In virtualized environment we proceed without ptp */ 2007 ptp_clock = NULL; 2008 else 2009 return PTR_ERR(ptp_clock); 2010 } 2011 2012 err = pci_enable_device(pdev); 2013 if (err) { 2014 dev_err(dev, "Failed to enable PCI device\n"); 2015 return err; 2016 } 2017 2018 err = pci_request_regions(pdev, DRV_NAME); 2019 if (err) { 2020 dev_err(dev, "PCI request regions failed 0x%x\n", err); 2021 goto err_disable_device; 2022 } 2023 2024 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); 2025 if (err) { 2026 dev_err(dev, "Unable to get usable DMA configuration\n"); 2027 goto err_release_regions; 2028 } 2029 2030 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); 2031 if (err) { 2032 dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); 2033 goto err_release_regions; 2034 } 2035 2036 qcount = netif_get_num_default_rss_queues(); 2037 2038 /* Restrict multiqset support only for host bound VFs */ 2039 if (pdev->is_virtfn) { 2040 /* Set max number of queues per VF */ 2041 qcount = min_t(int, num_online_cpus(), 2042 (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); 2043 } 2044 2045 netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); 2046 if (!netdev) { 2047 err = -ENOMEM; 2048 goto err_release_regions; 2049 } 2050 2051 pci_set_drvdata(pdev, netdev); 2052 2053 SET_NETDEV_DEV(netdev, &pdev->dev); 2054 2055 nic = netdev_priv(netdev); 2056 nic->netdev = netdev; 2057 nic->pdev = pdev; 2058 nic->pnicvf = nic; 2059 nic->max_queues = qcount; 2060 /* If no of CPUs are too low, there won't be any queues left 2061 * for XDP_TX, hence double it. 
2062 */ 2063 if (!nic->t88) 2064 nic->max_queues *= 2; 2065 nic->ptp_clock = ptp_clock; 2066 2067 /* MAP VF's configuration registers */ 2068 nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); 2069 if (!nic->reg_base) { 2070 dev_err(dev, "Cannot map config register space, aborting\n"); 2071 err = -ENOMEM; 2072 goto err_free_netdev; 2073 } 2074 2075 nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats); 2076 if (!nic->drv_stats) { 2077 err = -ENOMEM; 2078 goto err_free_netdev; 2079 } 2080 2081 err = nicvf_set_qset_resources(nic); 2082 if (err) 2083 goto err_free_netdev; 2084 2085 /* Check if PF is alive and get MAC address for this VF */ 2086 err = nicvf_register_misc_interrupt(nic); 2087 if (err) 2088 goto err_free_netdev; 2089 2090 nicvf_send_vf_struct(nic); 2091 2092 if (!pass1_silicon(nic->pdev)) 2093 nic->hw_tso = true; 2094 2095 /* Get iommu domain for iova to physical addr conversion */ 2096 nic->iommu_domain = iommu_get_domain_for_dev(dev); 2097 2098 pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); 2099 if (sdevid == 0xA134) 2100 nic->t88 = true; 2101 2102 /* Check if this VF is in QS only mode */ 2103 if (nic->sqs_mode) 2104 return 0; 2105 2106 err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues); 2107 if (err) 2108 goto err_unregister_interrupts; 2109 2110 netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG | 2111 NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 | 2112 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2113 NETIF_F_HW_VLAN_CTAG_RX); 2114 2115 netdev->hw_features |= NETIF_F_RXHASH; 2116 2117 netdev->features |= netdev->hw_features; 2118 netdev->hw_features |= NETIF_F_LOOPBACK; 2119 2120 netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | 2121 NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; 2122 2123 netdev->netdev_ops = &nicvf_netdev_ops; 2124 netdev->watchdog_timeo = NICVF_TX_TIMEOUT; 2125 2126 /* MTU range: 64 - 9200 */ 2127 netdev->min_mtu = NIC_HW_MIN_FRS; 2128 netdev->max_mtu = NIC_HW_MAX_FRS; 2129 2130 INIT_WORK(&nic->reset_task, nicvf_reset_task); 2131 2132 err = register_netdev(netdev); 2133 if (err) { 2134 dev_err(dev, "Failed to register netdevice\n"); 2135 goto err_unregister_interrupts; 2136 } 2137 2138 nic->msg_enable = debug; 2139 2140 nicvf_set_ethtool_ops(netdev); 2141 2142 return 0; 2143 2144 err_unregister_interrupts: 2145 nicvf_unregister_interrupts(nic); 2146 err_free_netdev: 2147 pci_set_drvdata(pdev, NULL); 2148 if (nic->drv_stats) 2149 free_percpu(nic->drv_stats); 2150 free_netdev(netdev); 2151 err_release_regions: 2152 pci_release_regions(pdev); 2153 err_disable_device: 2154 pci_disable_device(pdev); 2155 return err; 2156 } 2157 2158 static void nicvf_remove(struct pci_dev *pdev) 2159 { 2160 struct net_device *netdev = pci_get_drvdata(pdev); 2161 struct nicvf *nic; 2162 struct net_device *pnetdev; 2163 2164 if (!netdev) 2165 return; 2166 2167 nic = netdev_priv(netdev); 2168 pnetdev = nic->pnicvf->netdev; 2169 2170 /* Check if this Qset is assigned to different VF. 2171 * If yes, clean primary and all secondary Qsets. 
2172 */ 2173 if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED)) 2174 unregister_netdev(pnetdev); 2175 nicvf_unregister_interrupts(nic); 2176 pci_set_drvdata(pdev, NULL); 2177 if (nic->drv_stats) 2178 free_percpu(nic->drv_stats); 2179 cavium_ptp_put(nic->ptp_clock); 2180 free_netdev(netdev); 2181 pci_release_regions(pdev); 2182 pci_disable_device(pdev); 2183 } 2184 2185 static void nicvf_shutdown(struct pci_dev *pdev) 2186 { 2187 nicvf_remove(pdev); 2188 } 2189 2190 static struct pci_driver nicvf_driver = { 2191 .name = DRV_NAME, 2192 .id_table = nicvf_id_table, 2193 .probe = nicvf_probe, 2194 .remove = nicvf_remove, 2195 .shutdown = nicvf_shutdown, 2196 }; 2197 2198 static int __init nicvf_init_module(void) 2199 { 2200 pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); 2201 2202 return pci_register_driver(&nicvf_driver); 2203 } 2204 2205 static void __exit nicvf_cleanup_module(void) 2206 { 2207 pci_unregister_driver(&nicvf_driver); 2208 } 2209 2210 module_init(nicvf_init_module); 2211 module_exit(nicvf_cleanup_module); 2212
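
/* Usage sketch (illustrative note only, not upstream driver text): the two
 * module parameters declared at the top of this file can be set at load
 * time, assuming the driver is built as the usual nicvf.ko module:
 *
 *   modprobe nicvf debug=0x3 cpi_alg=0
 *
 * 'debug' is a netif message level bitmap copied to nic->msg_enable in
 * nicvf_probe(), and 'cpi_alg' selects the CPI algorithm sent to the PF by
 * nicvf_config_cpi() (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv).
 */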