/*
 * Copyright (C) 2015 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/prefetch.h>
#include <linux/irq.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/filter.h>
#include <linux/net_tstamp.h>

#include "nic_reg.h"
#include "nic.h"
#include "nicvf_queues.h"
#include "thunder_bgx.h"
#include "../common/cavium_ptp.h"

#define DRV_NAME	"nicvf"
#define DRV_VERSION	"1.0"

/* Supported devices */
static const struct pci_device_id nicvf_id_table[] = {
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
			 PCI_DEVICE_ID_THUNDER_NIC_VF,
			 PCI_VENDOR_ID_CAVIUM,
			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
	{ 0, }  /* end of table */
};

MODULE_AUTHOR("Sunil Goutham");
MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION(DRV_VERSION);
MODULE_DEVICE_TABLE(pci, nicvf_id_table);

static int debug = 0x00;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Debug message level bitmap");

static int cpi_alg = CPI_ALG_NONE;
module_param(cpi_alg, int, S_IRUGO);
MODULE_PARM_DESC(cpi_alg,
		 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");

static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
{
	if (nic->sqs_mode)
		return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS);
	else
		return qidx;
}

/* The Cavium ThunderX network controller can *only* be found in SoCs
 * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
 * registers on this platform are implicitly strongly ordered with respect
 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use
 * with no memory barriers in this driver.  The readq()/writeq() functions add
 * explicit ordering operations which in this case are redundant, and only
 * add overhead.
 */

/* Register read/write APIs */
void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val)
{
	writeq_relaxed(val, nic->reg_base + offset);
}

u64 nicvf_reg_read(struct nicvf *nic, u64 offset)
{
	return readq_relaxed(nic->reg_base + offset);
}

void nicvf_queue_reg_write(struct nicvf *nic, u64 offset,
			   u64 qidx, u64 val)
{
	void __iomem *addr = nic->reg_base + offset;

	writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT));
}

u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx)
{
	void __iomem *addr = nic->reg_base + offset;

	return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT));
}

/* VF -> PF mailbox communication */
static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
{
	u64 *msg = (u64 *)mbx;

	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
}

int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
{
	int timeout = NIC_MBOX_MSG_TIMEOUT;
	int sleep = 10;

	nic->pf_acked = false;
	nic->pf_nacked = false;

	nicvf_write_to_mbx(nic, mbx);

	/* Wait for previous message to be acked, timeout 2sec */
	while (!nic->pf_acked) {
		if (nic->pf_nacked) {
			netdev_err(nic->netdev,
				   "PF NACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EINVAL;
		}
		msleep(sleep);
		if (nic->pf_acked)
			break;
		timeout -= sleep;
		if (!timeout) {
			netdev_err(nic->netdev,
				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
				   (mbx->msg.msg & 0xFF), nic->vf_id);
			return -EBUSY;
		}
	}
	return 0;
}

/* Checks if VF is able to communicate with PF
 * and also gets the VNIC number this VF is associated to.
 */
static int nicvf_check_pf_ready(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_READY;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		netdev_err(nic->netdev,
			   "PF didn't respond to READY msg\n");
		return 0;
	}

	return 1;
}

static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
{
	if (bgx->rx)
		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
	else
		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
}

static void nicvf_handle_mbx_intr(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	u64 *mbx_data;
	u64 mbx_addr;
	int i;

	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
	mbx_data = (u64 *)&mbx;

	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
		*mbx_data = nicvf_reg_read(nic, mbx_addr);
		mbx_data++;
		mbx_addr += sizeof(u64);
	}

	netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg);
	switch (mbx.msg.msg) {
	case NIC_MBOX_MSG_READY:
		nic->pf_acked = true;
		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
		nic->node = mbx.nic_cfg.node_id;
		if (!nic->set_mac_pending)
			ether_addr_copy(nic->netdev->dev_addr,
					mbx.nic_cfg.mac_addr);
		nic->sqs_mode = mbx.nic_cfg.sqs_mode;
		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
		nic->link_up = false;
		nic->duplex = 0;
		nic->speed = 0;
		break;
	case NIC_MBOX_MSG_ACK:
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_NACK:
		nic->pf_nacked = true;
		break;
	case NIC_MBOX_MSG_RSS_SIZE:
		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_STATS:
		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
		nic->pf_acked = true;
		nic->link_up = mbx.link_status.link_up;
		nic->duplex = mbx.link_status.duplex;
		nic->speed = mbx.link_status.speed;
		nic->mac_type = mbx.link_status.mac_type;
		if (nic->link_up) {
			netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
				    nic->speed,
				    nic->duplex == DUPLEX_FULL ?
				    "Full" : "Half");
			netif_carrier_on(nic->netdev);
			netif_tx_start_all_queues(nic->netdev);
		} else {
			netdev_info(nic->netdev, "Link is Down\n");
			netif_carrier_off(nic->netdev);
			netif_tx_stop_all_queues(nic->netdev);
		}
		break;
	case NIC_MBOX_MSG_ALLOC_SQS:
		nic->sqs_count = mbx.sqs_alloc.qs_count;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_SNICVF_PTR:
		/* Primary VF: make note of secondary VF's pointer
		 * to be used during packet transmission.
		 */
		nic->snicvf[mbx.nicvf.sqs_id] =
			(struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PNICVF_PTR:
		/* Secondary VF/Qset: make note of primary VF's pointer
		 * to be used during packet reception, to hand over the packet
		 * to primary VF's netdev.
		 */
		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
		nic->pf_acked = true;
		break;
	case NIC_MBOX_MSG_PFC:
		nic->pfc.autoneg = mbx.pfc.autoneg;
		nic->pfc.fc_rx = mbx.pfc.fc_rx;
		nic->pfc.fc_tx = mbx.pfc.fc_tx;
		nic->pf_acked = true;
		break;
	default:
		netdev_err(nic->netdev,
			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
		break;
	}
	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
}

static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev)
{
	union nic_mbx mbx = {};

	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
	mbx.mac.vf_id = nic->vf_id;
	ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr);

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_config_cpi(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
	mbx.cpi_cfg.vf_id = nic->vf_id;
	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;

	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_rss_size(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
	mbx.rss_size.vf_id = nic->vf_id;
	nicvf_send_msg_to_pf(nic, &mbx);
}

void nicvf_config_rss(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	struct nicvf_rss_info *rss = &nic->rss_info;
	int ind_tbl_len = rss->rss_size;
	int i, nextq = 0;

	mbx.rss_cfg.vf_id = nic->vf_id;
	mbx.rss_cfg.hash_bits = rss->hash_bits;
	while (ind_tbl_len) {
		mbx.rss_cfg.tbl_offset = nextq;
		mbx.rss_cfg.tbl_len = min(ind_tbl_len,
					  RSS_IND_TBL_LEN_PER_MBX_MSG);
		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
			NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;

		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];

		nicvf_send_msg_to_pf(nic, &mbx);

		ind_tbl_len -= mbx.rss_cfg.tbl_len;
	}
}

void nicvf_set_rss_key(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	u64 key_addr = NIC_VNIC_RSS_KEY_0_4;
	int idx;

	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
		nicvf_reg_write(nic, key_addr, rss->key[idx]);
		key_addr += sizeof(u64);
	}
}

static int nicvf_rss_init(struct nicvf *nic)
{
	struct nicvf_rss_info *rss = &nic->rss_info;
	int idx;

	nicvf_get_rss_size(nic);

	if (cpi_alg != CPI_ALG_NONE) {
		rss->enable = false;
		rss->hash_bits = 0;
		return 0;
	}

	rss->enable = true;

	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
	nicvf_set_rss_key(nic);

	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);

	rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size));

	for (idx = 0; idx < rss->rss_size; idx++)
		rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx,
							       nic->rx_queues);
	nicvf_config_rss(nic);
	return 1;
}

/* Request PF to allocate additional Qsets */
static void nicvf_request_sqs(struct nicvf *nic)
{
	union nic_mbx mbx = {};
	int sqs;
	int sqs_count = nic->sqs_count;
	int rx_queues = 0, tx_queues = 0;

	/* Only primary VF should request */
	if (nic->sqs_mode || !nic->sqs_count)
		return;

	mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS;
	mbx.sqs_alloc.vf_id = nic->vf_id;
	mbx.sqs_alloc.qs_count = nic->sqs_count;
	if (nicvf_send_msg_to_pf(nic, &mbx)) {
		/* No response from PF */
		nic->sqs_count = 0;
		return;
	}

	/* Return if no Secondary Qsets available */
	if (!nic->sqs_count)
		return;

	if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS)
		rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS;

	tx_queues = nic->tx_queues + nic->xdp_tx_queues;
	if (tx_queues > MAX_SND_QUEUES_PER_QS)
		tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS;

	/* Set no of Rx/Tx queues in each of the SQsets */
	for (sqs = 0; sqs < nic->sqs_count; sqs++) {
		mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR;
		mbx.nicvf.vf_id = nic->vf_id;
		mbx.nicvf.sqs_id = sqs;
		nicvf_send_msg_to_pf(nic, &mbx);

		nic->snicvf[sqs]->sqs_id = sqs;
		if (rx_queues > MAX_RCV_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS;
			rx_queues -= MAX_RCV_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->rq_cnt = rx_queues;
			rx_queues = 0;
		}

		if (tx_queues > MAX_SND_QUEUES_PER_QS) {
			nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS;
			tx_queues -= MAX_SND_QUEUES_PER_QS;
		} else {
			nic->snicvf[sqs]->qs->sq_cnt = tx_queues;
			tx_queues = 0;
		}

		nic->snicvf[sqs]->qs->cq_cnt =
			max(nic->snicvf[sqs]->qs->rq_cnt,
			    nic->snicvf[sqs]->qs->sq_cnt);

		/* Initialize secondary Qset's queues and its interrupts */
		nicvf_open(nic->snicvf[sqs]->netdev);
	}

	/* Update stack with actual Rx/Tx queue count allocated */
	if (sqs_count != nic->sqs_count)
		nicvf_set_real_num_queues(nic->netdev,
					  nic->tx_queues, nic->rx_queues);
}

/* Send this Qset's nicvf pointer to PF.
 * PF in turn sends primary VF's nicvf struct to secondary Qsets/VFs
 * so that packets received by these Qsets can use primary VF's netdev
 */
static void nicvf_send_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR;
	mbx.nicvf.sqs_mode = nic->sqs_mode;
	mbx.nicvf.nicvf = (u64)nic;
	nicvf_send_msg_to_pf(nic, &mbx);
}

static void nicvf_get_primary_vf_struct(struct nicvf *nic)
{
	union nic_mbx mbx = {};

	mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR;
	nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_set_real_num_queues(struct net_device *netdev,
			      int tx_queues, int rx_queues)
{
	int err = 0;

	err = netif_set_real_num_tx_queues(netdev, tx_queues);
	if (err) {
		netdev_err(netdev,
			   "Failed to set no of Tx queues: %d\n", tx_queues);
		return err;
	}

	err = netif_set_real_num_rx_queues(netdev, rx_queues);
	if (err)
		netdev_err(netdev,
			   "Failed to set no of Rx queues: %d\n", rx_queues);
	return err;
}

static int nicvf_init_resources(struct nicvf *nic)
{
	int err;

	/* Enable Qset */
	nicvf_qset_config(nic, true);

	/* Initialize queues and HW for data transfer */
	err = nicvf_config_data_transfer(nic, true);
	if (err) {
		netdev_err(nic->netdev,
			   "Failed to alloc/config VF's QSet resources\n");
		return err;
	}

	return 0;
}

static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
				struct rcv_queue *rq, struct sk_buff **skb)
{
	struct xdp_buff xdp;
	struct page *page;
	u32 action;
	u16 len, offset = 0;
	u64 dma_addr, cpu_addr;
	void *orig_data;

	/* Retrieve packet buffer's DMA address and length */
	len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64))));
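	/* The length sits in word 3 of the CQE_RX descriptor and the
	 * buffer's IOVA in word 7, hence the raw word offsets used here.
	 */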
	dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64))));

	cpu_addr = nicvf_iova_to_phys(nic, dma_addr);
	if (!cpu_addr)
		return false;
	cpu_addr = (u64)phys_to_virt(cpu_addr);
	page = virt_to_page((void *)cpu_addr);

	xdp.data_hard_start = page_address(page);
	xdp.data = (void *)cpu_addr;
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + len;
	xdp.rxq = &rq->xdp_rxq;
	orig_data = xdp.data;

	rcu_read_lock();
	action = bpf_prog_run_xdp(prog, &xdp);
	rcu_read_unlock();

	/* Check if XDP program has changed headers */
	if (orig_data != xdp.data) {
		len = xdp.data_end - xdp.data;
		offset = orig_data - xdp.data;
		dma_addr -= offset;
	}

	switch (action) {
	case XDP_PASS:
		/* Check if it's a recycled page, if not
		 * unmap the DMA mapping.
		 *
		 * Recycled page holds an extra reference.
		 */
		if (page_ref_count(page) == 1) {
			dma_addr &= PAGE_MASK;
			dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
					     RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
					     DMA_FROM_DEVICE,
					     DMA_ATTR_SKIP_CPU_SYNC);
		}

		/* Build SKB and pass on packet to network stack */
		*skb = build_skb(xdp.data,
				 RCV_FRAG_LEN - cqe_rx->align_pad + offset);
		if (!*skb)
			put_page(page);
		else
			skb_put(*skb, len);
		return false;
	case XDP_TX:
		nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len);
		return true;
	default:
		bpf_warn_invalid_xdp_action(action);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(nic->netdev, prog, action);
		/* fall through */
	case XDP_DROP:
		/* Check if it's a recycled page, if not
		 * unmap the DMA mapping.
		 *
		 * Recycled page holds an extra reference.
		 */
		if (page_ref_count(page) == 1) {
			dma_addr &= PAGE_MASK;
			dma_unmap_page_attrs(&nic->pdev->dev, dma_addr,
					     RCV_FRAG_LEN + XDP_PACKET_HEADROOM,
					     DMA_FROM_DEVICE,
					     DMA_ATTR_SKIP_CPU_SYNC);
		}
		put_page(page);
		return true;
	}
	return false;
}

static void nicvf_snd_ptp_handler(struct net_device *netdev,
				  struct cqe_send_t *cqe_tx)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct skb_shared_hwtstamps ts;
	u64 ns;

	nic = nic->pnicvf;

	/* Sync for 'ptp_skb' */
	smp_rmb();

	/* New timestamp request can be queued now */
	atomic_set(&nic->tx_ptp_skbs, 0);

	/* Check for timestamp requested skb */
	if (!nic->ptp_skb)
		return;

	/* Check if timestamping timed out (the timeout is set to 10us) */
	if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT ||
	    cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT)
		goto no_tstamp;

	/* Get the timestamp */
	memset(&ts, 0, sizeof(ts));
	ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp);
	ts.hwtstamp = ns_to_ktime(ns);
	skb_tstamp_tx(nic->ptp_skb, &ts);

no_tstamp:
	/* Free the original skb */
	dev_kfree_skb_any(nic->ptp_skb);
	nic->ptp_skb = NULL;
	/* Sync 'ptp_skb' */
	smp_wmb();
}

static void nicvf_snd_pkt_handler(struct net_device *netdev,
				  struct cqe_send_t *cqe_tx,
				  int budget, int *subdesc_cnt,
				  unsigned int *tx_pkts, unsigned int *tx_bytes)
{
	struct sk_buff *skb = NULL;
	struct page *page;
	struct nicvf *nic = netdev_priv(netdev);
	struct snd_queue *sq;
	struct sq_hdr_subdesc *hdr;
	struct sq_hdr_subdesc *tso_sqe;

	sq = &nic->qs->sq[cqe_tx->sq_idx];

	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr);
	if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER)
		return;

	/* Check for errors */
	if (cqe_tx->send_status)
		nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx);

	/* Is this an XDP designated Tx queue */
	if (sq->is_xdp) {
		page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr];
		/* Check if it's recycled page or else unmap DMA mapping */
		if (page && (page_ref_count(page) == 1))
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);

		/* Release page reference for recycling */
		if (page)
			put_page(page);
		sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL;
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		return;
	}

	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
	if (skb) {
		/* Check for dummy descriptor used for HW TSO offload on 88xx */
		if (hdr->dont_send) {
			/* Get actual TSO descriptors and free them */
			tso_sqe =
			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
			nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2,
						 tso_sqe->subdesc_cnt);
			*subdesc_cnt += tso_sqe->subdesc_cnt + 1;
		} else {
			nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr,
						 hdr->subdesc_cnt);
		}
		*subdesc_cnt += hdr->subdesc_cnt + 1;
		prefetch(skb);
		(*tx_pkts)++;
		*tx_bytes += skb->len;
		/* If timestamp is requested for this skb, don't free it */
		if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
		    !nic->pnicvf->ptp_skb)
			nic->pnicvf->ptp_skb = skb;
		else
			napi_consume_skb(skb, budget);
		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
	} else {
		/* In case of SW TSO on 88xx, only the last segment will have
		 * an SKB attached, so just free SQEs here.
		 */
		if (!nic->hw_tso)
			*subdesc_cnt += hdr->subdesc_cnt + 1;
	}
}

static inline void nicvf_set_rxhash(struct net_device *netdev,
				    struct cqe_rx_t *cqe_rx,
				    struct sk_buff *skb)
{
	u8 hash_type;
	u32 hash;

	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (cqe_rx->rss_alg) {
	case RSS_ALG_TCP_IP:
	case RSS_ALG_UDP_IP:
		hash_type = PKT_HASH_TYPE_L4;
		hash = cqe_rx->rss_tag;
		break;
	case RSS_ALG_IP:
		hash_type = PKT_HASH_TYPE_L3;
		hash = cqe_rx->rss_tag;
		break;
	default:
		hash_type = PKT_HASH_TYPE_NONE;
		hash = 0;
	}

	skb_set_hash(skb, hash, hash_type);
}

static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb)
{
	u64 ns;

	if (!nic->ptp_clock || !nic->hw_rx_tstamp)
		return;

	/* The first 8 bytes are the timestamp */
	ns = cavium_ptp_tstamp2time(nic->ptp_clock,
				    be64_to_cpu(*(__be64 *)skb->data));
	skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns);

	__skb_pull(skb, 8);
}

static void nicvf_rcv_pkt_handler(struct net_device *netdev,
				  struct napi_struct *napi,
				  struct cqe_rx_t *cqe_rx,
				  struct snd_queue *sq, struct rcv_queue *rq)
{
	struct sk_buff *skb = NULL;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf *snic = nic;
	int err = 0;
	int rq_idx;

	rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx);

	if (nic->sqs_mode) {
		/* Use primary VF's 'nicvf' struct */
		nic = nic->pnicvf;
		netdev = nic->netdev;
	}

	/* Check for errors */
	if (cqe_rx->err_level || cqe_rx->err_opcode) {
		err = nicvf_check_cqe_rx_errs(nic, cqe_rx);
		if (err && !cqe_rx->rb_cnt)
			return;
	}

	/* For XDP, ignore pkts spanning multiple pages */
	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
		/* Packet consumed by XDP */
		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
			return;
	} else {
		skb = nicvf_get_rcv_skb(snic, cqe_rx,
					nic->xdp_prog ? true : false);
	}

	if (!skb)
		return;

	if (netif_msg_pktdata(nic)) {
		netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len);
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1,
			       skb->data, skb->len, true);
	}

	/* If error packet, drop it here */
	if (err) {
		dev_kfree_skb_any(skb);
		return;
	}

	nicvf_set_rxtstamp(nic, skb);
	nicvf_set_rxhash(netdev, cqe_rx, skb);

	skb_record_rx_queue(skb, rq_idx);
	if (netdev->hw_features & NETIF_F_RXCSUM) {
		/* HW by default verifies TCP/UDP/SCTP checksums */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb_checksum_none_assert(skb);
	}

	skb->protocol = eth_type_trans(skb, netdev);

	/* Check for stripped VLAN */
	if (cqe_rx->vlan_found && cqe_rx->vlan_stripped)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       ntohs((__force __be16)cqe_rx->vlan_tci));

	if (napi && (netdev->features & NETIF_F_GRO))
		napi_gro_receive(napi, skb);
	else
		netif_receive_skb(skb);
}

static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
				 struct napi_struct *napi, int budget)
{
	int processed_cqe, work_done = 0, tx_done = 0;
	int cqe_count, cqe_head;
	int subdesc_cnt = 0;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct cmp_queue *cq = &qs->cq[cq_idx];
	struct cqe_rx_t *cq_desc;
	struct netdev_queue *txq;
	struct snd_queue *sq = &qs->sq[cq_idx];
	struct rcv_queue *rq = &qs->rq[cq_idx];
	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;

	spin_lock_bh(&cq->lock);
loop:
	processed_cqe = 0;
	/* Get no of valid CQ entries to process */
	cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx);
	cqe_count &= CQ_CQE_COUNT;
	if (!cqe_count)
		goto done;

	/* Get head of the valid CQ entries */
	cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9;
	cqe_head &= 0xFFFF;

	while (processed_cqe < cqe_count) {
		/* Get the CQ descriptor */
		cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head);
		cqe_head++;
		cqe_head &= (cq->dmem.q_len - 1);
		/* Initiate prefetch for next descriptor */
		prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head));

		if ((work_done >= budget) && napi &&
		    (cq_desc->cqe_type != CQE_TYPE_SEND)) {
			break;
		}

		switch (cq_desc->cqe_type) {
		case CQE_TYPE_RX:
			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
			work_done++;
			break;
		case CQE_TYPE_SEND:
			nicvf_snd_pkt_handler(netdev, (void *)cq_desc,
					      budget, &subdesc_cnt,
					      &tx_pkts, &tx_bytes);
			tx_done++;
			break;
		case CQE_TYPE_SEND_PTP:
			nicvf_snd_ptp_handler(netdev, (void *)cq_desc);
			break;
		case CQE_TYPE_INVALID:
		case CQE_TYPE_RX_SPLIT:
		case CQE_TYPE_RX_TCP:
			/* Ignore for now */
			break;
		}
		processed_cqe++;
	}

	/* Ring doorbell to inform H/W to reuse processed CQEs */
	nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR,
			      cq_idx, processed_cqe);

	if ((work_done < budget) && napi)
		goto loop;

done:
	/* Update SQ's descriptor free count */
	if (subdesc_cnt)
		nicvf_put_sq_desc(sq, subdesc_cnt);

	txq_idx = nicvf_netdev_qidx(nic, cq_idx);
	/* Handle XDP TX queues */
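	/* When an XDP program is attached, the first 'xdp_tx_queues' Tx
	 * queues are dedicated to XDP_TX; the stack's Tx queues follow
	 * them, so adjust 'txq_idx' before waking the stack's queue.
	 */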
	if (nic->pnicvf->xdp_prog) {
		if (txq_idx < nic->pnicvf->xdp_tx_queues) {
			nicvf_xdp_sq_doorbell(nic, sq, cq_idx);
			goto out;
		}
		nic = nic->pnicvf;
		txq_idx -= nic->pnicvf->xdp_tx_queues;
	}

	/* Wake up TXQ if it was stopped earlier due to SQ being full */
	if (tx_done ||
	    (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
		netdev = nic->pnicvf->netdev;
		txq = netdev_get_tx_queue(netdev, txq_idx);
		if (tx_pkts)
			netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);

		/* To read updated queue and carrier status */
		smp_mb();
		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
			netif_tx_wake_queue(txq);
			nic = nic->pnicvf;
			this_cpu_inc(nic->drv_stats->txq_wake);
			netif_warn(nic, tx_err, netdev,
				   "Transmit queue wakeup SQ%d\n", txq_idx);
		}
	}

out:
	spin_unlock_bh(&cq->lock);
	return work_done;
}

static int nicvf_poll(struct napi_struct *napi, int budget)
{
	u64 cq_head;
	int work_done = 0;
	struct net_device *netdev = napi->dev;
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_cq_poll *cq;

	cq = container_of(napi, struct nicvf_cq_poll, napi);
	work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget);

	if (work_done < budget) {
		/* Slow packet rate, exit polling */
		napi_complete_done(napi, work_done);
		/* Re-enable interrupts */
		cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD,
					       cq->cq_idx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
		nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD,
				      cq->cq_idx, cq_head);
		nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx);
	}
	return work_done;
}

/* Qset error interrupt handler
 *
 * As of now only CQ errors are handled
 */
static void nicvf_handle_qs_err(unsigned long data)
{
	struct nicvf *nic = (struct nicvf *)data;
	struct queue_set *qs = nic->qs;
	int qidx;
	u64 status;

	netif_tx_disable(nic->netdev);

	/* Check if it is CQ err */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS,
					      qidx);
		if (!(status & CQ_ERR_MASK))
			continue;
		/* Process already queued CQEs and reconfig CQ */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_sq_disable(nic, qidx);
		nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0);
		nicvf_cmp_queue_config(nic, qs, qidx, true);
		nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx);
		nicvf_sq_enable(nic, &qs->sq[qidx], qidx);

		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
	}

	netif_tx_start_all_queues(nic->netdev);
	/* Re-enable Qset error interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
}

static void nicvf_dump_intr_status(struct nicvf *nic)
{
	netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n",
		   nicvf_reg_read(nic, NIC_VF_INT));
}

static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	u64 intr;

	nicvf_dump_intr_status(nic);

	intr = nicvf_reg_read(nic, NIC_VF_INT);
	/* Check for spurious interrupt */
	if (!(intr & NICVF_INTR_MBOX_MASK))
		return IRQ_HANDLED;

	nicvf_handle_mbx_intr(nic);

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq)
{
	struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq;
	struct nicvf *nic = cq_poll->nicvf;
	int qidx = cq_poll->cq_idx;

	nicvf_dump_intr_status(nic);

	/* Disable interrupts */
	nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Schedule NAPI */
	napi_schedule_irqoff(&cq_poll->napi);

	/* Clear interrupt */
	nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;
	u8 qidx;

	nicvf_dump_intr_status(nic);

	/* Disable RBDR interrupt and schedule softirq */
	for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
			continue;
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		tasklet_hi_schedule(&nic->rbdr_task);
		/* Clear interrupt */
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}

	return IRQ_HANDLED;
}

static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq)
{
	struct nicvf *nic = (struct nicvf *)nicvf_irq;

	nicvf_dump_intr_status(nic);

	/* Disable Qset err interrupt and schedule softirq */
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	tasklet_hi_schedule(&nic->qs_err_task);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	return IRQ_HANDLED;
}

static void nicvf_set_irq_affinity(struct nicvf *nic)
{
	int vec, cpu;

	for (vec = 0; vec < nic->num_vec; vec++) {
		if (!nic->irq_allocated[vec])
			continue;

		if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL))
			return;
		/* CQ interrupts */
		if (vec < NICVF_INTR_ID_SQ)
			/* Leave CPU0 for RBDR and other interrupts */
			cpu = nicvf_netdev_qidx(nic, vec) + 1;
		else
			cpu = 0;

		cpumask_set_cpu(cpumask_local_spread(cpu, nic->node),
				nic->affinity_mask[vec]);
		irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec),
				      nic->affinity_mask[vec]);
	}
}

static int nicvf_register_interrupts(struct nicvf *nic)
{
	int irq, ret = 0;

	for_each_cq_irq(irq)
		sprintf(nic->irq_name[irq], "%s-rxtx-%d",
			nic->pnicvf->netdev->name,
			nicvf_netdev_qidx(nic, irq));

	for_each_sq_irq(irq)
		sprintf(nic->irq_name[irq], "%s-sq-%d",
			nic->pnicvf->netdev->name,
			nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));

	for_each_rbdr_irq(irq)
		sprintf(nic->irq_name[irq], "%s-rbdr-%d",
			nic->pnicvf->netdev->name,
			nic->sqs_mode ? (nic->sqs_id + 1) : 0);

	/* Register CQ interrupts */
	for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
		ret = request_irq(pci_irq_vector(nic->pdev, irq),
				  nicvf_intr_handler,
				  0, nic->irq_name[irq], nic->napi[irq]);
		if (ret)
			goto err;
		nic->irq_allocated[irq] = true;
	}

	/* Register RBDR interrupt */
	for (irq = NICVF_INTR_ID_RBDR;
	     irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) {
		ret = request_irq(pci_irq_vector(nic->pdev, irq),
				  nicvf_rbdr_intr_handler,
				  0, nic->irq_name[irq], nic);
		if (ret)
			goto err;
		nic->irq_allocated[irq] = true;
	}

	/* Register QS error interrupt */
	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
		nic->pnicvf->netdev->name,
		nic->sqs_mode ? (nic->sqs_id + 1) : 0);
	irq = NICVF_INTR_ID_QS_ERR;
	ret = request_irq(pci_irq_vector(nic->pdev, irq),
			  nicvf_qs_err_intr_handler,
			  0, nic->irq_name[irq], nic);
	if (ret)
		goto err;

	nic->irq_allocated[irq] = true;

	/* Set IRQ affinities */
	nicvf_set_irq_affinity(nic);

err:
	if (ret)
		netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq);

	return ret;
}

static void nicvf_unregister_interrupts(struct nicvf *nic)
{
	struct pci_dev *pdev = nic->pdev;
	int irq;

	/* Free registered interrupts */
	for (irq = 0; irq < nic->num_vec; irq++) {
		if (!nic->irq_allocated[irq])
			continue;

		irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL);
		free_cpumask_var(nic->affinity_mask[irq]);

		if (irq < NICVF_INTR_ID_SQ)
			free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]);
		else
			free_irq(pci_irq_vector(pdev, irq), nic);

		nic->irq_allocated[irq] = false;
	}

	/* Disable MSI-X */
	pci_free_irq_vectors(pdev);
	nic->num_vec = 0;
}

/* Initialize MSIX vectors and register MISC interrupt.
 * Send READY message to PF to check if it's alive
 */
static int nicvf_register_misc_interrupt(struct nicvf *nic)
{
	int ret = 0;
	int irq = NICVF_INTR_ID_MISC;

	/* Return if mailbox interrupt is already registered */
	if (nic->pdev->msix_enabled)
		return 0;

	/* Enable MSI-X */
	nic->num_vec = pci_msix_vec_count(nic->pdev);
	ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec,
				    PCI_IRQ_MSIX);
	if (ret < 0) {
		netdev_err(nic->netdev,
			   "Req for #%d msix vectors failed\n", nic->num_vec);
		return 1;
	}

	sprintf(nic->irq_name[irq], "%s Mbox", "NICVF");
	/* Register Misc interrupt */
	ret = request_irq(pci_irq_vector(nic->pdev, irq),
			  nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic);

	if (ret)
		return ret;
	nic->irq_allocated[irq] = true;

	/* Enable mailbox interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);

	/* Check if VF is able to communicate with PF */
	if (!nicvf_check_pf_ready(nic)) {
		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
		nicvf_unregister_interrupts(nic);
		return 1;
	}

	return 0;
}

static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
{
	struct nicvf *nic = netdev_priv(netdev);
	int qid = skb_get_queue_mapping(skb);
	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
	struct nicvf *snic;
	struct snd_queue *sq;
	int tmp;

	/* Check for minimum packet length */
	if (skb->len <= ETH_HLEN) {
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;
	}

	/* In XDP case, initial HW tx queues are used for XDP,
	 * but stack's queue mapping starts at '0', so skip the
	 * Tx queues attached to Rx queues for XDP.
	 */
	if (nic->xdp_prog)
		qid += nic->xdp_tx_queues;

	snic = nic;
	/* Get secondary Qset's SQ structure */
	if (qid >= MAX_SND_QUEUES_PER_QS) {
		tmp = qid / MAX_SND_QUEUES_PER_QS;
		snic = (struct nicvf *)nic->snicvf[tmp - 1];
		if (!snic) {
			netdev_warn(nic->netdev,
				    "Secondary Qset#%d's ptr not initialized\n",
				    tmp - 1);
			dev_kfree_skb(skb);
			return NETDEV_TX_OK;
		}
		qid = qid % MAX_SND_QUEUES_PER_QS;
	}

	sq = &snic->qs->sq[qid];
	if (!netif_tx_queue_stopped(txq) &&
	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
		netif_tx_stop_queue(txq);

		/* Barrier, so that stop_queue is visible to other CPUs */
		smp_mb();

		/* Check again, in case another CPU freed descriptors */
		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
			netif_tx_wake_queue(txq);
		} else {
			this_cpu_inc(nic->drv_stats->txq_stop);
			netif_warn(nic, tx_err, netdev,
				   "Transmit ring full, stopping SQ%d\n", qid);
		}
		return NETDEV_TX_BUSY;
	}

	return NETDEV_TX_OK;
}

static inline void nicvf_free_cq_poll(struct nicvf *nic)
{
	struct nicvf_cq_poll *cq_poll;
	int qidx;

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		nic->napi[qidx] = NULL;
		kfree(cq_poll);
	}
}

int nicvf_stop(struct net_device *netdev)
{
	int irq, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
	nicvf_send_msg_to_pf(nic, &mbx);

	netif_carrier_off(netdev);
	netif_tx_stop_all_queues(nic->netdev);
	nic->link_up = false;

	/* Teardown secondary qsets first */
	if (!nic->sqs_mode) {
		for (qidx = 0; qidx < nic->sqs_count; qidx++) {
			if (!nic->snicvf[qidx])
				continue;
			nicvf_stop(nic->snicvf[qidx]->netdev);
			nic->snicvf[qidx] = NULL;
		}
	}

	/* Disable RBDR & QS error interrupts */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
	}
	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Wait for pending IRQ handlers to finish */
	for (irq = 0; irq < nic->num_vec; irq++)
		synchronize_irq(pci_irq_vector(nic->pdev, irq));

	tasklet_kill(&nic->rbdr_task);
	tasklet_kill(&nic->qs_err_task);
	if (nic->rb_work_scheduled)
		cancel_delayed_work_sync(&nic->rbdr_work);

	for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_synchronize(&cq_poll->napi);
		/* CQ intr is enabled while napi_complete,
		 * so disable it now
		 */
		nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
		nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}

	netif_tx_disable(netdev);

	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));

	/* Free resources */
	nicvf_config_data_transfer(nic, false);

	/* Disable HW Qset */
	nicvf_qset_config(nic, false);

	/* disable mailbox interrupt */
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);

	nicvf_unregister_interrupts(nic);

	nicvf_free_cq_poll(nic);

	/* Free any pending SKB saved to receive timestamp */
	if (nic->ptp_skb) {
		dev_kfree_skb_any(nic->ptp_skb);
		nic->ptp_skb = NULL;
	}

	/* Clear multiqset info */
	nic->pnicvf = nic;

	return 0;
}

static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable)
{
	union nic_mbx mbx = {};

	mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG;
	mbx.ptp.enable = enable;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
{
	union nic_mbx mbx = {};

	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
	mbx.frs.max_frs = mtu;
	mbx.frs.vf_id = nic->vf_id;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

int nicvf_open(struct net_device *netdev)
{
	int cpu, err, qidx;
	struct nicvf *nic = netdev_priv(netdev);
	struct queue_set *qs = nic->qs;
	struct nicvf_cq_poll *cq_poll = NULL;
	union nic_mbx mbx = {};

	netif_carrier_off(netdev);

	err = nicvf_register_misc_interrupt(nic);
	if (err)
		return err;

	/* Register NAPI handler for processing CQEs */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL);
		if (!cq_poll) {
			err = -ENOMEM;
			goto napi_del;
		}
		cq_poll->cq_idx = qidx;
		cq_poll->nicvf = nic;
		netif_napi_add(netdev, &cq_poll->napi, nicvf_poll,
			       NAPI_POLL_WEIGHT);
		napi_enable(&cq_poll->napi);
		nic->napi[qidx] = cq_poll;
	}

	/* Check if we got MAC address from PF or else generate a random MAC */
	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
		eth_hw_addr_random(netdev);
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	if (nic->set_mac_pending) {
		nic->set_mac_pending = false;
		nicvf_hw_set_mac_addr(nic, netdev);
	}

	/* Init tasklet for handling Qset err interrupt */
	tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err,
		     (unsigned long)nic);

	/* Init RBDR tasklet which will refill RBDR */
	tasklet_init(&nic->rbdr_task, nicvf_rbdr_task,
		     (unsigned long)nic);
	INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work);

	/* Configure CPI algorithm */
	nic->cpi_alg = cpi_alg;
	if (!nic->sqs_mode)
		nicvf_config_cpi(nic);

	nicvf_request_sqs(nic);
	if (nic->sqs_mode)
		nicvf_get_primary_vf_struct(nic);

	/* Configure PTP timestamp */
	if (nic->ptp_clock)
		nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);
	atomic_set(&nic->tx_ptp_skbs, 0);
	nic->ptp_skb = NULL;

	/* Configure receive side scaling and MTU */
	if (!nic->sqs_mode) {
		nicvf_rss_init(nic);
		err = nicvf_update_hw_max_frs(nic, netdev->mtu);
		if (err)
			goto cleanup;

		/* Clear percpu stats */
		for_each_possible_cpu(cpu)
			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
			       sizeof(struct nicvf_drv_stats));
	}

	err = nicvf_register_interrupts(nic);
	if (err)
		goto cleanup;

	/* Initialize the queues */
	err = nicvf_init_resources(nic);
	if (err)
		goto cleanup;

	/* Make sure queue initialization is written */
	wmb();

	nicvf_reg_write(nic, NIC_VF_INT, -1);
	/* Enable Qset err interrupt */
	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);

	/* Enable completion queue interrupt */
	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);

	/* Enable RBDR threshold interrupt */
	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);

	/* Send VF config done msg to PF */
	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
	nicvf_write_to_mbx(nic, &mbx);

	return 0;
cleanup:
	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
	nicvf_unregister_interrupts(nic);
	tasklet_kill(&nic->qs_err_task);
	tasklet_kill(&nic->rbdr_task);
napi_del:
	for (qidx = 0; qidx < qs->cq_cnt; qidx++) {
		cq_poll = nic->napi[qidx];
		if (!cq_poll)
			continue;
		napi_disable(&cq_poll->napi);
		netif_napi_del(&cq_poll->napi);
	}
	nicvf_free_cq_poll(nic);
	return err;
}

static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nicvf *nic = netdev_priv(netdev);
	int orig_mtu = netdev->mtu;

	netdev->mtu = new_mtu;

	if (!netif_running(netdev))
		return 0;

	if (nicvf_update_hw_max_frs(nic, new_mtu)) {
		netdev->mtu = orig_mtu;
		return -EINVAL;
	}

	return 0;
}

static int nicvf_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *addr = p;
	struct nicvf *nic = netdev_priv(netdev);

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

	if (nic->pdev->msix_enabled) {
		if (nicvf_hw_set_mac_addr(nic, netdev))
			return -EBUSY;
	} else {
		nic->set_mac_pending = true;
	}

	return 0;
}

void nicvf_update_lmac_stats(struct nicvf *nic)
{
	int stat = 0;
	union nic_mbx mbx = {};

	if (!netif_running(nic->netdev))
		return;

	mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS;
	mbx.bgx_stats.vf_id = nic->vf_id;
	/* Rx stats */
	mbx.bgx_stats.rx = 1;
	while (stat < BGX_RX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}

	stat = 0;

	/* Tx stats */
	mbx.bgx_stats.rx = 0;
	while (stat < BGX_TX_STATS_COUNT) {
		mbx.bgx_stats.idx = stat;
		if (nicvf_send_msg_to_pf(nic, &mbx))
			return;
		stat++;
	}
}

void nicvf_update_stats(struct nicvf *nic)
{
	int qidx, cpu;
	u64 tmp_stats = 0;
	struct nicvf_hw_stats *stats = &nic->hw_stats;
	struct nicvf_drv_stats *drv_stats;
	struct queue_set *qs = nic->qs;

#define GET_RX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3))
#define GET_TX_STATS(reg) \
	nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3))

	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
	stats->rx_drop_red = GET_RX_STATS(RX_RED);
	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);

	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
	stats->tx_drops = GET_TX_STATS(TX_DROP);

	/* On T88 pass 2.0, the dummy SQE added for TSO notification
	 * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
	 * to by the dummy SQE and the tx_drops counter is incremented.
	 * Subtracting it from the tx_tso counter will give the exact
	 * tx_drops counter.
	 */
	if (nic->t88 && nic->hw_tso) {
		for_each_possible_cpu(cpu) {
			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
			tmp_stats += drv_stats->tx_tso;
		}
		stats->tx_drops = tmp_stats - stats->tx_drops;
	}
	stats->tx_frames = stats->tx_ucast_frames +
			   stats->tx_bcast_frames +
			   stats->tx_mcast_frames;
	stats->rx_frames = stats->rx_ucast_frames +
			   stats->rx_bcast_frames +
			   stats->rx_mcast_frames;
	stats->rx_drops = stats->rx_drop_red +
			  stats->rx_drop_overrun;

	/* Update RQ and SQ stats */
	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
		nicvf_update_rq_stats(nic, qidx);
	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
		nicvf_update_sq_stats(nic, qidx);
}

static void nicvf_get_stats64(struct net_device *netdev,
			      struct rtnl_link_stats64 *stats)
{
	struct nicvf *nic = netdev_priv(netdev);
	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;

	nicvf_update_stats(nic);

	stats->rx_bytes = hw_stats->rx_bytes;
	stats->rx_packets = hw_stats->rx_frames;
	stats->rx_dropped = hw_stats->rx_drops;
	stats->multicast = hw_stats->rx_mcast_frames;

	stats->tx_bytes = hw_stats->tx_bytes;
	stats->tx_packets = hw_stats->tx_frames;
	stats->tx_dropped = hw_stats->tx_drops;
}

static void nicvf_tx_timeout(struct net_device *dev)
{
	struct nicvf *nic = netdev_priv(dev);

	netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n");

	this_cpu_inc(nic->drv_stats->tx_timeout);
	schedule_work(&nic->reset_task);
}

static void nicvf_reset_task(struct work_struct *work)
{
	struct nicvf *nic;

	nic = container_of(work, struct nicvf, reset_task);

	if (!netif_running(nic->netdev))
		return;

	nicvf_stop(nic->netdev);
	nicvf_open(nic->netdev);
	netif_trans_update(nic->netdev);
}

static int nicvf_config_loopback(struct nicvf *nic,
				 netdev_features_t features)
{
	union nic_mbx mbx = {};

	mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK;
	mbx.lbk.vf_id = nic->vf_id;
	mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0;

	return nicvf_send_msg_to_pf(nic, &mbx);
}

static netdev_features_t nicvf_fix_features(struct net_device *netdev,
					    netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);

	if ((features & NETIF_F_LOOPBACK) &&
	    netif_running(netdev) && !nic->loopback_supported)
		features &= ~NETIF_F_LOOPBACK;

	return features;
}

static int nicvf_set_features(struct net_device *netdev,
			      netdev_features_t features)
{
	struct nicvf *nic = netdev_priv(netdev);
	netdev_features_t changed = features ^ netdev->features;

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		nicvf_config_vlan_stripping(nic, features);

	if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev))
		return nicvf_config_loopback(nic, features);

	return 0;
}

static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached)
{
	u8 cq_count, txq_count;

	/* Set XDP Tx queue count same as Rx queue count */
	if (!bpf_attached)
		nic->xdp_tx_queues = 0;
	else
		nic->xdp_tx_queues = nic->rx_queues;

	/* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets
	 * need to be allocated, check how many.
	 */
	txq_count = nic->xdp_tx_queues + nic->tx_queues;
	cq_count = max(nic->rx_queues, txq_count);
	if (cq_count > MAX_CMP_QUEUES_PER_QS) {
		nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS);
		nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1;
	} else {
		nic->sqs_count = 0;
	}

	/* Set primary Qset's resources */
	nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS);
	nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS);
	nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt);

	/* Update stack */
	nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues);
}

static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
{
	struct net_device *dev = nic->netdev;
	bool if_up = netif_running(nic->netdev);
	struct bpf_prog *old_prog;
	bool bpf_attached = false;

	/* For now just support only the usual MTU sized frames */
	if (prog && (dev->mtu > 1500)) {
		netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n",
			    dev->mtu);
		return -EOPNOTSUPP;
	}

	/* ALL SQs attached to CQs, i.e. the same as RQs, are treated as
	 * XDP Tx queues and more Tx queues are allocated for
	 * the network stack to send pkts out.
	 *
	 * The number of Tx queues is either the same as the number of Rx
	 * queues or whatever is left within the max number of queues possible.
	 */
	if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) {
		netdev_warn(dev,
			    "Failed to attach BPF prog, RXQs + TXQs > Max %d\n",
			    nic->max_queues);
		return -ENOMEM;
	}

	if (if_up)
		nicvf_stop(nic->netdev);

	old_prog = xchg(&nic->xdp_prog, prog);
	/* Detach old prog, if any */
	if (old_prog)
		bpf_prog_put(old_prog);

	if (nic->xdp_prog) {
		/* Attach BPF program */
		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
		if (!IS_ERR(nic->xdp_prog))
			bpf_attached = true;
	}

	/* Calculate Tx queues needed for XDP and network stack */
	nicvf_set_xdp_queues(nic, bpf_attached);

	if (if_up) {
		/* Reinitialize interface, clean slate */
		nicvf_open(nic->netdev);
		netif_trans_update(nic->netdev);
	}

	return 0;
}

static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
{
	struct nicvf *nic = netdev_priv(netdev);

	/* To avoid checks while retrieving buffer address from CQE_RX,
	 * do not support XDP for T88 pass1.x silicons which are anyway
	 * not in use widely.
	 */
	if (pass1_silicon(nic->pdev))
		return -EOPNOTSUPP;

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return nicvf_xdp_setup(nic, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!nic->xdp_prog;
		xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0;
		return 0;
	default:
		return -EINVAL;
	}
}

static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr)
{
	struct hwtstamp_config config;
	struct nicvf *nic = netdev_priv(netdev);

	if (!nic->ptp_clock)
		return -ENODEV;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		nic->hw_rx_tstamp = false;
		break;
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		nic->hw_rx_tstamp = true;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	default:
		return -ERANGE;
	}

	if (netif_running(netdev))
		nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp);

	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
		return -EFAULT;

	return 0;
}

static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
{
	switch (cmd) {
	case SIOCSHWTSTAMP:
		return nicvf_config_hwtstamp(netdev, req);
	default:
		return -EOPNOTSUPP;
	}
}

static const struct net_device_ops nicvf_netdev_ops = {
	.ndo_open		= nicvf_open,
	.ndo_stop		= nicvf_stop,
	.ndo_start_xmit		= nicvf_xmit,
	.ndo_change_mtu		= nicvf_change_mtu,
	.ndo_set_mac_address	= nicvf_set_mac_address,
	.ndo_get_stats64	= nicvf_get_stats64,
	.ndo_tx_timeout		= nicvf_tx_timeout,
	.ndo_fix_features	= nicvf_fix_features,
	.ndo_set_features	= nicvf_set_features,
	.ndo_bpf		= nicvf_xdp,
	.ndo_do_ioctl		= nicvf_ioctl,
};

static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
	struct device *dev = &pdev->dev;
	struct net_device *netdev;
	struct nicvf *nic;
	int err, qcount;
	u16 sdevid;
	struct cavium_ptp *ptp_clock;

	ptp_clock = cavium_ptp_get();
	if (IS_ERR(ptp_clock)) {
		if (PTR_ERR(ptp_clock) == -ENODEV)
			/* In virtualized environment we proceed without ptp */
			ptp_clock = NULL;
		else
			return PTR_ERR(ptp_clock);
	}

	err = pci_enable_device(pdev);
	if (err) {
		dev_err(dev, "Failed to enable PCI device\n");
		return err;
	}

	err = pci_request_regions(pdev, DRV_NAME);
	if (err) {
		dev_err(dev, "PCI request regions failed 0x%x\n", err);
		goto err_disable_device;
	}

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "Unable to get usable DMA configuration\n");
		goto err_release_regions;
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48));
	if (err) {
		dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n");
		goto err_release_regions;
	}

	qcount = netif_get_num_default_rss_queues();

	/* Restrict multiqset support only for host bound VFs */
	if (pdev->is_virtfn) {
		/* Set max number of queues per VF */
		qcount = min_t(int, num_online_cpus(),
			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
	}

	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
	if (!netdev) {
		err = -ENOMEM;
		goto err_release_regions;
	}

	pci_set_drvdata(pdev, netdev);

	SET_NETDEV_DEV(netdev, &pdev->dev);

	nic = netdev_priv(netdev);
	nic->netdev = netdev;
	nic->pdev = pdev;
	nic->pnicvf = nic;
	nic->max_queues = qcount;
	/* If the number of CPUs is too low, there won't be any queues left
	 * for XDP_TX, hence double it.
	 */
	if (!nic->t88)
		nic->max_queues *= 2;
	nic->ptp_clock = ptp_clock;

	/* Map VF's configuration registers */
	nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
	if (!nic->reg_base) {
		dev_err(dev, "Cannot map config register space, aborting\n");
		err = -ENOMEM;
		goto err_free_netdev;
	}

	nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
	if (!nic->drv_stats) {
		err = -ENOMEM;
		goto err_free_netdev;
	}

	err = nicvf_set_qset_resources(nic);
	if (err)
		goto err_free_netdev;

	/* Check if PF is alive and get MAC address for this VF */
	err = nicvf_register_misc_interrupt(nic);
	if (err)
		goto err_free_netdev;

	nicvf_send_vf_struct(nic);

	if (!pass1_silicon(nic->pdev))
		nic->hw_tso = true;

	/* Get iommu domain for iova to physical addr conversion */
	nic->iommu_domain = iommu_get_domain_for_dev(dev);

	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
	if (sdevid == 0xA134)
		nic->t88 = true;

	/* Check if this VF is in QS only mode */
	if (nic->sqs_mode)
		return 0;

	err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues);
	if (err)
		goto err_unregister_interrupts;

	netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG |
			       NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 |
			       NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
			       NETIF_F_HW_VLAN_CTAG_RX);

	netdev->hw_features |= NETIF_F_RXHASH;

	netdev->features |= netdev->hw_features;
	netdev->hw_features |= NETIF_F_LOOPBACK;

	netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM |
				NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;

	netdev->netdev_ops = &nicvf_netdev_ops;
	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;

	/* MTU range: 64 - 9200 */
	netdev->min_mtu = NIC_HW_MIN_FRS;
	netdev->max_mtu = NIC_HW_MAX_FRS;

	INIT_WORK(&nic->reset_task, nicvf_reset_task);

	err = register_netdev(netdev);
	if (err) {
		dev_err(dev, "Failed to register netdevice\n");
		goto err_unregister_interrupts;
	}

	nic->msg_enable = debug;

	nicvf_set_ethtool_ops(netdev);

	return 0;

err_unregister_interrupts:
	nicvf_unregister_interrupts(nic);
err_free_netdev:
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	free_netdev(netdev);
err_release_regions:
	pci_release_regions(pdev);
err_disable_device:
	pci_disable_device(pdev);
	return err;
}

static void nicvf_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct nicvf *nic;
	struct net_device *pnetdev;

	if (!netdev)
		return;

	nic = netdev_priv(netdev);
	pnetdev = nic->pnicvf->netdev;

	/* Check if this Qset is assigned to different VF.
	 * If yes, clean primary and all secondary Qsets.
	 */
	if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
		unregister_netdev(pnetdev);
	nicvf_unregister_interrupts(nic);
	pci_set_drvdata(pdev, NULL);
	if (nic->drv_stats)
		free_percpu(nic->drv_stats);
	cavium_ptp_put(nic->ptp_clock);
	free_netdev(netdev);
	pci_release_regions(pdev);
	pci_disable_device(pdev);
}

static void nicvf_shutdown(struct pci_dev *pdev)
{
	nicvf_remove(pdev);
}

static struct pci_driver nicvf_driver = {
	.name = DRV_NAME,
	.id_table = nicvf_id_table,
	.probe = nicvf_probe,
	.remove = nicvf_remove,
	.shutdown = nicvf_shutdown,
};

static int __init nicvf_init_module(void)
{
	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);

	return pci_register_driver(&nicvf_driver);
}

static void __exit nicvf_cleanup_module(void)
{
	pci_unregister_driver(&nicvf_driver);
}

module_init(nicvf_init_module);
module_exit(nicvf_cleanup_module);