1 /* 2 * Copyright (C) 2015 Cavium, Inc. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms of version 2 of the GNU General Public License 6 * as published by the Free Software Foundation. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/interrupt.h> 11 #include <linux/pci.h> 12 #include <linux/netdevice.h> 13 #include <linux/if_vlan.h> 14 #include <linux/etherdevice.h> 15 #include <linux/ethtool.h> 16 #include <linux/log2.h> 17 #include <linux/prefetch.h> 18 #include <linux/irq.h> 19 #include <linux/iommu.h> 20 #include <linux/bpf.h> 21 #include <linux/bpf_trace.h> 22 #include <linux/filter.h> 23 #include <linux/net_tstamp.h> 24 #include <linux/workqueue.h> 25 26 #include "nic_reg.h" 27 #include "nic.h" 28 #include "nicvf_queues.h" 29 #include "thunder_bgx.h" 30 #include "../common/cavium_ptp.h" 31 32 #define DRV_NAME "nicvf" 33 #define DRV_VERSION "1.0" 34 35 /* NOTE: Packets bigger than 1530 are split across multiple pages and XDP needs 36 * the buffer to be contiguous. Allow XDP to be set up only if we don't exceed 37 * this value, keeping headroom for the 14 byte Ethernet header and two 38 * VLAN tags (for QinQ) 39 */ 40 #define MAX_XDP_MTU (1530 - ETH_HLEN - VLAN_HLEN * 2) 41 42 /* Supported devices */ 43 static const struct pci_device_id nicvf_id_table[] = { 44 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 45 PCI_DEVICE_ID_THUNDER_NIC_VF, 46 PCI_VENDOR_ID_CAVIUM, 47 PCI_SUBSYS_DEVID_88XX_NIC_VF) }, 48 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 49 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, 50 PCI_VENDOR_ID_CAVIUM, 51 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) }, 52 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 53 PCI_DEVICE_ID_THUNDER_NIC_VF, 54 PCI_VENDOR_ID_CAVIUM, 55 PCI_SUBSYS_DEVID_81XX_NIC_VF) }, 56 { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, 57 PCI_DEVICE_ID_THUNDER_NIC_VF, 58 PCI_VENDOR_ID_CAVIUM, 59 PCI_SUBSYS_DEVID_83XX_NIC_VF) }, 60 { 0, } /* end of table */ 61 }; 62 63 MODULE_AUTHOR("Sunil Goutham"); 64 MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver"); 65 MODULE_LICENSE("GPL v2"); 66 MODULE_VERSION(DRV_VERSION); 67 MODULE_DEVICE_TABLE(pci, nicvf_id_table); 68 69 static int debug = 0x00; 70 module_param(debug, int, 0644); 71 MODULE_PARM_DESC(debug, "Debug message level bitmap"); 72 73 static int cpi_alg = CPI_ALG_NONE; 74 module_param(cpi_alg, int, 0444); 75 MODULE_PARM_DESC(cpi_alg, 76 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); 77 78 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) 79 { 80 if (nic->sqs_mode) 81 return qidx + ((nic->sqs_id + 1) * MAX_CMP_QUEUES_PER_QS); 82 else 83 return qidx; 84 } 85 86 /* The Cavium ThunderX network controller can *only* be found in SoCs 87 * containing the ThunderX ARM64 CPU implementation. All accesses to the device 88 * registers on this platform are implicitly strongly ordered with respect 89 * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use 90 * with no memory barriers in this driver. The readq()/writeq() functions add 91 * explicit ordering operation which in this case are redundant, and only 92 * add overhead. 93 */ 94 95 /* Register read/write APIs */ 96 void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val) 97 { 98 writeq_relaxed(val, nic->reg_base + offset); 99 } 100 101 u64 nicvf_reg_read(struct nicvf *nic, u64 offset) 102 { 103 return readq_relaxed(nic->reg_base + offset); 104 } 105 106 void nicvf_queue_reg_write(struct nicvf *nic, u64 offset, 107 u64 qidx, u64 val) 108 { 109 void __iomem *addr = nic->reg_base + offset; 110 111 writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT)); 112 } 113 114 u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx) 115 { 116 void __iomem *addr = nic->reg_base + offset; 117 118 return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT)); 119 } 120 121 /* VF -> PF mailbox communication */ 122 static void nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx) 123 { 124 u64 *msg = (u64 *)mbx; 125 126 nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]); 127 nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]); 128 } 129 130 int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) 131 { 132 int timeout = NIC_MBOX_MSG_TIMEOUT; 133 int sleep = 10; 134 int ret = 0; 135 136 mutex_lock(&nic->rx_mode_mtx); 137 138 nic->pf_acked = false; 139 nic->pf_nacked = false; 140 141 nicvf_write_to_mbx(nic, mbx); 142 143 /* Wait for previous message to be acked, timeout 2sec */ 144 while (!nic->pf_acked) { 145 if (nic->pf_nacked) { 146 netdev_err(nic->netdev, 147 "PF NACK to mbox msg 0x%02x from VF%d\n", 148 (mbx->msg.msg & 0xFF), nic->vf_id); 149 ret = -EINVAL; 150 break; 151 } 152 msleep(sleep); 153 if (nic->pf_acked) 154 break; 155 timeout -= sleep; 156 if (!timeout) { 157 netdev_err(nic->netdev, 158 "PF didn't ACK to mbox msg 0x%02x from VF%d\n", 159 (mbx->msg.msg & 0xFF), nic->vf_id); 160 ret = -EBUSY; 161 break; 162 } 163 } 164 mutex_unlock(&nic->rx_mode_mtx); 165 return ret; 166 } 167 168 /* Checks if VF is able to comminicate with PF 169 * and also gets the VNIC number this VF is associated to. 170 */ 171 static int nicvf_check_pf_ready(struct nicvf *nic) 172 { 173 union nic_mbx mbx = {}; 174 175 mbx.msg.msg = NIC_MBOX_MSG_READY; 176 if (nicvf_send_msg_to_pf(nic, &mbx)) { 177 netdev_err(nic->netdev, 178 "PF didn't respond to READY msg\n"); 179 return 0; 180 } 181 182 return 1; 183 } 184 185 static void nicvf_send_cfg_done(struct nicvf *nic) 186 { 187 union nic_mbx mbx = {}; 188 189 mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; 190 if (nicvf_send_msg_to_pf(nic, &mbx)) { 191 netdev_err(nic->netdev, 192 "PF didn't respond to CFG DONE msg\n"); 193 } 194 } 195 196 static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) 197 { 198 if (bgx->rx) 199 nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats; 200 else 201 nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats; 202 } 203 204 static void nicvf_handle_mbx_intr(struct nicvf *nic) 205 { 206 union nic_mbx mbx = {}; 207 u64 *mbx_data; 208 u64 mbx_addr; 209 int i; 210 211 mbx_addr = NIC_VF_PF_MAILBOX_0_1; 212 mbx_data = (u64 *)&mbx; 213 214 for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { 215 *mbx_data = nicvf_reg_read(nic, mbx_addr); 216 mbx_data++; 217 mbx_addr += sizeof(u64); 218 } 219 220 netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg); 221 switch (mbx.msg.msg) { 222 case NIC_MBOX_MSG_READY: 223 nic->pf_acked = true; 224 nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; 225 nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; 226 nic->node = mbx.nic_cfg.node_id; 227 if (!nic->set_mac_pending) 228 ether_addr_copy(nic->netdev->dev_addr, 229 mbx.nic_cfg.mac_addr); 230 nic->sqs_mode = mbx.nic_cfg.sqs_mode; 231 nic->loopback_supported = mbx.nic_cfg.loopback_supported; 232 nic->link_up = false; 233 nic->duplex = 0; 234 nic->speed = 0; 235 break; 236 case NIC_MBOX_MSG_ACK: 237 nic->pf_acked = true; 238 break; 239 case NIC_MBOX_MSG_NACK: 240 nic->pf_nacked = true; 241 break; 242 case NIC_MBOX_MSG_RSS_SIZE: 243 nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; 244 nic->pf_acked = true; 245 break; 246 case NIC_MBOX_MSG_BGX_STATS: 247 nicvf_read_bgx_stats(nic, &mbx.bgx_stats); 248 nic->pf_acked = true; 249 break; 250 case NIC_MBOX_MSG_BGX_LINK_CHANGE: 251 nic->pf_acked = true; 252 if (nic->link_up != mbx.link_status.link_up) { 253 nic->link_up = mbx.link_status.link_up; 254 nic->duplex = mbx.link_status.duplex; 255 nic->speed = mbx.link_status.speed; 256 nic->mac_type = mbx.link_status.mac_type; 257 if (nic->link_up) { 258 netdev_info(nic->netdev, 259 "Link is Up %d Mbps %s duplex\n", 260 nic->speed, 261 nic->duplex == DUPLEX_FULL ? 262 "Full" : "Half"); 263 netif_carrier_on(nic->netdev); 264 netif_tx_start_all_queues(nic->netdev); 265 } else { 266 netdev_info(nic->netdev, "Link is Down\n"); 267 netif_carrier_off(nic->netdev); 268 netif_tx_stop_all_queues(nic->netdev); 269 } 270 } 271 break; 272 case NIC_MBOX_MSG_ALLOC_SQS: 273 nic->sqs_count = mbx.sqs_alloc.qs_count; 274 nic->pf_acked = true; 275 break; 276 case NIC_MBOX_MSG_SNICVF_PTR: 277 /* Primary VF: make note of secondary VF's pointer 278 * to be used while packet transmission. 279 */ 280 nic->snicvf[mbx.nicvf.sqs_id] = 281 (struct nicvf *)mbx.nicvf.nicvf; 282 nic->pf_acked = true; 283 break; 284 case NIC_MBOX_MSG_PNICVF_PTR: 285 /* Secondary VF/Qset: make note of primary VF's pointer 286 * to be used while packet reception, to handover packet 287 * to primary VF's netdev. 288 */ 289 nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf; 290 nic->pf_acked = true; 291 break; 292 case NIC_MBOX_MSG_PFC: 293 nic->pfc.autoneg = mbx.pfc.autoneg; 294 nic->pfc.fc_rx = mbx.pfc.fc_rx; 295 nic->pfc.fc_tx = mbx.pfc.fc_tx; 296 nic->pf_acked = true; 297 break; 298 default: 299 netdev_err(nic->netdev, 300 "Invalid message from PF, msg 0x%x\n", mbx.msg.msg); 301 break; 302 } 303 nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0); 304 } 305 306 static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev) 307 { 308 union nic_mbx mbx = {}; 309 310 mbx.mac.msg = NIC_MBOX_MSG_SET_MAC; 311 mbx.mac.vf_id = nic->vf_id; 312 ether_addr_copy(mbx.mac.mac_addr, netdev->dev_addr); 313 314 return nicvf_send_msg_to_pf(nic, &mbx); 315 } 316 317 static void nicvf_config_cpi(struct nicvf *nic) 318 { 319 union nic_mbx mbx = {}; 320 321 mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG; 322 mbx.cpi_cfg.vf_id = nic->vf_id; 323 mbx.cpi_cfg.cpi_alg = nic->cpi_alg; 324 mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt; 325 326 nicvf_send_msg_to_pf(nic, &mbx); 327 } 328 329 static void nicvf_get_rss_size(struct nicvf *nic) 330 { 331 union nic_mbx mbx = {}; 332 333 mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; 334 mbx.rss_size.vf_id = nic->vf_id; 335 nicvf_send_msg_to_pf(nic, &mbx); 336 } 337 338 void nicvf_config_rss(struct nicvf *nic) 339 { 340 union nic_mbx mbx = {}; 341 struct nicvf_rss_info *rss = &nic->rss_info; 342 int ind_tbl_len = rss->rss_size; 343 int i, nextq = 0; 344 345 mbx.rss_cfg.vf_id = nic->vf_id; 346 mbx.rss_cfg.hash_bits = rss->hash_bits; 347 while (ind_tbl_len) { 348 mbx.rss_cfg.tbl_offset = nextq; 349 mbx.rss_cfg.tbl_len = min(ind_tbl_len, 350 RSS_IND_TBL_LEN_PER_MBX_MSG); 351 mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ? 352 NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; 353 354 for (i = 0; i < mbx.rss_cfg.tbl_len; i++) 355 mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++]; 356 357 nicvf_send_msg_to_pf(nic, &mbx); 358 359 ind_tbl_len -= mbx.rss_cfg.tbl_len; 360 } 361 } 362 363 void nicvf_set_rss_key(struct nicvf *nic) 364 { 365 struct nicvf_rss_info *rss = &nic->rss_info; 366 u64 key_addr = NIC_VNIC_RSS_KEY_0_4; 367 int idx; 368 369 for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { 370 nicvf_reg_write(nic, key_addr, rss->key[idx]); 371 key_addr += sizeof(u64); 372 } 373 } 374 375 static int nicvf_rss_init(struct nicvf *nic) 376 { 377 struct nicvf_rss_info *rss = &nic->rss_info; 378 int idx; 379 380 nicvf_get_rss_size(nic); 381 382 if (cpi_alg != CPI_ALG_NONE) { 383 rss->enable = false; 384 rss->hash_bits = 0; 385 return 0; 386 } 387 388 rss->enable = true; 389 390 netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64)); 391 nicvf_set_rss_key(nic); 392 393 rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; 394 nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg); 395 396 rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size)); 397 398 for (idx = 0; idx < rss->rss_size; idx++) 399 rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, 400 nic->rx_queues); 401 nicvf_config_rss(nic); 402 return 1; 403 } 404 405 /* Request PF to allocate additional Qsets */ 406 static void nicvf_request_sqs(struct nicvf *nic) 407 { 408 union nic_mbx mbx = {}; 409 int sqs; 410 int sqs_count = nic->sqs_count; 411 int rx_queues = 0, tx_queues = 0; 412 413 /* Only primary VF should request */ 414 if (nic->sqs_mode || !nic->sqs_count) 415 return; 416 417 mbx.sqs_alloc.msg = NIC_MBOX_MSG_ALLOC_SQS; 418 mbx.sqs_alloc.vf_id = nic->vf_id; 419 mbx.sqs_alloc.qs_count = nic->sqs_count; 420 if (nicvf_send_msg_to_pf(nic, &mbx)) { 421 /* No response from PF */ 422 nic->sqs_count = 0; 423 return; 424 } 425 426 /* Return if no Secondary Qsets available */ 427 if (!nic->sqs_count) 428 return; 429 430 if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) 431 rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; 432 433 tx_queues = nic->tx_queues + nic->xdp_tx_queues; 434 if (tx_queues > MAX_SND_QUEUES_PER_QS) 435 tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS; 436 437 /* Set no of Rx/Tx queues in each of the SQsets */ 438 for (sqs = 0; sqs < nic->sqs_count; sqs++) { 439 mbx.nicvf.msg = NIC_MBOX_MSG_SNICVF_PTR; 440 mbx.nicvf.vf_id = nic->vf_id; 441 mbx.nicvf.sqs_id = sqs; 442 nicvf_send_msg_to_pf(nic, &mbx); 443 444 nic->snicvf[sqs]->sqs_id = sqs; 445 if (rx_queues > MAX_RCV_QUEUES_PER_QS) { 446 nic->snicvf[sqs]->qs->rq_cnt = MAX_RCV_QUEUES_PER_QS; 447 rx_queues -= MAX_RCV_QUEUES_PER_QS; 448 } else { 449 nic->snicvf[sqs]->qs->rq_cnt = rx_queues; 450 rx_queues = 0; 451 } 452 453 if (tx_queues > MAX_SND_QUEUES_PER_QS) { 454 nic->snicvf[sqs]->qs->sq_cnt = MAX_SND_QUEUES_PER_QS; 455 tx_queues -= MAX_SND_QUEUES_PER_QS; 456 } else { 457 nic->snicvf[sqs]->qs->sq_cnt = tx_queues; 458 tx_queues = 0; 459 } 460 461 nic->snicvf[sqs]->qs->cq_cnt = 462 max(nic->snicvf[sqs]->qs->rq_cnt, nic->snicvf[sqs]->qs->sq_cnt); 463 464 /* Initialize secondary Qset's queues and its interrupts */ 465 nicvf_open(nic->snicvf[sqs]->netdev); 466 } 467 468 /* Update stack with actual Rx/Tx queue count allocated */ 469 if (sqs_count != nic->sqs_count) 470 nicvf_set_real_num_queues(nic->netdev, 471 nic->tx_queues, nic->rx_queues); 472 } 473 474 /* Send this Qset's nicvf pointer to PF. 475 * PF inturn sends primary VF's nicvf struct to secondary Qsets/VFs 476 * so that packets received by these Qsets can use primary VF's netdev 477 */ 478 static void nicvf_send_vf_struct(struct nicvf *nic) 479 { 480 union nic_mbx mbx = {}; 481 482 mbx.nicvf.msg = NIC_MBOX_MSG_NICVF_PTR; 483 mbx.nicvf.sqs_mode = nic->sqs_mode; 484 mbx.nicvf.nicvf = (u64)nic; 485 nicvf_send_msg_to_pf(nic, &mbx); 486 } 487 488 static void nicvf_get_primary_vf_struct(struct nicvf *nic) 489 { 490 union nic_mbx mbx = {}; 491 492 mbx.nicvf.msg = NIC_MBOX_MSG_PNICVF_PTR; 493 nicvf_send_msg_to_pf(nic, &mbx); 494 } 495 496 int nicvf_set_real_num_queues(struct net_device *netdev, 497 int tx_queues, int rx_queues) 498 { 499 int err = 0; 500 501 err = netif_set_real_num_tx_queues(netdev, tx_queues); 502 if (err) { 503 netdev_err(netdev, 504 "Failed to set no of Tx queues: %d\n", tx_queues); 505 return err; 506 } 507 508 err = netif_set_real_num_rx_queues(netdev, rx_queues); 509 if (err) 510 netdev_err(netdev, 511 "Failed to set no of Rx queues: %d\n", rx_queues); 512 return err; 513 } 514 515 static int nicvf_init_resources(struct nicvf *nic) 516 { 517 int err; 518 519 /* Enable Qset */ 520 nicvf_qset_config(nic, true); 521 522 /* Initialize queues and HW for data transfer */ 523 err = nicvf_config_data_transfer(nic, true); 524 if (err) { 525 netdev_err(nic->netdev, 526 "Failed to alloc/config VF's QSet resources\n"); 527 return err; 528 } 529 530 return 0; 531 } 532 533 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, 534 struct cqe_rx_t *cqe_rx, struct snd_queue *sq, 535 struct rcv_queue *rq, struct sk_buff **skb) 536 { 537 struct xdp_buff xdp; 538 struct page *page; 539 u32 action; 540 u16 len, offset = 0; 541 u64 dma_addr, cpu_addr; 542 void *orig_data; 543 544 /* Retrieve packet buffer's DMA address and length */ 545 len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); 546 dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64)))); 547 548 cpu_addr = nicvf_iova_to_phys(nic, dma_addr); 549 if (!cpu_addr) 550 return false; 551 cpu_addr = (u64)phys_to_virt(cpu_addr); 552 page = virt_to_page((void *)cpu_addr); 553 554 xdp.data_hard_start = page_address(page); 555 xdp.data = (void *)cpu_addr; 556 xdp_set_data_meta_invalid(&xdp); 557 xdp.data_end = xdp.data + len; 558 xdp.rxq = &rq->xdp_rxq; 559 orig_data = xdp.data; 560 561 rcu_read_lock(); 562 action = bpf_prog_run_xdp(prog, &xdp); 563 rcu_read_unlock(); 564 565 len = xdp.data_end - xdp.data; 566 /* Check if XDP program has changed headers */ 567 if (orig_data != xdp.data) { 568 offset = orig_data - xdp.data; 569 dma_addr -= offset; 570 } 571 572 switch (action) { 573 case XDP_PASS: 574 /* Check if it's a recycled page, if not 575 * unmap the DMA mapping. 576 * 577 * Recycled page holds an extra reference. 578 */ 579 if (page_ref_count(page) == 1) { 580 dma_addr &= PAGE_MASK; 581 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, 582 RCV_FRAG_LEN + XDP_PACKET_HEADROOM, 583 DMA_FROM_DEVICE, 584 DMA_ATTR_SKIP_CPU_SYNC); 585 } 586 587 /* Build SKB and pass on packet to network stack */ 588 *skb = build_skb(xdp.data, 589 RCV_FRAG_LEN - cqe_rx->align_pad + offset); 590 if (!*skb) 591 put_page(page); 592 else 593 skb_put(*skb, len); 594 return false; 595 case XDP_TX: 596 nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); 597 return true; 598 default: 599 bpf_warn_invalid_xdp_action(action); 600 /* fall through */ 601 case XDP_ABORTED: 602 trace_xdp_exception(nic->netdev, prog, action); 603 /* fall through */ 604 case XDP_DROP: 605 /* Check if it's a recycled page, if not 606 * unmap the DMA mapping. 607 * 608 * Recycled page holds an extra reference. 609 */ 610 if (page_ref_count(page) == 1) { 611 dma_addr &= PAGE_MASK; 612 dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, 613 RCV_FRAG_LEN + XDP_PACKET_HEADROOM, 614 DMA_FROM_DEVICE, 615 DMA_ATTR_SKIP_CPU_SYNC); 616 } 617 put_page(page); 618 return true; 619 } 620 return false; 621 } 622 623 static void nicvf_snd_ptp_handler(struct net_device *netdev, 624 struct cqe_send_t *cqe_tx) 625 { 626 struct nicvf *nic = netdev_priv(netdev); 627 struct skb_shared_hwtstamps ts; 628 u64 ns; 629 630 nic = nic->pnicvf; 631 632 /* Sync for 'ptp_skb' */ 633 smp_rmb(); 634 635 /* New timestamp request can be queued now */ 636 atomic_set(&nic->tx_ptp_skbs, 0); 637 638 /* Check for timestamp requested skb */ 639 if (!nic->ptp_skb) 640 return; 641 642 /* Check if timestamping is timedout, which is set to 10us */ 643 if (cqe_tx->send_status == CQ_TX_ERROP_TSTMP_TIMEOUT || 644 cqe_tx->send_status == CQ_TX_ERROP_TSTMP_CONFLICT) 645 goto no_tstamp; 646 647 /* Get the timestamp */ 648 memset(&ts, 0, sizeof(ts)); 649 ns = cavium_ptp_tstamp2time(nic->ptp_clock, cqe_tx->ptp_timestamp); 650 ts.hwtstamp = ns_to_ktime(ns); 651 skb_tstamp_tx(nic->ptp_skb, &ts); 652 653 no_tstamp: 654 /* Free the original skb */ 655 dev_kfree_skb_any(nic->ptp_skb); 656 nic->ptp_skb = NULL; 657 /* Sync 'ptp_skb' */ 658 smp_wmb(); 659 } 660 661 static void nicvf_snd_pkt_handler(struct net_device *netdev, 662 struct cqe_send_t *cqe_tx, 663 int budget, int *subdesc_cnt, 664 unsigned int *tx_pkts, unsigned int *tx_bytes) 665 { 666 struct sk_buff *skb = NULL; 667 struct page *page; 668 struct nicvf *nic = netdev_priv(netdev); 669 struct snd_queue *sq; 670 struct sq_hdr_subdesc *hdr; 671 struct sq_hdr_subdesc *tso_sqe; 672 673 sq = &nic->qs->sq[cqe_tx->sq_idx]; 674 675 hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr); 676 if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) 677 return; 678 679 /* Check for errors */ 680 if (cqe_tx->send_status) 681 nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); 682 683 /* Is this a XDP designated Tx queue */ 684 if (sq->is_xdp) { 685 page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; 686 /* Check if it's recycled page or else unmap DMA mapping */ 687 if (page && (page_ref_count(page) == 1)) 688 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 689 hdr->subdesc_cnt); 690 691 /* Release page reference for recycling */ 692 if (page) 693 put_page(page); 694 sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; 695 *subdesc_cnt += hdr->subdesc_cnt + 1; 696 return; 697 } 698 699 skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; 700 if (skb) { 701 /* Check for dummy descriptor used for HW TSO offload on 88xx */ 702 if (hdr->dont_send) { 703 /* Get actual TSO descriptors and free them */ 704 tso_sqe = 705 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); 706 nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, 707 tso_sqe->subdesc_cnt); 708 *subdesc_cnt += tso_sqe->subdesc_cnt + 1; 709 } else { 710 nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, 711 hdr->subdesc_cnt); 712 } 713 *subdesc_cnt += hdr->subdesc_cnt + 1; 714 prefetch(skb); 715 (*tx_pkts)++; 716 *tx_bytes += skb->len; 717 /* If timestamp is requested for this skb, don't free it */ 718 if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && 719 !nic->pnicvf->ptp_skb) 720 nic->pnicvf->ptp_skb = skb; 721 else 722 napi_consume_skb(skb, budget); 723 sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; 724 } else { 725 /* In case of SW TSO on 88xx, only last segment will have 726 * a SKB attached, so just free SQEs here. 727 */ 728 if (!nic->hw_tso) 729 *subdesc_cnt += hdr->subdesc_cnt + 1; 730 } 731 } 732 733 static inline void nicvf_set_rxhash(struct net_device *netdev, 734 struct cqe_rx_t *cqe_rx, 735 struct sk_buff *skb) 736 { 737 u8 hash_type; 738 u32 hash; 739 740 if (!(netdev->features & NETIF_F_RXHASH)) 741 return; 742 743 switch (cqe_rx->rss_alg) { 744 case RSS_ALG_TCP_IP: 745 case RSS_ALG_UDP_IP: 746 hash_type = PKT_HASH_TYPE_L4; 747 hash = cqe_rx->rss_tag; 748 break; 749 case RSS_ALG_IP: 750 hash_type = PKT_HASH_TYPE_L3; 751 hash = cqe_rx->rss_tag; 752 break; 753 default: 754 hash_type = PKT_HASH_TYPE_NONE; 755 hash = 0; 756 } 757 758 skb_set_hash(skb, hash, hash_type); 759 } 760 761 static inline void nicvf_set_rxtstamp(struct nicvf *nic, struct sk_buff *skb) 762 { 763 u64 ns; 764 765 if (!nic->ptp_clock || !nic->hw_rx_tstamp) 766 return; 767 768 /* The first 8 bytes is the timestamp */ 769 ns = cavium_ptp_tstamp2time(nic->ptp_clock, 770 be64_to_cpu(*(__be64 *)skb->data)); 771 skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(ns); 772 773 __skb_pull(skb, 8); 774 } 775 776 static void nicvf_rcv_pkt_handler(struct net_device *netdev, 777 struct napi_struct *napi, 778 struct cqe_rx_t *cqe_rx, 779 struct snd_queue *sq, struct rcv_queue *rq) 780 { 781 struct sk_buff *skb = NULL; 782 struct nicvf *nic = netdev_priv(netdev); 783 struct nicvf *snic = nic; 784 int err = 0; 785 int rq_idx; 786 787 rq_idx = nicvf_netdev_qidx(nic, cqe_rx->rq_idx); 788 789 if (nic->sqs_mode) { 790 /* Use primary VF's 'nicvf' struct */ 791 nic = nic->pnicvf; 792 netdev = nic->netdev; 793 } 794 795 /* Check for errors */ 796 if (cqe_rx->err_level || cqe_rx->err_opcode) { 797 err = nicvf_check_cqe_rx_errs(nic, cqe_rx); 798 if (err && !cqe_rx->rb_cnt) 799 return; 800 } 801 802 /* For XDP, ignore pkts spanning multiple pages */ 803 if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { 804 /* Packet consumed by XDP */ 805 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb)) 806 return; 807 } else { 808 skb = nicvf_get_rcv_skb(snic, cqe_rx, 809 nic->xdp_prog ? true : false); 810 } 811 812 if (!skb) 813 return; 814 815 if (netif_msg_pktdata(nic)) { 816 netdev_info(nic->netdev, "skb 0x%p, len=%d\n", skb, skb->len); 817 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, 818 skb->data, skb->len, true); 819 } 820 821 /* If error packet, drop it here */ 822 if (err) { 823 dev_kfree_skb_any(skb); 824 return; 825 } 826 827 nicvf_set_rxtstamp(nic, skb); 828 nicvf_set_rxhash(netdev, cqe_rx, skb); 829 830 skb_record_rx_queue(skb, rq_idx); 831 if (netdev->hw_features & NETIF_F_RXCSUM) { 832 /* HW by default verifies TCP/UDP/SCTP checksums */ 833 skb->ip_summed = CHECKSUM_UNNECESSARY; 834 } else { 835 skb_checksum_none_assert(skb); 836 } 837 838 skb->protocol = eth_type_trans(skb, netdev); 839 840 /* Check for stripped VLAN */ 841 if (cqe_rx->vlan_found && cqe_rx->vlan_stripped) 842 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 843 ntohs((__force __be16)cqe_rx->vlan_tci)); 844 845 if (napi && (netdev->features & NETIF_F_GRO)) 846 napi_gro_receive(napi, skb); 847 else 848 netif_receive_skb(skb); 849 } 850 851 static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, 852 struct napi_struct *napi, int budget) 853 { 854 int processed_cqe, work_done = 0, tx_done = 0; 855 int cqe_count, cqe_head; 856 int subdesc_cnt = 0; 857 struct nicvf *nic = netdev_priv(netdev); 858 struct queue_set *qs = nic->qs; 859 struct cmp_queue *cq = &qs->cq[cq_idx]; 860 struct cqe_rx_t *cq_desc; 861 struct netdev_queue *txq; 862 struct snd_queue *sq = &qs->sq[cq_idx]; 863 struct rcv_queue *rq = &qs->rq[cq_idx]; 864 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; 865 866 spin_lock_bh(&cq->lock); 867 loop: 868 processed_cqe = 0; 869 /* Get no of valid CQ entries to process */ 870 cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); 871 cqe_count &= CQ_CQE_COUNT; 872 if (!cqe_count) 873 goto done; 874 875 /* Get head of the valid CQ entries */ 876 cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; 877 cqe_head &= 0xFFFF; 878 879 while (processed_cqe < cqe_count) { 880 /* Get the CQ descriptor */ 881 cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); 882 cqe_head++; 883 cqe_head &= (cq->dmem.q_len - 1); 884 /* Initiate prefetch for next descriptor */ 885 prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); 886 887 if ((work_done >= budget) && napi && 888 (cq_desc->cqe_type != CQE_TYPE_SEND)) { 889 break; 890 } 891 892 switch (cq_desc->cqe_type) { 893 case CQE_TYPE_RX: 894 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq); 895 work_done++; 896 break; 897 case CQE_TYPE_SEND: 898 nicvf_snd_pkt_handler(netdev, (void *)cq_desc, 899 budget, &subdesc_cnt, 900 &tx_pkts, &tx_bytes); 901 tx_done++; 902 break; 903 case CQE_TYPE_SEND_PTP: 904 nicvf_snd_ptp_handler(netdev, (void *)cq_desc); 905 break; 906 case CQE_TYPE_INVALID: 907 case CQE_TYPE_RX_SPLIT: 908 case CQE_TYPE_RX_TCP: 909 /* Ignore for now */ 910 break; 911 } 912 processed_cqe++; 913 } 914 915 /* Ring doorbell to inform H/W to reuse processed CQEs */ 916 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, 917 cq_idx, processed_cqe); 918 919 if ((work_done < budget) && napi) 920 goto loop; 921 922 done: 923 /* Update SQ's descriptor free count */ 924 if (subdesc_cnt) 925 nicvf_put_sq_desc(sq, subdesc_cnt); 926 927 txq_idx = nicvf_netdev_qidx(nic, cq_idx); 928 /* Handle XDP TX queues */ 929 if (nic->pnicvf->xdp_prog) { 930 if (txq_idx < nic->pnicvf->xdp_tx_queues) { 931 nicvf_xdp_sq_doorbell(nic, sq, cq_idx); 932 goto out; 933 } 934 nic = nic->pnicvf; 935 txq_idx -= nic->pnicvf->xdp_tx_queues; 936 } 937 938 /* Wakeup TXQ if its stopped earlier due to SQ full */ 939 if (tx_done || 940 (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { 941 netdev = nic->pnicvf->netdev; 942 txq = netdev_get_tx_queue(netdev, txq_idx); 943 if (tx_pkts) 944 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); 945 946 /* To read updated queue and carrier status */ 947 smp_mb(); 948 if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { 949 netif_tx_wake_queue(txq); 950 nic = nic->pnicvf; 951 this_cpu_inc(nic->drv_stats->txq_wake); 952 netif_warn(nic, tx_err, netdev, 953 "Transmit queue wakeup SQ%d\n", txq_idx); 954 } 955 } 956 957 out: 958 spin_unlock_bh(&cq->lock); 959 return work_done; 960 } 961 962 static int nicvf_poll(struct napi_struct *napi, int budget) 963 { 964 u64 cq_head; 965 int work_done = 0; 966 struct net_device *netdev = napi->dev; 967 struct nicvf *nic = netdev_priv(netdev); 968 struct nicvf_cq_poll *cq; 969 970 cq = container_of(napi, struct nicvf_cq_poll, napi); 971 work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); 972 973 if (work_done < budget) { 974 /* Slow packet rate, exit polling */ 975 napi_complete_done(napi, work_done); 976 /* Re-enable interrupts */ 977 cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, 978 cq->cq_idx); 979 nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 980 nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, 981 cq->cq_idx, cq_head); 982 nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); 983 } 984 return work_done; 985 } 986 987 /* Qset error interrupt handler 988 * 989 * As of now only CQ errors are handled 990 */ 991 static void nicvf_handle_qs_err(unsigned long data) 992 { 993 struct nicvf *nic = (struct nicvf *)data; 994 struct queue_set *qs = nic->qs; 995 int qidx; 996 u64 status; 997 998 netif_tx_disable(nic->netdev); 999 1000 /* Check if it is CQ err */ 1001 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 1002 status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, 1003 qidx); 1004 if (!(status & CQ_ERR_MASK)) 1005 continue; 1006 /* Process already queued CQEs and reconfig CQ */ 1007 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1008 nicvf_sq_disable(nic, qidx); 1009 nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0); 1010 nicvf_cmp_queue_config(nic, qs, qidx, true); 1011 nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); 1012 nicvf_sq_enable(nic, &qs->sq[qidx], qidx); 1013 1014 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 1015 } 1016 1017 netif_tx_start_all_queues(nic->netdev); 1018 /* Re-enable Qset error interrupt */ 1019 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 1020 } 1021 1022 static void nicvf_dump_intr_status(struct nicvf *nic) 1023 { 1024 netif_info(nic, intr, nic->netdev, "interrupt status 0x%llx\n", 1025 nicvf_reg_read(nic, NIC_VF_INT)); 1026 } 1027 1028 static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) 1029 { 1030 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1031 u64 intr; 1032 1033 nicvf_dump_intr_status(nic); 1034 1035 intr = nicvf_reg_read(nic, NIC_VF_INT); 1036 /* Check for spurious interrupt */ 1037 if (!(intr & NICVF_INTR_MBOX_MASK)) 1038 return IRQ_HANDLED; 1039 1040 nicvf_handle_mbx_intr(nic); 1041 1042 return IRQ_HANDLED; 1043 } 1044 1045 static irqreturn_t nicvf_intr_handler(int irq, void *cq_irq) 1046 { 1047 struct nicvf_cq_poll *cq_poll = (struct nicvf_cq_poll *)cq_irq; 1048 struct nicvf *nic = cq_poll->nicvf; 1049 int qidx = cq_poll->cq_idx; 1050 1051 nicvf_dump_intr_status(nic); 1052 1053 /* Disable interrupts */ 1054 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1055 1056 /* Schedule NAPI */ 1057 napi_schedule_irqoff(&cq_poll->napi); 1058 1059 /* Clear interrupt */ 1060 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 1061 1062 return IRQ_HANDLED; 1063 } 1064 1065 static irqreturn_t nicvf_rbdr_intr_handler(int irq, void *nicvf_irq) 1066 { 1067 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1068 u8 qidx; 1069 1070 1071 nicvf_dump_intr_status(nic); 1072 1073 /* Disable RBDR interrupt and schedule softirq */ 1074 for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) { 1075 if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) 1076 continue; 1077 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 1078 tasklet_hi_schedule(&nic->rbdr_task); 1079 /* Clear interrupt */ 1080 nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); 1081 } 1082 1083 return IRQ_HANDLED; 1084 } 1085 1086 static irqreturn_t nicvf_qs_err_intr_handler(int irq, void *nicvf_irq) 1087 { 1088 struct nicvf *nic = (struct nicvf *)nicvf_irq; 1089 1090 nicvf_dump_intr_status(nic); 1091 1092 /* Disable Qset err interrupt and schedule softirq */ 1093 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1094 tasklet_hi_schedule(&nic->qs_err_task); 1095 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1096 1097 return IRQ_HANDLED; 1098 } 1099 1100 static void nicvf_set_irq_affinity(struct nicvf *nic) 1101 { 1102 int vec, cpu; 1103 1104 for (vec = 0; vec < nic->num_vec; vec++) { 1105 if (!nic->irq_allocated[vec]) 1106 continue; 1107 1108 if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) 1109 return; 1110 /* CQ interrupts */ 1111 if (vec < NICVF_INTR_ID_SQ) 1112 /* Leave CPU0 for RBDR and other interrupts */ 1113 cpu = nicvf_netdev_qidx(nic, vec) + 1; 1114 else 1115 cpu = 0; 1116 1117 cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), 1118 nic->affinity_mask[vec]); 1119 irq_set_affinity_hint(pci_irq_vector(nic->pdev, vec), 1120 nic->affinity_mask[vec]); 1121 } 1122 } 1123 1124 static int nicvf_register_interrupts(struct nicvf *nic) 1125 { 1126 int irq, ret = 0; 1127 1128 for_each_cq_irq(irq) 1129 sprintf(nic->irq_name[irq], "%s-rxtx-%d", 1130 nic->pnicvf->netdev->name, 1131 nicvf_netdev_qidx(nic, irq)); 1132 1133 for_each_sq_irq(irq) 1134 sprintf(nic->irq_name[irq], "%s-sq-%d", 1135 nic->pnicvf->netdev->name, 1136 nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); 1137 1138 for_each_rbdr_irq(irq) 1139 sprintf(nic->irq_name[irq], "%s-rbdr-%d", 1140 nic->pnicvf->netdev->name, 1141 nic->sqs_mode ? (nic->sqs_id + 1) : 0); 1142 1143 /* Register CQ interrupts */ 1144 for (irq = 0; irq < nic->qs->cq_cnt; irq++) { 1145 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1146 nicvf_intr_handler, 1147 0, nic->irq_name[irq], nic->napi[irq]); 1148 if (ret) 1149 goto err; 1150 nic->irq_allocated[irq] = true; 1151 } 1152 1153 /* Register RBDR interrupt */ 1154 for (irq = NICVF_INTR_ID_RBDR; 1155 irq < (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt); irq++) { 1156 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1157 nicvf_rbdr_intr_handler, 1158 0, nic->irq_name[irq], nic); 1159 if (ret) 1160 goto err; 1161 nic->irq_allocated[irq] = true; 1162 } 1163 1164 /* Register QS error interrupt */ 1165 sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", 1166 nic->pnicvf->netdev->name, 1167 nic->sqs_mode ? (nic->sqs_id + 1) : 0); 1168 irq = NICVF_INTR_ID_QS_ERR; 1169 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1170 nicvf_qs_err_intr_handler, 1171 0, nic->irq_name[irq], nic); 1172 if (ret) 1173 goto err; 1174 1175 nic->irq_allocated[irq] = true; 1176 1177 /* Set IRQ affinities */ 1178 nicvf_set_irq_affinity(nic); 1179 1180 err: 1181 if (ret) 1182 netdev_err(nic->netdev, "request_irq failed, vector %d\n", irq); 1183 1184 return ret; 1185 } 1186 1187 static void nicvf_unregister_interrupts(struct nicvf *nic) 1188 { 1189 struct pci_dev *pdev = nic->pdev; 1190 int irq; 1191 1192 /* Free registered interrupts */ 1193 for (irq = 0; irq < nic->num_vec; irq++) { 1194 if (!nic->irq_allocated[irq]) 1195 continue; 1196 1197 irq_set_affinity_hint(pci_irq_vector(pdev, irq), NULL); 1198 free_cpumask_var(nic->affinity_mask[irq]); 1199 1200 if (irq < NICVF_INTR_ID_SQ) 1201 free_irq(pci_irq_vector(pdev, irq), nic->napi[irq]); 1202 else 1203 free_irq(pci_irq_vector(pdev, irq), nic); 1204 1205 nic->irq_allocated[irq] = false; 1206 } 1207 1208 /* Disable MSI-X */ 1209 pci_free_irq_vectors(pdev); 1210 nic->num_vec = 0; 1211 } 1212 1213 /* Initialize MSIX vectors and register MISC interrupt. 1214 * Send READY message to PF to check if its alive 1215 */ 1216 static int nicvf_register_misc_interrupt(struct nicvf *nic) 1217 { 1218 int ret = 0; 1219 int irq = NICVF_INTR_ID_MISC; 1220 1221 /* Return if mailbox interrupt is already registered */ 1222 if (nic->pdev->msix_enabled) 1223 return 0; 1224 1225 /* Enable MSI-X */ 1226 nic->num_vec = pci_msix_vec_count(nic->pdev); 1227 ret = pci_alloc_irq_vectors(nic->pdev, nic->num_vec, nic->num_vec, 1228 PCI_IRQ_MSIX); 1229 if (ret < 0) { 1230 netdev_err(nic->netdev, 1231 "Req for #%d msix vectors failed\n", nic->num_vec); 1232 return 1; 1233 } 1234 1235 sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); 1236 /* Register Misc interrupt */ 1237 ret = request_irq(pci_irq_vector(nic->pdev, irq), 1238 nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); 1239 1240 if (ret) 1241 return ret; 1242 nic->irq_allocated[irq] = true; 1243 1244 /* Enable mailbox interrupt */ 1245 nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0); 1246 1247 /* Check if VF is able to communicate with PF */ 1248 if (!nicvf_check_pf_ready(nic)) { 1249 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1250 nicvf_unregister_interrupts(nic); 1251 return 1; 1252 } 1253 1254 return 0; 1255 } 1256 1257 static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) 1258 { 1259 struct nicvf *nic = netdev_priv(netdev); 1260 int qid = skb_get_queue_mapping(skb); 1261 struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); 1262 struct nicvf *snic; 1263 struct snd_queue *sq; 1264 int tmp; 1265 1266 /* Check for minimum packet length */ 1267 if (skb->len <= ETH_HLEN) { 1268 dev_kfree_skb(skb); 1269 return NETDEV_TX_OK; 1270 } 1271 1272 /* In XDP case, initial HW tx queues are used for XDP, 1273 * but stack's queue mapping starts at '0', so skip the 1274 * Tx queues attached to Rx queues for XDP. 1275 */ 1276 if (nic->xdp_prog) 1277 qid += nic->xdp_tx_queues; 1278 1279 snic = nic; 1280 /* Get secondary Qset's SQ structure */ 1281 if (qid >= MAX_SND_QUEUES_PER_QS) { 1282 tmp = qid / MAX_SND_QUEUES_PER_QS; 1283 snic = (struct nicvf *)nic->snicvf[tmp - 1]; 1284 if (!snic) { 1285 netdev_warn(nic->netdev, 1286 "Secondary Qset#%d's ptr not initialized\n", 1287 tmp - 1); 1288 dev_kfree_skb(skb); 1289 return NETDEV_TX_OK; 1290 } 1291 qid = qid % MAX_SND_QUEUES_PER_QS; 1292 } 1293 1294 sq = &snic->qs->sq[qid]; 1295 if (!netif_tx_queue_stopped(txq) && 1296 !nicvf_sq_append_skb(snic, sq, skb, qid)) { 1297 netif_tx_stop_queue(txq); 1298 1299 /* Barrier, so that stop_queue visible to other cpus */ 1300 smp_mb(); 1301 1302 /* Check again, incase another cpu freed descriptors */ 1303 if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) { 1304 netif_tx_wake_queue(txq); 1305 } else { 1306 this_cpu_inc(nic->drv_stats->txq_stop); 1307 netif_warn(nic, tx_err, netdev, 1308 "Transmit ring full, stopping SQ%d\n", qid); 1309 } 1310 return NETDEV_TX_BUSY; 1311 } 1312 1313 return NETDEV_TX_OK; 1314 } 1315 1316 static inline void nicvf_free_cq_poll(struct nicvf *nic) 1317 { 1318 struct nicvf_cq_poll *cq_poll; 1319 int qidx; 1320 1321 for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { 1322 cq_poll = nic->napi[qidx]; 1323 if (!cq_poll) 1324 continue; 1325 nic->napi[qidx] = NULL; 1326 kfree(cq_poll); 1327 } 1328 } 1329 1330 int nicvf_stop(struct net_device *netdev) 1331 { 1332 int irq, qidx; 1333 struct nicvf *nic = netdev_priv(netdev); 1334 struct queue_set *qs = nic->qs; 1335 struct nicvf_cq_poll *cq_poll = NULL; 1336 union nic_mbx mbx = {}; 1337 1338 /* wait till all queued set_rx_mode tasks completes */ 1339 if (nic->nicvf_rx_mode_wq) { 1340 cancel_delayed_work_sync(&nic->link_change_work); 1341 drain_workqueue(nic->nicvf_rx_mode_wq); 1342 } 1343 1344 mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; 1345 nicvf_send_msg_to_pf(nic, &mbx); 1346 1347 netif_carrier_off(netdev); 1348 netif_tx_stop_all_queues(nic->netdev); 1349 nic->link_up = false; 1350 1351 /* Teardown secondary qsets first */ 1352 if (!nic->sqs_mode) { 1353 for (qidx = 0; qidx < nic->sqs_count; qidx++) { 1354 if (!nic->snicvf[qidx]) 1355 continue; 1356 nicvf_stop(nic->snicvf[qidx]->netdev); 1357 nic->snicvf[qidx] = NULL; 1358 } 1359 } 1360 1361 /* Disable RBDR & QS error interrupts */ 1362 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { 1363 nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); 1364 nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); 1365 } 1366 nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); 1367 nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); 1368 1369 /* Wait for pending IRQ handlers to finish */ 1370 for (irq = 0; irq < nic->num_vec; irq++) 1371 synchronize_irq(pci_irq_vector(nic->pdev, irq)); 1372 1373 tasklet_kill(&nic->rbdr_task); 1374 tasklet_kill(&nic->qs_err_task); 1375 if (nic->rb_work_scheduled) 1376 cancel_delayed_work_sync(&nic->rbdr_work); 1377 1378 for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { 1379 cq_poll = nic->napi[qidx]; 1380 if (!cq_poll) 1381 continue; 1382 napi_synchronize(&cq_poll->napi); 1383 /* CQ intr is enabled while napi_complete, 1384 * so disable it now 1385 */ 1386 nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); 1387 nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); 1388 napi_disable(&cq_poll->napi); 1389 netif_napi_del(&cq_poll->napi); 1390 } 1391 1392 netif_tx_disable(netdev); 1393 1394 for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) 1395 netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); 1396 1397 /* Free resources */ 1398 nicvf_config_data_transfer(nic, false); 1399 1400 /* Disable HW Qset */ 1401 nicvf_qset_config(nic, false); 1402 1403 /* disable mailbox interrupt */ 1404 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1405 1406 nicvf_unregister_interrupts(nic); 1407 1408 nicvf_free_cq_poll(nic); 1409 1410 /* Free any pending SKB saved to receive timestamp */ 1411 if (nic->ptp_skb) { 1412 dev_kfree_skb_any(nic->ptp_skb); 1413 nic->ptp_skb = NULL; 1414 } 1415 1416 /* Clear multiqset info */ 1417 nic->pnicvf = nic; 1418 1419 return 0; 1420 } 1421 1422 static int nicvf_config_hw_rx_tstamp(struct nicvf *nic, bool enable) 1423 { 1424 union nic_mbx mbx = {}; 1425 1426 mbx.ptp.msg = NIC_MBOX_MSG_PTP_CFG; 1427 mbx.ptp.enable = enable; 1428 1429 return nicvf_send_msg_to_pf(nic, &mbx); 1430 } 1431 1432 static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) 1433 { 1434 union nic_mbx mbx = {}; 1435 1436 mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; 1437 mbx.frs.max_frs = mtu; 1438 mbx.frs.vf_id = nic->vf_id; 1439 1440 return nicvf_send_msg_to_pf(nic, &mbx); 1441 } 1442 1443 static void nicvf_link_status_check_task(struct work_struct *work_arg) 1444 { 1445 struct nicvf *nic = container_of(work_arg, 1446 struct nicvf, 1447 link_change_work.work); 1448 union nic_mbx mbx = {}; 1449 mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; 1450 nicvf_send_msg_to_pf(nic, &mbx); 1451 queue_delayed_work(nic->nicvf_rx_mode_wq, 1452 &nic->link_change_work, 2 * HZ); 1453 } 1454 1455 int nicvf_open(struct net_device *netdev) 1456 { 1457 int cpu, err, qidx; 1458 struct nicvf *nic = netdev_priv(netdev); 1459 struct queue_set *qs = nic->qs; 1460 struct nicvf_cq_poll *cq_poll = NULL; 1461 1462 /* wait till all queued set_rx_mode tasks completes if any */ 1463 if (nic->nicvf_rx_mode_wq) 1464 drain_workqueue(nic->nicvf_rx_mode_wq); 1465 1466 netif_carrier_off(netdev); 1467 1468 err = nicvf_register_misc_interrupt(nic); 1469 if (err) 1470 return err; 1471 1472 /* Register NAPI handler for processing CQEs */ 1473 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 1474 cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL); 1475 if (!cq_poll) { 1476 err = -ENOMEM; 1477 goto napi_del; 1478 } 1479 cq_poll->cq_idx = qidx; 1480 cq_poll->nicvf = nic; 1481 netif_napi_add(netdev, &cq_poll->napi, nicvf_poll, 1482 NAPI_POLL_WEIGHT); 1483 napi_enable(&cq_poll->napi); 1484 nic->napi[qidx] = cq_poll; 1485 } 1486 1487 /* Check if we got MAC address from PF or else generate a radom MAC */ 1488 if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) { 1489 eth_hw_addr_random(netdev); 1490 nicvf_hw_set_mac_addr(nic, netdev); 1491 } 1492 1493 if (nic->set_mac_pending) { 1494 nic->set_mac_pending = false; 1495 nicvf_hw_set_mac_addr(nic, netdev); 1496 } 1497 1498 /* Init tasklet for handling Qset err interrupt */ 1499 tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err, 1500 (unsigned long)nic); 1501 1502 /* Init RBDR tasklet which will refill RBDR */ 1503 tasklet_init(&nic->rbdr_task, nicvf_rbdr_task, 1504 (unsigned long)nic); 1505 INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work); 1506 1507 /* Configure CPI alorithm */ 1508 nic->cpi_alg = cpi_alg; 1509 if (!nic->sqs_mode) 1510 nicvf_config_cpi(nic); 1511 1512 nicvf_request_sqs(nic); 1513 if (nic->sqs_mode) 1514 nicvf_get_primary_vf_struct(nic); 1515 1516 /* Configure PTP timestamp */ 1517 if (nic->ptp_clock) 1518 nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp); 1519 atomic_set(&nic->tx_ptp_skbs, 0); 1520 nic->ptp_skb = NULL; 1521 1522 /* Configure receive side scaling and MTU */ 1523 if (!nic->sqs_mode) { 1524 nicvf_rss_init(nic); 1525 err = nicvf_update_hw_max_frs(nic, netdev->mtu); 1526 if (err) 1527 goto cleanup; 1528 1529 /* Clear percpu stats */ 1530 for_each_possible_cpu(cpu) 1531 memset(per_cpu_ptr(nic->drv_stats, cpu), 0, 1532 sizeof(struct nicvf_drv_stats)); 1533 } 1534 1535 err = nicvf_register_interrupts(nic); 1536 if (err) 1537 goto cleanup; 1538 1539 /* Initialize the queues */ 1540 err = nicvf_init_resources(nic); 1541 if (err) 1542 goto cleanup; 1543 1544 /* Make sure queue initialization is written */ 1545 wmb(); 1546 1547 nicvf_reg_write(nic, NIC_VF_INT, -1); 1548 /* Enable Qset err interrupt */ 1549 nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); 1550 1551 /* Enable completion queue interrupt */ 1552 for (qidx = 0; qidx < qs->cq_cnt; qidx++) 1553 nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); 1554 1555 /* Enable RBDR threshold interrupt */ 1556 for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) 1557 nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); 1558 1559 /* Send VF config done msg to PF */ 1560 nicvf_send_cfg_done(nic); 1561 1562 if (nic->nicvf_rx_mode_wq) { 1563 INIT_DELAYED_WORK(&nic->link_change_work, 1564 nicvf_link_status_check_task); 1565 queue_delayed_work(nic->nicvf_rx_mode_wq, 1566 &nic->link_change_work, 0); 1567 } 1568 1569 return 0; 1570 cleanup: 1571 nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); 1572 nicvf_unregister_interrupts(nic); 1573 tasklet_kill(&nic->qs_err_task); 1574 tasklet_kill(&nic->rbdr_task); 1575 napi_del: 1576 for (qidx = 0; qidx < qs->cq_cnt; qidx++) { 1577 cq_poll = nic->napi[qidx]; 1578 if (!cq_poll) 1579 continue; 1580 napi_disable(&cq_poll->napi); 1581 netif_napi_del(&cq_poll->napi); 1582 } 1583 nicvf_free_cq_poll(nic); 1584 return err; 1585 } 1586 1587 static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) 1588 { 1589 struct nicvf *nic = netdev_priv(netdev); 1590 int orig_mtu = netdev->mtu; 1591 1592 /* For now just support only the usual MTU sized frames, 1593 * plus some headroom for VLAN, QinQ. 1594 */ 1595 if (nic->xdp_prog && new_mtu > MAX_XDP_MTU) { 1596 netdev_warn(netdev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", 1597 netdev->mtu); 1598 return -EINVAL; 1599 } 1600 1601 netdev->mtu = new_mtu; 1602 1603 if (!netif_running(netdev)) 1604 return 0; 1605 1606 if (nicvf_update_hw_max_frs(nic, new_mtu)) { 1607 netdev->mtu = orig_mtu; 1608 return -EINVAL; 1609 } 1610 1611 return 0; 1612 } 1613 1614 static int nicvf_set_mac_address(struct net_device *netdev, void *p) 1615 { 1616 struct sockaddr *addr = p; 1617 struct nicvf *nic = netdev_priv(netdev); 1618 1619 if (!is_valid_ether_addr(addr->sa_data)) 1620 return -EADDRNOTAVAIL; 1621 1622 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); 1623 1624 if (nic->pdev->msix_enabled) { 1625 if (nicvf_hw_set_mac_addr(nic, netdev)) 1626 return -EBUSY; 1627 } else { 1628 nic->set_mac_pending = true; 1629 } 1630 1631 return 0; 1632 } 1633 1634 void nicvf_update_lmac_stats(struct nicvf *nic) 1635 { 1636 int stat = 0; 1637 union nic_mbx mbx = {}; 1638 1639 if (!netif_running(nic->netdev)) 1640 return; 1641 1642 mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; 1643 mbx.bgx_stats.vf_id = nic->vf_id; 1644 /* Rx stats */ 1645 mbx.bgx_stats.rx = 1; 1646 while (stat < BGX_RX_STATS_COUNT) { 1647 mbx.bgx_stats.idx = stat; 1648 if (nicvf_send_msg_to_pf(nic, &mbx)) 1649 return; 1650 stat++; 1651 } 1652 1653 stat = 0; 1654 1655 /* Tx stats */ 1656 mbx.bgx_stats.rx = 0; 1657 while (stat < BGX_TX_STATS_COUNT) { 1658 mbx.bgx_stats.idx = stat; 1659 if (nicvf_send_msg_to_pf(nic, &mbx)) 1660 return; 1661 stat++; 1662 } 1663 } 1664 1665 void nicvf_update_stats(struct nicvf *nic) 1666 { 1667 int qidx, cpu; 1668 u64 tmp_stats = 0; 1669 struct nicvf_hw_stats *stats = &nic->hw_stats; 1670 struct nicvf_drv_stats *drv_stats; 1671 struct queue_set *qs = nic->qs; 1672 1673 #define GET_RX_STATS(reg) \ 1674 nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3)) 1675 #define GET_TX_STATS(reg) \ 1676 nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3)) 1677 1678 stats->rx_bytes = GET_RX_STATS(RX_OCTS); 1679 stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST); 1680 stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST); 1681 stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST); 1682 stats->rx_fcs_errors = GET_RX_STATS(RX_FCS); 1683 stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR); 1684 stats->rx_drop_red = GET_RX_STATS(RX_RED); 1685 stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS); 1686 stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN); 1687 stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS); 1688 stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST); 1689 stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST); 1690 stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); 1691 stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST); 1692 1693 stats->tx_bytes = GET_TX_STATS(TX_OCTS); 1694 stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST); 1695 stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST); 1696 stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST); 1697 stats->tx_drops = GET_TX_STATS(TX_DROP); 1698 1699 /* On T88 pass 2.0, the dummy SQE added for TSO notification 1700 * via CQE has 'dont_send' set. Hence HW drops the pkt pointed 1701 * pointed by dummy SQE and results in tx_drops counter being 1702 * incremented. Subtracting it from tx_tso counter will give 1703 * exact tx_drops counter. 1704 */ 1705 if (nic->t88 && nic->hw_tso) { 1706 for_each_possible_cpu(cpu) { 1707 drv_stats = per_cpu_ptr(nic->drv_stats, cpu); 1708 tmp_stats += drv_stats->tx_tso; 1709 } 1710 stats->tx_drops = tmp_stats - stats->tx_drops; 1711 } 1712 stats->tx_frames = stats->tx_ucast_frames + 1713 stats->tx_bcast_frames + 1714 stats->tx_mcast_frames; 1715 stats->rx_frames = stats->rx_ucast_frames + 1716 stats->rx_bcast_frames + 1717 stats->rx_mcast_frames; 1718 stats->rx_drops = stats->rx_drop_red + 1719 stats->rx_drop_overrun; 1720 1721 /* Update RQ and SQ stats */ 1722 for (qidx = 0; qidx < qs->rq_cnt; qidx++) 1723 nicvf_update_rq_stats(nic, qidx); 1724 for (qidx = 0; qidx < qs->sq_cnt; qidx++) 1725 nicvf_update_sq_stats(nic, qidx); 1726 } 1727 1728 static void nicvf_get_stats64(struct net_device *netdev, 1729 struct rtnl_link_stats64 *stats) 1730 { 1731 struct nicvf *nic = netdev_priv(netdev); 1732 struct nicvf_hw_stats *hw_stats = &nic->hw_stats; 1733 1734 nicvf_update_stats(nic); 1735 1736 stats->rx_bytes = hw_stats->rx_bytes; 1737 stats->rx_packets = hw_stats->rx_frames; 1738 stats->rx_dropped = hw_stats->rx_drops; 1739 stats->multicast = hw_stats->rx_mcast_frames; 1740 1741 stats->tx_bytes = hw_stats->tx_bytes; 1742 stats->tx_packets = hw_stats->tx_frames; 1743 stats->tx_dropped = hw_stats->tx_drops; 1744 1745 } 1746 1747 static void nicvf_tx_timeout(struct net_device *dev) 1748 { 1749 struct nicvf *nic = netdev_priv(dev); 1750 1751 netif_warn(nic, tx_err, dev, "Transmit timed out, resetting\n"); 1752 1753 this_cpu_inc(nic->drv_stats->tx_timeout); 1754 schedule_work(&nic->reset_task); 1755 } 1756 1757 static void nicvf_reset_task(struct work_struct *work) 1758 { 1759 struct nicvf *nic; 1760 1761 nic = container_of(work, struct nicvf, reset_task); 1762 1763 if (!netif_running(nic->netdev)) 1764 return; 1765 1766 nicvf_stop(nic->netdev); 1767 nicvf_open(nic->netdev); 1768 netif_trans_update(nic->netdev); 1769 } 1770 1771 static int nicvf_config_loopback(struct nicvf *nic, 1772 netdev_features_t features) 1773 { 1774 union nic_mbx mbx = {}; 1775 1776 mbx.lbk.msg = NIC_MBOX_MSG_LOOPBACK; 1777 mbx.lbk.vf_id = nic->vf_id; 1778 mbx.lbk.enable = (features & NETIF_F_LOOPBACK) != 0; 1779 1780 return nicvf_send_msg_to_pf(nic, &mbx); 1781 } 1782 1783 static netdev_features_t nicvf_fix_features(struct net_device *netdev, 1784 netdev_features_t features) 1785 { 1786 struct nicvf *nic = netdev_priv(netdev); 1787 1788 if ((features & NETIF_F_LOOPBACK) && 1789 netif_running(netdev) && !nic->loopback_supported) 1790 features &= ~NETIF_F_LOOPBACK; 1791 1792 return features; 1793 } 1794 1795 static int nicvf_set_features(struct net_device *netdev, 1796 netdev_features_t features) 1797 { 1798 struct nicvf *nic = netdev_priv(netdev); 1799 netdev_features_t changed = features ^ netdev->features; 1800 1801 if (changed & NETIF_F_HW_VLAN_CTAG_RX) 1802 nicvf_config_vlan_stripping(nic, features); 1803 1804 if ((changed & NETIF_F_LOOPBACK) && netif_running(netdev)) 1805 return nicvf_config_loopback(nic, features); 1806 1807 return 0; 1808 } 1809 1810 static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) 1811 { 1812 u8 cq_count, txq_count; 1813 1814 /* Set XDP Tx queue count same as Rx queue count */ 1815 if (!bpf_attached) 1816 nic->xdp_tx_queues = 0; 1817 else 1818 nic->xdp_tx_queues = nic->rx_queues; 1819 1820 /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets 1821 * needs to be allocated, check how many. 1822 */ 1823 txq_count = nic->xdp_tx_queues + nic->tx_queues; 1824 cq_count = max(nic->rx_queues, txq_count); 1825 if (cq_count > MAX_CMP_QUEUES_PER_QS) { 1826 nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); 1827 nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; 1828 } else { 1829 nic->sqs_count = 0; 1830 } 1831 1832 /* Set primary Qset's resources */ 1833 nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); 1834 nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); 1835 nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); 1836 1837 /* Update stack */ 1838 nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); 1839 } 1840 1841 static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) 1842 { 1843 struct net_device *dev = nic->netdev; 1844 bool if_up = netif_running(nic->netdev); 1845 struct bpf_prog *old_prog; 1846 bool bpf_attached = false; 1847 int ret = 0; 1848 1849 /* For now just support only the usual MTU sized frames, 1850 * plus some headroom for VLAN, QinQ. 1851 */ 1852 if (prog && dev->mtu > MAX_XDP_MTU) { 1853 netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", 1854 dev->mtu); 1855 return -EOPNOTSUPP; 1856 } 1857 1858 /* ALL SQs attached to CQs i.e same as RQs, are treated as 1859 * XDP Tx queues and more Tx queues are allocated for 1860 * network stack to send pkts out. 1861 * 1862 * No of Tx queues are either same as Rx queues or whatever 1863 * is left in max no of queues possible. 1864 */ 1865 if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { 1866 netdev_warn(dev, 1867 "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", 1868 nic->max_queues); 1869 return -ENOMEM; 1870 } 1871 1872 if (if_up) 1873 nicvf_stop(nic->netdev); 1874 1875 old_prog = xchg(&nic->xdp_prog, prog); 1876 /* Detach old prog, if any */ 1877 if (old_prog) 1878 bpf_prog_put(old_prog); 1879 1880 if (nic->xdp_prog) { 1881 /* Attach BPF program */ 1882 nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); 1883 if (!IS_ERR(nic->xdp_prog)) { 1884 bpf_attached = true; 1885 } else { 1886 ret = PTR_ERR(nic->xdp_prog); 1887 nic->xdp_prog = NULL; 1888 } 1889 } 1890 1891 /* Calculate Tx queues needed for XDP and network stack */ 1892 nicvf_set_xdp_queues(nic, bpf_attached); 1893 1894 if (if_up) { 1895 /* Reinitialize interface, clean slate */ 1896 nicvf_open(nic->netdev); 1897 netif_trans_update(nic->netdev); 1898 } 1899 1900 return ret; 1901 } 1902 1903 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp) 1904 { 1905 struct nicvf *nic = netdev_priv(netdev); 1906 1907 /* To avoid checks while retrieving buffer address from CQE_RX, 1908 * do not support XDP for T88 pass1.x silicons which are anyway 1909 * not in use widely. 1910 */ 1911 if (pass1_silicon(nic->pdev)) 1912 return -EOPNOTSUPP; 1913 1914 switch (xdp->command) { 1915 case XDP_SETUP_PROG: 1916 return nicvf_xdp_setup(nic, xdp->prog); 1917 case XDP_QUERY_PROG: 1918 xdp->prog_id = nic->xdp_prog ? nic->xdp_prog->aux->id : 0; 1919 return 0; 1920 default: 1921 return -EINVAL; 1922 } 1923 } 1924 1925 static int nicvf_config_hwtstamp(struct net_device *netdev, struct ifreq *ifr) 1926 { 1927 struct hwtstamp_config config; 1928 struct nicvf *nic = netdev_priv(netdev); 1929 1930 if (!nic->ptp_clock) 1931 return -ENODEV; 1932 1933 if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) 1934 return -EFAULT; 1935 1936 /* reserved for future extensions */ 1937 if (config.flags) 1938 return -EINVAL; 1939 1940 switch (config.tx_type) { 1941 case HWTSTAMP_TX_OFF: 1942 case HWTSTAMP_TX_ON: 1943 break; 1944 default: 1945 return -ERANGE; 1946 } 1947 1948 switch (config.rx_filter) { 1949 case HWTSTAMP_FILTER_NONE: 1950 nic->hw_rx_tstamp = false; 1951 break; 1952 case HWTSTAMP_FILTER_ALL: 1953 case HWTSTAMP_FILTER_SOME: 1954 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: 1955 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: 1956 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: 1957 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: 1958 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: 1959 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: 1960 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: 1961 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: 1962 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: 1963 case HWTSTAMP_FILTER_PTP_V2_EVENT: 1964 case HWTSTAMP_FILTER_PTP_V2_SYNC: 1965 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: 1966 nic->hw_rx_tstamp = true; 1967 config.rx_filter = HWTSTAMP_FILTER_ALL; 1968 break; 1969 default: 1970 return -ERANGE; 1971 } 1972 1973 if (netif_running(netdev)) 1974 nicvf_config_hw_rx_tstamp(nic, nic->hw_rx_tstamp); 1975 1976 if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) 1977 return -EFAULT; 1978 1979 return 0; 1980 } 1981 1982 static int nicvf_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) 1983 { 1984 switch (cmd) { 1985 case SIOCSHWTSTAMP: 1986 return nicvf_config_hwtstamp(netdev, req); 1987 default: 1988 return -EOPNOTSUPP; 1989 } 1990 } 1991 1992 static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs, 1993 struct nicvf *nic) 1994 { 1995 union nic_mbx mbx = {}; 1996 int idx; 1997 1998 /* From the inside of VM code flow we have only 128 bits memory 1999 * available to send message to host's PF, so send all mc addrs 2000 * one by one, starting from flush command in case if kernel 2001 * requests to configure specific MAC filtering 2002 */ 2003 2004 /* flush DMAC filters and reset RX mode */ 2005 mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST; 2006 if (nicvf_send_msg_to_pf(nic, &mbx) < 0) 2007 goto free_mc; 2008 2009 if (mode & BGX_XCAST_MCAST_FILTER) { 2010 /* once enabling filtering, we need to signal to PF to add 2011 * its' own LMAC to the filter to accept packets for it. 2012 */ 2013 mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; 2014 mbx.xcast.mac = 0; 2015 if (nicvf_send_msg_to_pf(nic, &mbx) < 0) 2016 goto free_mc; 2017 } 2018 2019 /* check if we have any specific MACs to be added to PF DMAC filter */ 2020 if (mc_addrs) { 2021 /* now go through kernel list of MACs and add them one by one */ 2022 for (idx = 0; idx < mc_addrs->count; idx++) { 2023 mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST; 2024 mbx.xcast.mac = mc_addrs->mc[idx]; 2025 if (nicvf_send_msg_to_pf(nic, &mbx) < 0) 2026 goto free_mc; 2027 } 2028 } 2029 2030 /* and finally set rx mode for PF accordingly */ 2031 mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST; 2032 mbx.xcast.mode = mode; 2033 2034 nicvf_send_msg_to_pf(nic, &mbx); 2035 free_mc: 2036 kfree(mc_addrs); 2037 } 2038 2039 static void nicvf_set_rx_mode_task(struct work_struct *work_arg) 2040 { 2041 struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work, 2042 work); 2043 struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work); 2044 u8 mode; 2045 struct xcast_addr_list *mc; 2046 2047 if (!vf_work) 2048 return; 2049 2050 /* Save message data locally to prevent them from 2051 * being overwritten by next ndo_set_rx_mode call(). 2052 */ 2053 spin_lock(&nic->rx_mode_wq_lock); 2054 mode = vf_work->mode; 2055 mc = vf_work->mc; 2056 vf_work->mc = NULL; 2057 spin_unlock(&nic->rx_mode_wq_lock); 2058 2059 __nicvf_set_rx_mode_task(mode, mc, nic); 2060 } 2061 2062 static void nicvf_set_rx_mode(struct net_device *netdev) 2063 { 2064 struct nicvf *nic = netdev_priv(netdev); 2065 struct netdev_hw_addr *ha; 2066 struct xcast_addr_list *mc_list = NULL; 2067 u8 mode = 0; 2068 2069 if (netdev->flags & IFF_PROMISC) { 2070 mode = BGX_XCAST_BCAST_ACCEPT | BGX_XCAST_MCAST_ACCEPT; 2071 } else { 2072 if (netdev->flags & IFF_BROADCAST) 2073 mode |= BGX_XCAST_BCAST_ACCEPT; 2074 2075 if (netdev->flags & IFF_ALLMULTI) { 2076 mode |= BGX_XCAST_MCAST_ACCEPT; 2077 } else if (netdev->flags & IFF_MULTICAST) { 2078 mode |= BGX_XCAST_MCAST_FILTER; 2079 /* here we need to copy mc addrs */ 2080 if (netdev_mc_count(netdev)) { 2081 mc_list = kmalloc(offsetof(typeof(*mc_list), 2082 mc[netdev_mc_count(netdev)]), 2083 GFP_ATOMIC); 2084 if (unlikely(!mc_list)) 2085 return; 2086 mc_list->count = 0; 2087 netdev_hw_addr_list_for_each(ha, &netdev->mc) { 2088 mc_list->mc[mc_list->count] = 2089 ether_addr_to_u64(ha->addr); 2090 mc_list->count++; 2091 } 2092 } 2093 } 2094 } 2095 spin_lock(&nic->rx_mode_wq_lock); 2096 kfree(nic->rx_mode_work.mc); 2097 nic->rx_mode_work.mc = mc_list; 2098 nic->rx_mode_work.mode = mode; 2099 queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work); 2100 spin_unlock(&nic->rx_mode_wq_lock); 2101 } 2102 2103 static const struct net_device_ops nicvf_netdev_ops = { 2104 .ndo_open = nicvf_open, 2105 .ndo_stop = nicvf_stop, 2106 .ndo_start_xmit = nicvf_xmit, 2107 .ndo_change_mtu = nicvf_change_mtu, 2108 .ndo_set_mac_address = nicvf_set_mac_address, 2109 .ndo_get_stats64 = nicvf_get_stats64, 2110 .ndo_tx_timeout = nicvf_tx_timeout, 2111 .ndo_fix_features = nicvf_fix_features, 2112 .ndo_set_features = nicvf_set_features, 2113 .ndo_bpf = nicvf_xdp, 2114 .ndo_do_ioctl = nicvf_ioctl, 2115 .ndo_set_rx_mode = nicvf_set_rx_mode, 2116 }; 2117 2118 static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2119 { 2120 struct device *dev = &pdev->dev; 2121 struct net_device *netdev; 2122 struct nicvf *nic; 2123 int err, qcount; 2124 u16 sdevid; 2125 struct cavium_ptp *ptp_clock; 2126 2127 ptp_clock = cavium_ptp_get(); 2128 if (IS_ERR(ptp_clock)) { 2129 if (PTR_ERR(ptp_clock) == -ENODEV) 2130 /* In virtualized environment we proceed without ptp */ 2131 ptp_clock = NULL; 2132 else 2133 return PTR_ERR(ptp_clock); 2134 } 2135 2136 err = pci_enable_device(pdev); 2137 if (err) { 2138 dev_err(dev, "Failed to enable PCI device\n"); 2139 return err; 2140 } 2141 2142 err = pci_request_regions(pdev, DRV_NAME); 2143 if (err) { 2144 dev_err(dev, "PCI request regions failed 0x%x\n", err); 2145 goto err_disable_device; 2146 } 2147 2148 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); 2149 if (err) { 2150 dev_err(dev, "Unable to get usable DMA configuration\n"); 2151 goto err_release_regions; 2152 } 2153 2154 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); 2155 if (err) { 2156 dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); 2157 goto err_release_regions; 2158 } 2159 2160 qcount = netif_get_num_default_rss_queues(); 2161 2162 /* Restrict multiqset support only for host bound VFs */ 2163 if (pdev->is_virtfn) { 2164 /* Set max number of queues per VF */ 2165 qcount = min_t(int, num_online_cpus(), 2166 (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); 2167 } 2168 2169 netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); 2170 if (!netdev) { 2171 err = -ENOMEM; 2172 goto err_release_regions; 2173 } 2174 2175 pci_set_drvdata(pdev, netdev); 2176 2177 SET_NETDEV_DEV(netdev, &pdev->dev); 2178 2179 nic = netdev_priv(netdev); 2180 nic->netdev = netdev; 2181 nic->pdev = pdev; 2182 nic->pnicvf = nic; 2183 nic->max_queues = qcount; 2184 /* If no of CPUs are too low, there won't be any queues left 2185 * for XDP_TX, hence double it. 2186 */ 2187 if (!nic->t88) 2188 nic->max_queues *= 2; 2189 nic->ptp_clock = ptp_clock; 2190 2191 /* MAP VF's configuration registers */ 2192 nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); 2193 if (!nic->reg_base) { 2194 dev_err(dev, "Cannot map config register space, aborting\n"); 2195 err = -ENOMEM; 2196 goto err_free_netdev; 2197 } 2198 2199 nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats); 2200 if (!nic->drv_stats) { 2201 err = -ENOMEM; 2202 goto err_free_netdev; 2203 } 2204 2205 err = nicvf_set_qset_resources(nic); 2206 if (err) 2207 goto err_free_netdev; 2208 2209 /* Check if PF is alive and get MAC address for this VF */ 2210 err = nicvf_register_misc_interrupt(nic); 2211 if (err) 2212 goto err_free_netdev; 2213 2214 nicvf_send_vf_struct(nic); 2215 2216 if (!pass1_silicon(nic->pdev)) 2217 nic->hw_tso = true; 2218 2219 /* Get iommu domain for iova to physical addr conversion */ 2220 nic->iommu_domain = iommu_get_domain_for_dev(dev); 2221 2222 pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); 2223 if (sdevid == 0xA134) 2224 nic->t88 = true; 2225 2226 /* Check if this VF is in QS only mode */ 2227 if (nic->sqs_mode) 2228 return 0; 2229 2230 err = nicvf_set_real_num_queues(netdev, nic->tx_queues, nic->rx_queues); 2231 if (err) 2232 goto err_unregister_interrupts; 2233 2234 netdev->hw_features = (NETIF_F_RXCSUM | NETIF_F_SG | 2235 NETIF_F_TSO | NETIF_F_GRO | NETIF_F_TSO6 | 2236 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 2237 NETIF_F_HW_VLAN_CTAG_RX); 2238 2239 netdev->hw_features |= NETIF_F_RXHASH; 2240 2241 netdev->features |= netdev->hw_features; 2242 netdev->hw_features |= NETIF_F_LOOPBACK; 2243 2244 netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | 2245 NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; 2246 2247 netdev->netdev_ops = &nicvf_netdev_ops; 2248 netdev->watchdog_timeo = NICVF_TX_TIMEOUT; 2249 2250 /* MTU range: 64 - 9200 */ 2251 netdev->min_mtu = NIC_HW_MIN_FRS; 2252 netdev->max_mtu = NIC_HW_MAX_FRS; 2253 2254 INIT_WORK(&nic->reset_task, nicvf_reset_task); 2255 2256 nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d", 2257 WQ_MEM_RECLAIM, 2258 nic->vf_id); 2259 if (!nic->nicvf_rx_mode_wq) { 2260 err = -ENOMEM; 2261 dev_err(dev, "Failed to allocate work queue\n"); 2262 goto err_unregister_interrupts; 2263 } 2264 2265 INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); 2266 spin_lock_init(&nic->rx_mode_wq_lock); 2267 mutex_init(&nic->rx_mode_mtx); 2268 2269 err = register_netdev(netdev); 2270 if (err) { 2271 dev_err(dev, "Failed to register netdevice\n"); 2272 goto err_unregister_interrupts; 2273 } 2274 2275 nic->msg_enable = debug; 2276 2277 nicvf_set_ethtool_ops(netdev); 2278 2279 return 0; 2280 2281 err_unregister_interrupts: 2282 nicvf_unregister_interrupts(nic); 2283 err_free_netdev: 2284 pci_set_drvdata(pdev, NULL); 2285 if (nic->drv_stats) 2286 free_percpu(nic->drv_stats); 2287 free_netdev(netdev); 2288 err_release_regions: 2289 pci_release_regions(pdev); 2290 err_disable_device: 2291 pci_disable_device(pdev); 2292 return err; 2293 } 2294 2295 static void nicvf_remove(struct pci_dev *pdev) 2296 { 2297 struct net_device *netdev = pci_get_drvdata(pdev); 2298 struct nicvf *nic; 2299 struct net_device *pnetdev; 2300 2301 if (!netdev) 2302 return; 2303 2304 nic = netdev_priv(netdev); 2305 pnetdev = nic->pnicvf->netdev; 2306 2307 /* Check if this Qset is assigned to different VF. 2308 * If yes, clean primary and all secondary Qsets. 2309 */ 2310 if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED)) 2311 unregister_netdev(pnetdev); 2312 if (nic->nicvf_rx_mode_wq) { 2313 destroy_workqueue(nic->nicvf_rx_mode_wq); 2314 nic->nicvf_rx_mode_wq = NULL; 2315 } 2316 nicvf_unregister_interrupts(nic); 2317 pci_set_drvdata(pdev, NULL); 2318 if (nic->drv_stats) 2319 free_percpu(nic->drv_stats); 2320 cavium_ptp_put(nic->ptp_clock); 2321 free_netdev(netdev); 2322 pci_release_regions(pdev); 2323 pci_disable_device(pdev); 2324 } 2325 2326 static void nicvf_shutdown(struct pci_dev *pdev) 2327 { 2328 nicvf_remove(pdev); 2329 } 2330 2331 static struct pci_driver nicvf_driver = { 2332 .name = DRV_NAME, 2333 .id_table = nicvf_id_table, 2334 .probe = nicvf_probe, 2335 .remove = nicvf_remove, 2336 .shutdown = nicvf_shutdown, 2337 }; 2338 2339 static int __init nicvf_init_module(void) 2340 { 2341 pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); 2342 return pci_register_driver(&nicvf_driver); 2343 } 2344 2345 static void __exit nicvf_cleanup_module(void) 2346 { 2347 pci_unregister_driver(&nicvf_driver); 2348 } 2349 2350 module_init(nicvf_init_module); 2351 module_exit(nicvf_cleanup_module); 2352