1 /* 2 * Copyright (C) 2015 Netronome Systems, Inc. 3 * 4 * This software is dual licensed under the GNU General Public License Version 2, 5 * June 1991 as shown in the file COPYING in the top-level directory of this 6 * source tree or the BSD 2-Clause License provided below. You have the 7 * option to license this software under the complete terms of either license. 8 * 9 * The BSD 2-Clause License: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * 1. Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * 2. Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 /* 35 * nfp_net_common.c 36 * Netronome network device driver: Common functions between PF and VF 37 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> 38 * Jason McMullan <jason.mcmullan@netronome.com> 39 * Rolf Neugebauer <rolf.neugebauer@netronome.com> 40 * Brad Petrus <brad.petrus@netronome.com> 41 * Chris Telfer <chris.telfer@netronome.com> 42 */ 43 44 #include <linux/version.h> 45 #include <linux/module.h> 46 #include <linux/kernel.h> 47 #include <linux/init.h> 48 #include <linux/fs.h> 49 #include <linux/netdevice.h> 50 #include <linux/etherdevice.h> 51 #include <linux/interrupt.h> 52 #include <linux/ip.h> 53 #include <linux/ipv6.h> 54 #include <linux/pci.h> 55 #include <linux/pci_regs.h> 56 #include <linux/msi.h> 57 #include <linux/ethtool.h> 58 #include <linux/log2.h> 59 #include <linux/if_vlan.h> 60 #include <linux/random.h> 61 62 #include <linux/ktime.h> 63 64 #include <net/vxlan.h> 65 66 #include "nfp_net_ctrl.h" 67 #include "nfp_net.h" 68 69 /** 70 * nfp_net_get_fw_version() - Read and parse the FW version 71 * @fw_ver: Output fw_version structure to read to 72 * @ctrl_bar: Mapped address of the control BAR 73 */ 74 void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, 75 void __iomem *ctrl_bar) 76 { 77 u32 reg; 78 79 reg = readl(ctrl_bar + NFP_NET_CFG_VERSION); 80 put_unaligned_le32(reg, fw_ver); 81 } 82 83 /** 84 * nfp_net_reconfig() - Reconfigure the firmware 85 * @nn: NFP Net device to reconfigure 86 * @update: The value for the update field in the BAR config 87 * 88 * Write the update word to the BAR and ping the reconfig queue. Then 89 * poll until the firmware has acknowledged the update by zeroing the 90 * update word.
91 * 92 * Return: Negative errno on error, 0 on success 93 */ 94 int nfp_net_reconfig(struct nfp_net *nn, u32 update) 95 { 96 int cnt, ret = 0; 97 u32 new; 98 99 spin_lock_bh(&nn->reconfig_lock); 100 101 nn_writel(nn, NFP_NET_CFG_UPDATE, update); 102 /* ensure update is written before pinging HW */ 103 nn_pci_flush(nn); 104 nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1); 105 106 /* Poll update field, waiting for NFP to ack the config */ 107 for (cnt = 0; ; cnt++) { 108 new = nn_readl(nn, NFP_NET_CFG_UPDATE); 109 if (new == 0) 110 break; 111 if (new & NFP_NET_CFG_UPDATE_ERR) { 112 nn_err(nn, "Reconfig error: 0x%08x\n", new); 113 ret = -EIO; 114 break; 115 } else if (cnt >= NFP_NET_POLL_TIMEOUT) { 116 nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n", 117 update, cnt); 118 ret = -EIO; 119 break; 120 } 121 mdelay(1); 122 } 123 124 spin_unlock_bh(&nn->reconfig_lock); 125 return ret; 126 } 127 128 /* Interrupt configuration and handling 129 */ 130 131 /** 132 * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking 133 * @nn: NFP Network structure 134 * @entry_nr: MSI-X table entry 135 * 136 * Clear the MSI-X table mask bit for the given entry bypassing Linux irq 137 * handling subsystem. Use *only* to reenable automasked vectors. 138 */ 139 static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) 140 { 141 struct list_head *msi_head = &nn->pdev->dev.msi_list; 142 struct msi_desc *entry; 143 u32 off; 144 145 /* All MSI-Xs have the same mask_base */ 146 entry = list_first_entry(msi_head, struct msi_desc, list); 147 148 off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + 149 PCI_MSIX_ENTRY_VECTOR_CTRL; 150 writel(0, entry->mask_base + off); 151 readl(entry->mask_base); 152 } 153 154 /** 155 * nfp_net_irq_unmask() - Unmask automasked interrupt 156 * @nn: NFP Network structure 157 * @entry_nr: MSI-X table entry 158 * 159 * If MSI-X auto-masking is enabled clear the mask bit, otherwise 160 * clear the ICR for the entry. 161 */ 162 static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) 163 { 164 if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { 165 nfp_net_irq_unmask_msix(nn, entry_nr); 166 return; 167 } 168 169 nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); 170 nn_pci_flush(nn); 171 } 172 173 /** 174 * nfp_net_msix_alloc() - Try to allocate MSI-X irqs 175 * @nn: NFP Network structure 176 * @nr_vecs: Number of MSI-X vectors to allocate 177 * 178 * For MSI-X we want at least NFP_NET_NON_Q_VECTORS + 1 vectors. 179 * 180 * Return: Number of MSI-X vectors obtained or 0 on error. 181 */ 182 static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) 183 { 184 struct pci_dev *pdev = nn->pdev; 185 int nvecs; 186 int i; 187 188 for (i = 0; i < nr_vecs; i++) 189 nn->irq_entries[i].entry = i; 190 191 nvecs = pci_enable_msix_range(pdev, nn->irq_entries, 192 NFP_NET_NON_Q_VECTORS + 1, nr_vecs); 193 if (nvecs < 0) { 194 nn_warn(nn, "Failed to enable MSI-X. Wanted %d-%d (err=%d)\n", 195 NFP_NET_NON_Q_VECTORS + 1, nr_vecs, nvecs); 196 return 0; 197 } 198 199 return nvecs; 200 } 201 202 /** 203 * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want 204 * @nn: NFP Network structure 205 * 206 * We want a vector per CPU (or ring), whatever is smaller plus 207 * NFP_NET_NON_Q_VECTORS for LSC etc. 
208 * 209 * Return: Number of interrupts wanted 210 */ 211 static int nfp_net_irqs_wanted(struct nfp_net *nn) 212 { 213 int ncpus; 214 int vecs; 215 216 ncpus = num_online_cpus(); 217 218 vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); 219 vecs = min_t(int, vecs, ncpus); 220 221 return vecs + NFP_NET_NON_Q_VECTORS; 222 } 223 224 /** 225 * nfp_net_irqs_alloc() - allocates MSI-X irqs 226 * @nn: NFP Network structure 227 * 228 * Return: Number of irqs obtained or 0 on error. 229 */ 230 int nfp_net_irqs_alloc(struct nfp_net *nn) 231 { 232 int wanted_irqs; 233 234 wanted_irqs = nfp_net_irqs_wanted(nn); 235 236 nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); 237 if (nn->num_irqs == 0) { 238 nn_err(nn, "Failed to allocate MSI-X IRQs\n"); 239 return 0; 240 } 241 242 nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; 243 244 if (nn->num_irqs < wanted_irqs) 245 nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", 246 wanted_irqs, nn->num_irqs); 247 248 return nn->num_irqs; 249 } 250 251 /** 252 * nfp_net_irqs_disable() - Disable interrupts 253 * @nn: NFP Network structure 254 * 255 * Undoes what @nfp_net_irqs_alloc() does. 256 */ 257 void nfp_net_irqs_disable(struct nfp_net *nn) 258 { 259 pci_disable_msix(nn->pdev); 260 } 261 262 /** 263 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings. 264 * @irq: Interrupt 265 * @data: Opaque data structure 266 * 267 * Return: Indicate if the interrupt has been handled. 268 */ 269 static irqreturn_t nfp_net_irq_rxtx(int irq, void *data) 270 { 271 struct nfp_net_r_vector *r_vec = data; 272 273 napi_schedule_irqoff(&r_vec->napi); 274 275 /* The FW auto-masks any interrupt, either via the MASK bit in 276 * the MSI-X table or via the per entry ICR field. So there 277 * is no need to disable interrupts here. 278 */ 279 return IRQ_HANDLED; 280 } 281 282 /** 283 * nfp_net_read_link_status() - Reread link status from control BAR 284 * @nn: NFP Network structure 285 */ 286 static void nfp_net_read_link_status(struct nfp_net *nn) 287 { 288 unsigned long flags; 289 bool link_up; 290 u32 sts; 291 292 spin_lock_irqsave(&nn->link_status_lock, flags); 293 294 sts = nn_readl(nn, NFP_NET_CFG_STS); 295 link_up = !!(sts & NFP_NET_CFG_STS_LINK); 296 297 if (nn->link_up == link_up) 298 goto out; 299 300 nn->link_up = link_up; 301 302 if (nn->link_up) { 303 netif_carrier_on(nn->netdev); 304 netdev_info(nn->netdev, "NIC Link is Up\n"); 305 } else { 306 netif_carrier_off(nn->netdev); 307 netdev_info(nn->netdev, "NIC Link is Down\n"); 308 } 309 out: 310 spin_unlock_irqrestore(&nn->link_status_lock, flags); 311 } 312 313 /** 314 * nfp_net_irq_lsc() - Interrupt service routine for link state changes 315 * @irq: Interrupt 316 * @data: Opaque data structure 317 * 318 * Return: Indicate if the interrupt has been handled. 319 */ 320 static irqreturn_t nfp_net_irq_lsc(int irq, void *data) 321 { 322 struct nfp_net *nn = data; 323 324 nfp_net_read_link_status(nn); 325 326 nfp_net_irq_unmask(nn, NFP_NET_IRQ_LSC_IDX); 327 328 return IRQ_HANDLED; 329 } 330 331 /** 332 * nfp_net_irq_exn() - Interrupt service routine for exceptions 333 * @irq: Interrupt 334 * @data: Opaque data structure 335 * 336 * Return: Indicate if the interrupt has been handled. 
337 */ 338 static irqreturn_t nfp_net_irq_exn(int irq, void *data) 339 { 340 struct nfp_net *nn = data; 341 342 nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__); 343 /* XXX TO BE IMPLEMENTED */ 344 return IRQ_HANDLED; 345 } 346 347 /** 348 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring 349 * @tx_ring: TX ring structure 350 */ 351 static void nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring) 352 { 353 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 354 struct nfp_net *nn = r_vec->nfp_net; 355 356 tx_ring->qcidx = tx_ring->idx * nn->stride_tx; 357 tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); 358 } 359 360 /** 361 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring 362 * @rx_ring: RX ring structure 363 */ 364 static void nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring) 365 { 366 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 367 struct nfp_net *nn = r_vec->nfp_net; 368 369 rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; 370 rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1); 371 372 rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); 373 rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx); 374 } 375 376 /** 377 * nfp_net_irqs_assign() - Assign IRQs and setup rvecs. 378 * @netdev: netdev structure 379 */ 380 static void nfp_net_irqs_assign(struct net_device *netdev) 381 { 382 struct nfp_net *nn = netdev_priv(netdev); 383 struct nfp_net_r_vector *r_vec; 384 int r; 385 386 /* Assumes nn->num_tx_rings == nn->num_rx_rings */ 387 if (nn->num_tx_rings > nn->num_r_vecs) { 388 nn_warn(nn, "More rings (%d) than vectors (%d).\n", 389 nn->num_tx_rings, nn->num_r_vecs); 390 nn->num_tx_rings = nn->num_r_vecs; 391 nn->num_rx_rings = nn->num_r_vecs; 392 } 393 394 nn->lsc_handler = nfp_net_irq_lsc; 395 nn->exn_handler = nfp_net_irq_exn; 396 397 for (r = 0; r < nn->num_r_vecs; r++) { 398 r_vec = &nn->r_vecs[r]; 399 r_vec->nfp_net = nn; 400 r_vec->handler = nfp_net_irq_rxtx; 401 r_vec->irq_idx = NFP_NET_NON_Q_VECTORS + r; 402 403 cpumask_set_cpu(r, &r_vec->affinity_mask); 404 405 r_vec->tx_ring = &nn->tx_rings[r]; 406 nn->tx_rings[r].idx = r; 407 nn->tx_rings[r].r_vec = r_vec; 408 nfp_net_tx_ring_init(r_vec->tx_ring); 409 410 r_vec->rx_ring = &nn->rx_rings[r]; 411 nn->rx_rings[r].idx = r; 412 nn->rx_rings[r].r_vec = r_vec; 413 nfp_net_rx_ring_init(r_vec->rx_ring); 414 } 415 } 416 417 /** 418 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) 419 * @nn: NFP Network structure 420 * @ctrl_offset: Control BAR offset where IRQ configuration should be written 421 * @format: printf-style format to construct the interrupt name 422 * @name: Pointer to allocated space for interrupt name 423 * @name_sz: Size of space for interrupt name 424 * @vector_idx: Index of MSI-X vector used for this interrupt 425 * @handler: IRQ handler to register for this interrupt 426 */ 427 static int 428 nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset, 429 const char *format, char *name, size_t name_sz, 430 unsigned int vector_idx, irq_handler_t handler) 431 { 432 struct msix_entry *entry; 433 int err; 434 435 entry = &nn->irq_entries[vector_idx]; 436 437 snprintf(name, name_sz, format, netdev_name(nn->netdev)); 438 err = request_irq(entry->vector, handler, 0, name, nn); 439 if (err) { 440 nn_err(nn, "Failed to request IRQ %d (err=%d).\n", 441 entry->vector, err); 442 return err; 443 } 444 nn_writeb(nn, ctrl_offset, vector_idx); 445 446 return 0; 447 } 448 449 /** 450 * nfp_net_aux_irq_free() - Free an 
auxiliary interrupt (LSC or EXN) 451 * @nn: NFP Network structure 452 * @ctrl_offset: Control BAR offset where IRQ configuration should be written 453 * @vector_idx: Index of MSI-X vector used for this interrupt 454 */ 455 static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, 456 unsigned int vector_idx) 457 { 458 nn_writeb(nn, ctrl_offset, 0xff); 459 free_irq(nn->irq_entries[vector_idx].vector, nn); 460 } 461 462 /* Transmit 463 * 464 * One queue controller peripheral queue is used for transmit. The 465 * driver en-queues packets for transmit by advancing the write 466 * pointer. The device indicates that packets have transmitted by 467 * advancing the read pointer. The driver maintains a local copy of 468 * the read and write pointer in @struct nfp_net_tx_ring. The driver 469 * keeps @wr_p in sync with the queue controller write pointer and can 470 * determine how many packets have been transmitted by comparing its 471 * copy of the read pointer @rd_p with the read pointer maintained by 472 * the queue controller peripheral. 473 */ 474 475 /** 476 * nfp_net_tx_full() - Check if the TX ring is full 477 * @tx_ring: TX ring to check 478 * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) 479 * 480 * This function checks, based on the *host copy* of read/write 481 * pointer if a given TX ring is full. The real TX queue may have 482 * some newly made available slots. 483 * 484 * Return: True if the ring is full. 485 */ 486 static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) 487 { 488 return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); 489 } 490 491 /* Wrappers for deciding when to stop and restart TX queues */ 492 static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) 493 { 494 return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); 495 } 496 497 static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) 498 { 499 return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); 500 } 501 502 /** 503 * nfp_net_tx_ring_stop() - stop tx ring 504 * @nd_q: netdev queue 505 * @tx_ring: driver tx queue structure 506 * 507 * Safely stop TX ring. Remember that while we are running .start_xmit() 508 * someone else may be cleaning the TX ring completions so we need to be 509 * extra careful here. 510 */ 511 static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q, 512 struct nfp_net_tx_ring *tx_ring) 513 { 514 netif_tx_stop_queue(nd_q); 515 516 /* We can race with the TX completion out of NAPI so recheck */ 517 smp_mb(); 518 if (unlikely(nfp_net_tx_ring_should_wake(tx_ring))) 519 netif_tx_start_queue(nd_q); 520 } 521 522 /** 523 * nfp_net_tx_tso() - Set up Tx descriptor for LSO 524 * @nn: NFP Net device 525 * @r_vec: per-ring structure 526 * @txbuf: Pointer to driver soft TX descriptor 527 * @txd: Pointer to HW TX descriptor 528 * @skb: Pointer to SKB 529 * 530 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. 531 * Return error on packet header greater than maximum supported LSO header size. 
532 */ 533 static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, 534 struct nfp_net_tx_buf *txbuf, 535 struct nfp_net_tx_desc *txd, struct sk_buff *skb) 536 { 537 u32 hdrlen; 538 u16 mss; 539 540 if (!skb_is_gso(skb)) 541 return; 542 543 if (!skb->encapsulation) 544 hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); 545 else 546 hdrlen = skb_inner_transport_header(skb) - skb->data + 547 inner_tcp_hdrlen(skb); 548 549 txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; 550 txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); 551 552 mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK; 553 txd->l4_offset = hdrlen; 554 txd->mss = cpu_to_le16(mss); 555 txd->flags |= PCIE_DESC_TX_LSO; 556 557 u64_stats_update_begin(&r_vec->tx_sync); 558 r_vec->tx_lso++; 559 u64_stats_update_end(&r_vec->tx_sync); 560 } 561 562 /** 563 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor 564 * @nn: NFP Net device 565 * @r_vec: per-ring structure 566 * @txbuf: Pointer to driver soft TX descriptor 567 * @txd: Pointer to TX descriptor 568 * @skb: Pointer to SKB 569 * 570 * This function sets the TX checksum flags in the TX descriptor based 571 * on the configuration and the protocol of the packet to be transmitted. 572 */ 573 static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, 574 struct nfp_net_tx_buf *txbuf, 575 struct nfp_net_tx_desc *txd, struct sk_buff *skb) 576 { 577 struct ipv6hdr *ipv6h; 578 struct iphdr *iph; 579 u8 l4_hdr; 580 581 if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) 582 return; 583 584 if (skb->ip_summed != CHECKSUM_PARTIAL) 585 return; 586 587 txd->flags |= PCIE_DESC_TX_CSUM; 588 if (skb->encapsulation) 589 txd->flags |= PCIE_DESC_TX_ENCAP; 590 591 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); 592 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); 593 594 if (iph->version == 4) { 595 txd->flags |= PCIE_DESC_TX_IP4_CSUM; 596 l4_hdr = iph->protocol; 597 } else if (ipv6h->version == 6) { 598 l4_hdr = ipv6h->nexthdr; 599 } else { 600 nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n", 601 iph->version); 602 return; 603 } 604 605 switch (l4_hdr) { 606 case IPPROTO_TCP: 607 txd->flags |= PCIE_DESC_TX_TCP_CSUM; 608 break; 609 case IPPROTO_UDP: 610 txd->flags |= PCIE_DESC_TX_UDP_CSUM; 611 break; 612 default: 613 nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n", 614 l4_hdr); 615 return; 616 } 617 618 u64_stats_update_begin(&r_vec->tx_sync); 619 if (skb->encapsulation) 620 r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; 621 else 622 r_vec->hw_csum_tx += txbuf->pkt_cnt; 623 u64_stats_update_end(&r_vec->tx_sync); 624 } 625 626 /** 627 * nfp_net_tx() - Main transmit entry point 628 * @skb: SKB to transmit 629 * @netdev: netdev structure 630 * 631 * Return: NETDEV_TX_OK on success. 
632 */ 633 static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) 634 { 635 struct nfp_net *nn = netdev_priv(netdev); 636 const struct skb_frag_struct *frag; 637 struct nfp_net_r_vector *r_vec; 638 struct nfp_net_tx_desc *txd, txdg; 639 struct nfp_net_tx_buf *txbuf; 640 struct nfp_net_tx_ring *tx_ring; 641 struct netdev_queue *nd_q; 642 dma_addr_t dma_addr; 643 unsigned int fsize; 644 int f, nr_frags; 645 int wr_idx; 646 u16 qidx; 647 648 qidx = skb_get_queue_mapping(skb); 649 tx_ring = &nn->tx_rings[qidx]; 650 r_vec = tx_ring->r_vec; 651 nd_q = netdev_get_tx_queue(nn->netdev, qidx); 652 653 nr_frags = skb_shinfo(skb)->nr_frags; 654 655 if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { 656 nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n", 657 qidx, tx_ring->wr_p, tx_ring->rd_p); 658 netif_tx_stop_queue(nd_q); 659 u64_stats_update_begin(&r_vec->tx_sync); 660 r_vec->tx_busy++; 661 u64_stats_update_end(&r_vec->tx_sync); 662 return NETDEV_TX_BUSY; 663 } 664 665 /* Start with the head skbuf */ 666 dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb), 667 DMA_TO_DEVICE); 668 if (dma_mapping_error(&nn->pdev->dev, dma_addr)) 669 goto err_free; 670 671 wr_idx = tx_ring->wr_p % tx_ring->cnt; 672 673 /* Stash the soft descriptor of the head then initialize it */ 674 txbuf = &tx_ring->txbufs[wr_idx]; 675 txbuf->skb = skb; 676 txbuf->dma_addr = dma_addr; 677 txbuf->fidx = -1; 678 txbuf->pkt_cnt = 1; 679 txbuf->real_len = skb->len; 680 681 /* Build TX descriptor */ 682 txd = &tx_ring->txds[wr_idx]; 683 txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0; 684 txd->dma_len = cpu_to_le16(skb_headlen(skb)); 685 nfp_desc_set_dma_addr(txd, dma_addr); 686 txd->data_len = cpu_to_le16(skb->len); 687 688 txd->flags = 0; 689 txd->mss = 0; 690 txd->l4_offset = 0; 691 692 nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb); 693 694 nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb); 695 696 if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { 697 txd->flags |= PCIE_DESC_TX_VLAN; 698 txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); 699 } 700 701 /* Gather DMA */ 702 if (nr_frags > 0) { 703 /* all descs must match except for in addr, length and eop */ 704 txdg = *txd; 705 706 for (f = 0; f < nr_frags; f++) { 707 frag = &skb_shinfo(skb)->frags[f]; 708 fsize = skb_frag_size(frag); 709 710 dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0, 711 fsize, DMA_TO_DEVICE); 712 if (dma_mapping_error(&nn->pdev->dev, dma_addr)) 713 goto err_unmap; 714 715 wr_idx = (wr_idx + 1) % tx_ring->cnt; 716 tx_ring->txbufs[wr_idx].skb = skb; 717 tx_ring->txbufs[wr_idx].dma_addr = dma_addr; 718 tx_ring->txbufs[wr_idx].fidx = f; 719 720 txd = &tx_ring->txds[wr_idx]; 721 *txd = txdg; 722 txd->dma_len = cpu_to_le16(fsize); 723 nfp_desc_set_dma_addr(txd, dma_addr); 724 txd->offset_eop = 725 (f == nr_frags - 1) ? 
PCIE_DESC_TX_EOP : 0; 726 } 727 728 u64_stats_update_begin(&r_vec->tx_sync); 729 r_vec->tx_gather++; 730 u64_stats_update_end(&r_vec->tx_sync); 731 } 732 733 netdev_tx_sent_queue(nd_q, txbuf->real_len); 734 735 tx_ring->wr_p += nr_frags + 1; 736 if (nfp_net_tx_ring_should_stop(tx_ring)) 737 nfp_net_tx_ring_stop(nd_q, tx_ring); 738 739 tx_ring->wr_ptr_add += nr_frags + 1; 740 if (!skb->xmit_more || netif_xmit_stopped(nd_q)) { 741 /* force memory write before we let HW know */ 742 wmb(); 743 nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); 744 tx_ring->wr_ptr_add = 0; 745 } 746 747 skb_tx_timestamp(skb); 748 749 return NETDEV_TX_OK; 750 751 err_unmap: 752 --f; 753 while (f >= 0) { 754 frag = &skb_shinfo(skb)->frags[f]; 755 dma_unmap_page(&nn->pdev->dev, 756 tx_ring->txbufs[wr_idx].dma_addr, 757 skb_frag_size(frag), DMA_TO_DEVICE); 758 tx_ring->txbufs[wr_idx].skb = NULL; 759 tx_ring->txbufs[wr_idx].dma_addr = 0; 760 tx_ring->txbufs[wr_idx].fidx = -2; 761 wr_idx = wr_idx - 1; 762 if (wr_idx < 0) 763 wr_idx += tx_ring->cnt; 764 } 765 dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr, 766 skb_headlen(skb), DMA_TO_DEVICE); 767 tx_ring->txbufs[wr_idx].skb = NULL; 768 tx_ring->txbufs[wr_idx].dma_addr = 0; 769 tx_ring->txbufs[wr_idx].fidx = -2; 770 err_free: 771 nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n"); 772 u64_stats_update_begin(&r_vec->tx_sync); 773 r_vec->tx_errors++; 774 u64_stats_update_end(&r_vec->tx_sync); 775 dev_kfree_skb_any(skb); 776 return NETDEV_TX_OK; 777 } 778 779 /** 780 * nfp_net_tx_complete() - Handle completed TX packets 781 * @tx_ring: TX ring structure 782 * 783 * Return: Nothing. 784 */ 785 static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) 786 { 787 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 788 struct nfp_net *nn = r_vec->nfp_net; 789 const struct skb_frag_struct *frag; 790 struct netdev_queue *nd_q; 791 u32 done_pkts = 0, done_bytes = 0; 792 struct sk_buff *skb; 793 int todo, nr_frags; 794 u32 qcp_rd_p; 795 int fidx; 796 int idx; 797 798 /* Work out how many descriptors have been transmitted */ 799 qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); 800 801 if (qcp_rd_p == tx_ring->qcp_rd_p) 802 return; 803 804 if (qcp_rd_p > tx_ring->qcp_rd_p) 805 todo = qcp_rd_p - tx_ring->qcp_rd_p; 806 else 807 todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; 808 809 while (todo--) { 810 idx = tx_ring->rd_p % tx_ring->cnt; 811 tx_ring->rd_p++; 812 813 skb = tx_ring->txbufs[idx].skb; 814 if (!skb) 815 continue; 816 817 nr_frags = skb_shinfo(skb)->nr_frags; 818 fidx = tx_ring->txbufs[idx].fidx; 819 820 if (fidx == -1) { 821 /* unmap head */ 822 dma_unmap_single(&nn->pdev->dev, 823 tx_ring->txbufs[idx].dma_addr, 824 skb_headlen(skb), DMA_TO_DEVICE); 825 826 done_pkts += tx_ring->txbufs[idx].pkt_cnt; 827 done_bytes += tx_ring->txbufs[idx].real_len; 828 } else { 829 /* unmap fragment */ 830 frag = &skb_shinfo(skb)->frags[fidx]; 831 dma_unmap_page(&nn->pdev->dev, 832 tx_ring->txbufs[idx].dma_addr, 833 skb_frag_size(frag), DMA_TO_DEVICE); 834 } 835 836 /* check for last gather fragment */ 837 if (fidx == nr_frags - 1) 838 dev_kfree_skb_any(skb); 839 840 tx_ring->txbufs[idx].dma_addr = 0; 841 tx_ring->txbufs[idx].skb = NULL; 842 tx_ring->txbufs[idx].fidx = -2; 843 } 844 845 tx_ring->qcp_rd_p = qcp_rd_p; 846 847 u64_stats_update_begin(&r_vec->tx_sync); 848 r_vec->tx_bytes += done_bytes; 849 r_vec->tx_pkts += done_pkts; 850 u64_stats_update_end(&r_vec->tx_sync); 851 852 nd_q = netdev_get_tx_queue(nn->netdev,
tx_ring->idx); 853 netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); 854 if (nfp_net_tx_ring_should_wake(tx_ring)) { 855 /* Make sure TX thread will see updated tx_ring->rd_p */ 856 smp_mb(); 857 858 if (unlikely(netif_tx_queue_stopped(nd_q))) 859 netif_tx_wake_queue(nd_q); 860 } 861 862 WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, 863 "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", 864 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); 865 } 866 867 /** 868 * nfp_net_tx_flush() - Free any untransmitted buffers currently on the TX ring 869 * @tx_ring: TX ring structure 870 * 871 * Assumes that the device is stopped 872 */ 873 static void nfp_net_tx_flush(struct nfp_net_tx_ring *tx_ring) 874 { 875 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 876 struct nfp_net *nn = r_vec->nfp_net; 877 struct pci_dev *pdev = nn->pdev; 878 const struct skb_frag_struct *frag; 879 struct netdev_queue *nd_q; 880 struct sk_buff *skb; 881 int nr_frags; 882 int fidx; 883 int idx; 884 885 while (tx_ring->rd_p != tx_ring->wr_p) { 886 idx = tx_ring->rd_p % tx_ring->cnt; 887 888 skb = tx_ring->txbufs[idx].skb; 889 if (skb) { 890 nr_frags = skb_shinfo(skb)->nr_frags; 891 fidx = tx_ring->txbufs[idx].fidx; 892 893 if (fidx == -1) { 894 /* unmap head */ 895 dma_unmap_single(&pdev->dev, 896 tx_ring->txbufs[idx].dma_addr, 897 skb_headlen(skb), 898 DMA_TO_DEVICE); 899 } else { 900 /* unmap fragment */ 901 frag = &skb_shinfo(skb)->frags[fidx]; 902 dma_unmap_page(&pdev->dev, 903 tx_ring->txbufs[idx].dma_addr, 904 skb_frag_size(frag), 905 DMA_TO_DEVICE); 906 } 907 908 /* check for last gather fragment */ 909 if (fidx == nr_frags - 1) 910 dev_kfree_skb_any(skb); 911 912 tx_ring->txbufs[idx].dma_addr = 0; 913 tx_ring->txbufs[idx].skb = NULL; 914 tx_ring->txbufs[idx].fidx = -2; 915 } 916 917 memset(&tx_ring->txds[idx], 0, sizeof(tx_ring->txds[idx])); 918 919 tx_ring->qcp_rd_p++; 920 tx_ring->rd_p++; 921 } 922 923 nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx); 924 netdev_tx_reset_queue(nd_q); 925 } 926 927 static void nfp_net_tx_timeout(struct net_device *netdev) 928 { 929 struct nfp_net *nn = netdev_priv(netdev); 930 int i; 931 932 for (i = 0; i < nn->num_tx_rings; i++) { 933 if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i))) 934 continue; 935 nn_warn(nn, "TX timeout on ring: %d\n", i); 936 } 937 nn_warn(nn, "TX watchdog timeout\n"); 938 } 939 940 /* Receive processing 941 */ 942 943 /** 944 * nfp_net_rx_space() - return the number of free slots on the RX ring 945 * @rx_ring: RX ring structure 946 * 947 * Make sure we leave at least one slot free. 948 * 949 * Return: Number of free slots on the RX ring 950 */ 951 static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) 952 { 953 return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); 954 } 955 956 /** 957 * nfp_net_rx_alloc_one() - Allocate and map skb for RX 958 * @rx_ring: RX ring structure of the skb 959 * @dma_addr: Pointer to storage for DMA address (output param) 960 * 961 * This function will allocate a new skb and map it for DMA. 962 * 963 * Return: allocated skb or NULL on failure.
964 */ 965 static struct sk_buff * 966 nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr) 967 { 968 struct nfp_net *nn = rx_ring->r_vec->nfp_net; 969 struct sk_buff *skb; 970 971 skb = netdev_alloc_skb(nn->netdev, nn->fl_bufsz); 972 if (!skb) { 973 nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); 974 return NULL; 975 } 976 977 *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, 978 nn->fl_bufsz, DMA_FROM_DEVICE); 979 if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { 980 dev_kfree_skb_any(skb); 981 nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); 982 return NULL; 983 } 984 985 return skb; 986 } 987 988 /** 989 * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings 990 * @rx_ring: RX ring structure 991 * @skb: Skb to put on rings 992 * @dma_addr: DMA address of skb mapping 993 */ 994 static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, 995 struct sk_buff *skb, dma_addr_t dma_addr) 996 { 997 unsigned int wr_idx; 998 999 wr_idx = rx_ring->wr_p % rx_ring->cnt; 1000 1001 /* Stash SKB and DMA address away */ 1002 rx_ring->rxbufs[wr_idx].skb = skb; 1003 rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; 1004 1005 /* Fill freelist descriptor */ 1006 rx_ring->rxds[wr_idx].fld.reserved = 0; 1007 rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; 1008 nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr); 1009 1010 rx_ring->wr_p++; 1011 rx_ring->wr_ptr_add++; 1012 if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) { 1013 /* Update write pointer of the freelist queue. Make 1014 * sure all writes are flushed before telling the hardware. 1015 */ 1016 wmb(); 1017 nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add); 1018 rx_ring->wr_ptr_add = 0; 1019 } 1020 } 1021 1022 /** 1023 * nfp_net_rx_flush() - Free any buffers currently on the RX ring 1024 * @rx_ring: RX ring to remove buffers from 1025 * 1026 * Assumes that the device is stopped 1027 */ 1028 static void nfp_net_rx_flush(struct nfp_net_rx_ring *rx_ring) 1029 { 1030 struct nfp_net *nn = rx_ring->r_vec->nfp_net; 1031 struct pci_dev *pdev = nn->pdev; 1032 int idx; 1033 1034 while (rx_ring->rd_p != rx_ring->wr_p) { 1035 idx = rx_ring->rd_p % rx_ring->cnt; 1036 1037 if (rx_ring->rxbufs[idx].skb) { 1038 dma_unmap_single(&pdev->dev, 1039 rx_ring->rxbufs[idx].dma_addr, 1040 nn->fl_bufsz, DMA_FROM_DEVICE); 1041 dev_kfree_skb_any(rx_ring->rxbufs[idx].skb); 1042 rx_ring->rxbufs[idx].dma_addr = 0; 1043 rx_ring->rxbufs[idx].skb = NULL; 1044 } 1045 1046 memset(&rx_ring->rxds[idx], 0, sizeof(rx_ring->rxds[idx])); 1047 1048 rx_ring->rd_p++; 1049 } 1050 } 1051 1052 /** 1053 * nfp_net_rx_fill_freelist() - Attempt filling freelist with RX buffers 1054 * @rx_ring: RX ring to fill 1055 * 1056 * Try to fill as many buffers as possible into freelist. Return 1057 * number of buffers added. 1058 * 1059 * Return: Number of freelist buffers added. 
1060 */ 1061 static int nfp_net_rx_fill_freelist(struct nfp_net_rx_ring *rx_ring) 1062 { 1063 struct sk_buff *skb; 1064 dma_addr_t dma_addr; 1065 1066 while (nfp_net_rx_space(rx_ring)) { 1067 skb = nfp_net_rx_alloc_one(rx_ring, &dma_addr); 1068 if (!skb) { 1069 nfp_net_rx_flush(rx_ring); 1070 return -ENOMEM; 1071 } 1072 nfp_net_rx_give_one(rx_ring, skb, dma_addr); 1073 } 1074 1075 return 0; 1076 } 1077 1078 /** 1079 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors 1080 * @flags: RX descriptor flags field in CPU byte order 1081 */ 1082 static int nfp_net_rx_csum_has_errors(u16 flags) 1083 { 1084 u16 csum_all_checked, csum_all_ok; 1085 1086 csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; 1087 csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; 1088 1089 return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); 1090 } 1091 1092 /** 1093 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags 1094 * @nn: NFP Net device 1095 * @r_vec: per-ring structure 1096 * @rxd: Pointer to RX descriptor 1097 * @skb: Pointer to SKB 1098 */ 1099 static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, 1100 struct nfp_net_rx_desc *rxd, struct sk_buff *skb) 1101 { 1102 skb_checksum_none_assert(skb); 1103 1104 if (!(nn->netdev->features & NETIF_F_RXCSUM)) 1105 return; 1106 1107 if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { 1108 u64_stats_update_begin(&r_vec->rx_sync); 1109 r_vec->hw_csum_rx_error++; 1110 u64_stats_update_end(&r_vec->rx_sync); 1111 return; 1112 } 1113 1114 /* Assume that the firmware will never report inner CSUM_OK unless outer 1115 * L4 headers were successfully parsed. FW will always report zero UDP 1116 * checksum as CSUM_OK. 1117 */ 1118 if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || 1119 rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { 1120 __skb_incr_checksum_unnecessary(skb); 1121 u64_stats_update_begin(&r_vec->rx_sync); 1122 r_vec->hw_csum_rx_ok++; 1123 u64_stats_update_end(&r_vec->rx_sync); 1124 } 1125 1126 if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || 1127 rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { 1128 __skb_incr_checksum_unnecessary(skb); 1129 u64_stats_update_begin(&r_vec->rx_sync); 1130 r_vec->hw_csum_rx_inner_ok++; 1131 u64_stats_update_end(&r_vec->rx_sync); 1132 } 1133 } 1134 1135 /** 1136 * nfp_net_set_hash() - Set SKB hash data 1137 * @netdev: adapter's net_device structure 1138 * @skb: SKB to set the hash data on 1139 * @rxd: RX descriptor 1140 * 1141 * The RSS hash and hash-type are pre-pended to the packet data. 1142 * Extract and decode it and set the skb fields. 
1143 */ 1144 static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, 1145 struct nfp_net_rx_desc *rxd) 1146 { 1147 struct nfp_net_rx_hash *rx_hash; 1148 1149 if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || 1150 !(netdev->features & NETIF_F_RXHASH)) 1151 return; 1152 1153 rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); 1154 1155 switch (be32_to_cpu(rx_hash->hash_type)) { 1156 case NFP_NET_RSS_IPV4: 1157 case NFP_NET_RSS_IPV6: 1158 case NFP_NET_RSS_IPV6_EX: 1159 skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); 1160 break; 1161 default: 1162 skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); 1163 break; 1164 } 1165 } 1166 1167 /** 1168 * nfp_net_rx() - receive up to @budget packets on @rx_ring 1169 * @rx_ring: RX ring to receive from 1170 * @budget: NAPI budget 1171 * 1172 * Note, this function is separated out from the napi poll function to 1173 * more cleanly separate packet receive code from other bookkeeping 1174 * functions performed in the napi poll function. 1175 * 1176 * There are differences between the NFP-3200 firmware and the 1177 * NFP-6000 firmware. The NFP-3200 firmware uses a dedicated RX queue 1178 * to indicate that new packets have arrived. The NFP-6000 does not 1179 * have this queue and uses the DD bit in the RX descriptor. This 1180 * method cannot be used on the NFP-3200 as it causes a race 1181 * condition: The RX ring write pointer on the NFP-3200 is updated 1182 * after packets (and descriptors) have been DMAed. If the DD bit is 1183 * used and subsequently the read pointer is updated, this may cause 1184 * the RX queue to underflow (if the firmware has not yet updated the 1185 * write pointer). Therefore we use slightly ugly conditional code 1186 * below to handle the differences. We may, in the future, update the 1187 * NFP-3200 firmware to behave the same as the firmware on the 1188 * NFP-6000. 1189 * 1190 * Return: Number of packets received. 1191 */ 1192 static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) 1193 { 1194 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 1195 struct nfp_net *nn = r_vec->nfp_net; 1196 unsigned int data_len, meta_len; 1197 int avail = 0, pkts_polled = 0; 1198 struct sk_buff *skb, *new_skb; 1199 struct nfp_net_rx_desc *rxd; 1200 dma_addr_t new_dma_addr; 1201 u32 qcp_wr_p; 1202 int idx; 1203 1204 if (nn->is_nfp3200) { 1205 /* Work out how many packets arrived */ 1206 qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); 1207 idx = rx_ring->rd_p % rx_ring->cnt; 1208 1209 if (qcp_wr_p == idx) 1210 /* No new packets */ 1211 return 0; 1212 1213 if (qcp_wr_p > idx) 1214 avail = qcp_wr_p - idx; 1215 else 1216 avail = qcp_wr_p + rx_ring->cnt - idx; 1217 } else { 1218 avail = budget + 1; 1219 } 1220 1221 while (avail > 0 && pkts_polled < budget) { 1222 idx = rx_ring->rd_p % rx_ring->cnt; 1223 1224 rxd = &rx_ring->rxds[idx]; 1225 if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { 1226 if (nn->is_nfp3200) 1227 nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", 1228 rx_ring->idx, idx, 1229 rxd->vals[0], rxd->vals[1]); 1230 break; 1231 } 1232 /* Memory barrier to ensure that we won't do other reads 1233 * before the DD bit.
1234 */ 1235 dma_rmb(); 1236 1237 rx_ring->rd_p++; 1238 pkts_polled++; 1239 avail--; 1240 1241 skb = rx_ring->rxbufs[idx].skb; 1242 1243 new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr); 1244 if (!new_skb) { 1245 nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, 1246 rx_ring->rxbufs[idx].dma_addr); 1247 u64_stats_update_begin(&r_vec->rx_sync); 1248 r_vec->rx_drops++; 1249 u64_stats_update_end(&r_vec->rx_sync); 1250 continue; 1251 } 1252 1253 dma_unmap_single(&nn->pdev->dev, 1254 rx_ring->rxbufs[idx].dma_addr, 1255 nn->fl_bufsz, DMA_FROM_DEVICE); 1256 1257 nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); 1258 1259 meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; 1260 data_len = le16_to_cpu(rxd->rxd.data_len); 1261 1262 if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) { 1263 dev_kfree_skb_any(skb); 1264 continue; 1265 } 1266 1267 if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) { 1268 /* The packet data starts after the metadata */ 1269 skb_reserve(skb, meta_len); 1270 } else { 1271 /* The packet data starts at a fixed offset */ 1272 skb_reserve(skb, nn->rx_offset); 1273 } 1274 1275 /* Adjust the SKB for the dynamic meta data pre-pended */ 1276 skb_put(skb, data_len - meta_len); 1277 1278 nfp_net_set_hash(nn->netdev, skb, rxd); 1279 1280 /* Pad small frames to minimum */ 1281 if (skb_put_padto(skb, 60)) 1282 break; 1283 1284 /* Stats update */ 1285 u64_stats_update_begin(&r_vec->rx_sync); 1286 r_vec->rx_pkts++; 1287 r_vec->rx_bytes += skb->len; 1288 u64_stats_update_end(&r_vec->rx_sync); 1289 1290 skb_record_rx_queue(skb, rx_ring->idx); 1291 skb->protocol = eth_type_trans(skb, nn->netdev); 1292 1293 nfp_net_rx_csum(nn, r_vec, rxd, skb); 1294 1295 if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) 1296 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), 1297 le16_to_cpu(rxd->rxd.vlan)); 1298 1299 napi_gro_receive(&rx_ring->r_vec->napi, skb); 1300 } 1301 1302 if (nn->is_nfp3200) 1303 nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); 1304 1305 return pkts_polled; 1306 } 1307 1308 /** 1309 * nfp_net_poll() - napi poll function 1310 * @napi: NAPI structure 1311 * @budget: NAPI budget 1312 * 1313 * Return: number of packets polled. 
1314 */ 1315 static int nfp_net_poll(struct napi_struct *napi, int budget) 1316 { 1317 struct nfp_net_r_vector *r_vec = 1318 container_of(napi, struct nfp_net_r_vector, napi); 1319 struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; 1320 struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; 1321 struct nfp_net *nn = r_vec->nfp_net; 1322 struct netdev_queue *txq; 1323 unsigned int pkts_polled; 1324 1325 tx_ring = &nn->tx_rings[rx_ring->idx]; 1326 txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); 1327 nfp_net_tx_complete(tx_ring); 1328 1329 pkts_polled = nfp_net_rx(rx_ring, budget); 1330 1331 if (pkts_polled < budget) { 1332 napi_complete_done(napi, pkts_polled); 1333 nfp_net_irq_unmask(nn, r_vec->irq_idx); 1334 } 1335 1336 return pkts_polled; 1337 } 1338 1339 /* Setup and Configuration 1340 */ 1341 1342 /** 1343 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring 1344 * @tx_ring: TX ring to free 1345 */ 1346 static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) 1347 { 1348 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 1349 struct nfp_net *nn = r_vec->nfp_net; 1350 struct pci_dev *pdev = nn->pdev; 1351 1352 nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), 0); 1353 nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), 0); 1354 nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), 0); 1355 1356 kfree(tx_ring->txbufs); 1357 1358 if (tx_ring->txds) 1359 dma_free_coherent(&pdev->dev, tx_ring->size, 1360 tx_ring->txds, tx_ring->dma); 1361 1362 tx_ring->cnt = 0; 1363 tx_ring->wr_p = 0; 1364 tx_ring->rd_p = 0; 1365 tx_ring->qcp_rd_p = 0; 1366 tx_ring->wr_ptr_add = 0; 1367 1368 tx_ring->txbufs = NULL; 1369 tx_ring->txds = NULL; 1370 tx_ring->dma = 0; 1371 tx_ring->size = 0; 1372 } 1373 1374 /** 1375 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring 1376 * @tx_ring: TX Ring structure to allocate 1377 * 1378 * Return: 0 on success, negative errno otherwise. 
1379 */ 1380 static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring) 1381 { 1382 struct nfp_net_r_vector *r_vec = tx_ring->r_vec; 1383 struct nfp_net *nn = r_vec->nfp_net; 1384 struct pci_dev *pdev = nn->pdev; 1385 int sz; 1386 1387 tx_ring->cnt = nn->txd_cnt; 1388 1389 tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; 1390 tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size, 1391 &tx_ring->dma, GFP_KERNEL); 1392 if (!tx_ring->txds) 1393 goto err_alloc; 1394 1395 sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; 1396 tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); 1397 if (!tx_ring->txbufs) 1398 goto err_alloc; 1399 1400 /* Write the DMA address, size and MSI-X info to the device */ 1401 nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(tx_ring->idx), tx_ring->dma); 1402 nn_writeb(nn, NFP_NET_CFG_TXR_SZ(tx_ring->idx), ilog2(tx_ring->cnt)); 1403 nn_writeb(nn, NFP_NET_CFG_TXR_VEC(tx_ring->idx), r_vec->irq_idx); 1404 1405 netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx); 1406 1407 nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n", 1408 tx_ring->idx, tx_ring->qcidx, 1409 tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds); 1410 1411 return 0; 1412 1413 err_alloc: 1414 nfp_net_tx_ring_free(tx_ring); 1415 return -ENOMEM; 1416 } 1417 1418 /** 1419 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring 1420 * @rx_ring: RX ring to free 1421 */ 1422 static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) 1423 { 1424 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 1425 struct nfp_net *nn = r_vec->nfp_net; 1426 struct pci_dev *pdev = nn->pdev; 1427 1428 nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), 0); 1429 nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), 0); 1430 nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), 0); 1431 1432 kfree(rx_ring->rxbufs); 1433 1434 if (rx_ring->rxds) 1435 dma_free_coherent(&pdev->dev, rx_ring->size, 1436 rx_ring->rxds, rx_ring->dma); 1437 1438 rx_ring->cnt = 0; 1439 rx_ring->wr_p = 0; 1440 rx_ring->rd_p = 0; 1441 rx_ring->wr_ptr_add = 0; 1442 1443 rx_ring->rxbufs = NULL; 1444 rx_ring->rxds = NULL; 1445 rx_ring->dma = 0; 1446 rx_ring->size = 0; 1447 } 1448 1449 /** 1450 * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring 1451 * @rx_ring: RX ring to allocate 1452 * 1453 * Return: 0 on success, negative errno otherwise. 
1454 */ 1455 static int nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring) 1456 { 1457 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 1458 struct nfp_net *nn = r_vec->nfp_net; 1459 struct pci_dev *pdev = nn->pdev; 1460 int sz; 1461 1462 rx_ring->cnt = nn->rxd_cnt; 1463 1464 rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; 1465 rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size, 1466 &rx_ring->dma, GFP_KERNEL); 1467 if (!rx_ring->rxds) 1468 goto err_alloc; 1469 1470 sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; 1471 rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); 1472 if (!rx_ring->rxbufs) 1473 goto err_alloc; 1474 1475 /* Write the DMA address, size and MSI-X info to the device */ 1476 nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(rx_ring->idx), rx_ring->dma); 1477 nn_writeb(nn, NFP_NET_CFG_RXR_SZ(rx_ring->idx), ilog2(rx_ring->cnt)); 1478 nn_writeb(nn, NFP_NET_CFG_RXR_VEC(rx_ring->idx), r_vec->irq_idx); 1479 1480 nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n", 1481 rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx, 1482 rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds); 1483 1484 return 0; 1485 1486 err_alloc: 1487 nfp_net_rx_ring_free(rx_ring); 1488 return -ENOMEM; 1489 } 1490 1491 static void __nfp_net_free_rings(struct nfp_net *nn, unsigned int n_free) 1492 { 1493 struct nfp_net_r_vector *r_vec; 1494 struct msix_entry *entry; 1495 1496 while (n_free--) { 1497 r_vec = &nn->r_vecs[n_free]; 1498 entry = &nn->irq_entries[r_vec->irq_idx]; 1499 1500 nfp_net_rx_ring_free(r_vec->rx_ring); 1501 nfp_net_tx_ring_free(r_vec->tx_ring); 1502 1503 irq_set_affinity_hint(entry->vector, NULL); 1504 free_irq(entry->vector, r_vec); 1505 1506 netif_napi_del(&r_vec->napi); 1507 } 1508 } 1509 1510 /** 1511 * nfp_net_free_rings() - Free all ring resources 1512 * @nn: NFP Net device to reconfigure 1513 */ 1514 static void nfp_net_free_rings(struct nfp_net *nn) 1515 { 1516 __nfp_net_free_rings(nn, nn->num_r_vecs); 1517 } 1518 1519 /** 1520 * nfp_net_alloc_rings() - Allocate resources for RX and TX rings 1521 * @nn: NFP Net device to reconfigure 1522 * 1523 * Return: 0 on success or negative errno on error. 
1524 */ 1525 static int nfp_net_alloc_rings(struct nfp_net *nn) 1526 { 1527 struct nfp_net_r_vector *r_vec; 1528 struct msix_entry *entry; 1529 int err; 1530 int r; 1531 1532 for (r = 0; r < nn->num_r_vecs; r++) { 1533 r_vec = &nn->r_vecs[r]; 1534 entry = &nn->irq_entries[r_vec->irq_idx]; 1535 1536 /* Setup NAPI */ 1537 netif_napi_add(nn->netdev, &r_vec->napi, 1538 nfp_net_poll, NAPI_POLL_WEIGHT); 1539 1540 snprintf(r_vec->name, sizeof(r_vec->name), 1541 "%s-rxtx-%d", nn->netdev->name, r); 1542 err = request_irq(entry->vector, r_vec->handler, 0, 1543 r_vec->name, r_vec); 1544 if (err) { 1545 nn_dbg(nn, "Error requesting IRQ %d\n", entry->vector); 1546 goto err_napi_del; 1547 } 1548 1549 irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask); 1550 1551 nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", 1552 r, entry->vector, entry->entry); 1553 1554 /* Allocate TX ring resources */ 1555 err = nfp_net_tx_ring_alloc(r_vec->tx_ring); 1556 if (err) 1557 goto err_free_irq; 1558 1559 /* Allocate RX ring resources */ 1560 err = nfp_net_rx_ring_alloc(r_vec->rx_ring); 1561 if (err) 1562 goto err_free_tx; 1563 } 1564 1565 return 0; 1566 1567 err_free_tx: 1568 nfp_net_tx_ring_free(r_vec->tx_ring); 1569 err_free_irq: 1570 irq_set_affinity_hint(entry->vector, NULL); 1571 free_irq(entry->vector, r_vec); 1572 err_napi_del: 1573 netif_napi_del(&r_vec->napi); 1574 __nfp_net_free_rings(nn, r); 1575 return err; 1576 } 1577 1578 /** 1579 * nfp_net_rss_write_itbl() - Write RSS indirection table to device 1580 * @nn: NFP Net device to reconfigure 1581 */ 1582 void nfp_net_rss_write_itbl(struct nfp_net *nn) 1583 { 1584 int i; 1585 1586 for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4) 1587 nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i, 1588 get_unaligned_le32(nn->rss_itbl + i)); 1589 } 1590 1591 /** 1592 * nfp_net_rss_write_key() - Write RSS hash key to device 1593 * @nn: NFP Net device to reconfigure 1594 */ 1595 void nfp_net_rss_write_key(struct nfp_net *nn) 1596 { 1597 int i; 1598 1599 for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4) 1600 nn_writel(nn, NFP_NET_CFG_RSS_KEY + i, 1601 get_unaligned_le32(nn->rss_key + i)); 1602 } 1603 1604 /** 1605 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW 1606 * @nn: NFP Net device to reconfigure 1607 */ 1608 void nfp_net_coalesce_write_cfg(struct nfp_net *nn) 1609 { 1610 u8 i; 1611 u32 factor; 1612 u32 value; 1613 1614 /* Compute factor used to convert coalesce '_usecs' parameters to 1615 * ME timestamp ticks. There are 16 ME clock cycles for each timestamp 1616 * count. 1617 */ 1618 factor = nn->me_freq_mhz / 16; 1619 1620 /* copy RX interrupt coalesce parameters */ 1621 value = (nn->rx_coalesce_max_frames << 16) | 1622 (factor * nn->rx_coalesce_usecs); 1623 for (i = 0; i < nn->num_r_vecs; i++) 1624 nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); 1625 1626 /* copy TX interrupt coalesce parameters */ 1627 value = (nn->tx_coalesce_max_frames << 16) | 1628 (factor * nn->tx_coalesce_usecs); 1629 for (i = 0; i < nn->num_r_vecs; i++) 1630 nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); 1631 } 1632 1633 /** 1634 * nfp_net_write_mac_addr() - Write mac address to device registers 1635 * @nn: NFP Net device to reconfigure 1636 * @mac: Six-byte MAC address to be written 1637 * 1638 * We do a bit of byte swapping dance because firmware is LE. 
1639 */ 1640 static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac) 1641 { 1642 nn_writel(nn, NFP_NET_CFG_MACADDR + 0, 1643 get_unaligned_be32(nn->netdev->dev_addr)); 1644 /* We can't do writew for NFP-3200 compatibility */ 1645 nn_writel(nn, NFP_NET_CFG_MACADDR + 4, 1646 get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); 1647 } 1648 1649 /** 1650 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP 1651 * @nn: NFP Net device to reconfigure 1652 */ 1653 static void nfp_net_clear_config_and_disable(struct nfp_net *nn) 1654 { 1655 u32 new_ctrl, update; 1656 int err; 1657 1658 new_ctrl = nn->ctrl; 1659 new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE; 1660 update = NFP_NET_CFG_UPDATE_GEN; 1661 update |= NFP_NET_CFG_UPDATE_MSIX; 1662 update |= NFP_NET_CFG_UPDATE_RING; 1663 1664 if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) 1665 new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG; 1666 1667 nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); 1668 nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); 1669 1670 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); 1671 err = nfp_net_reconfig(nn, update); 1672 if (err) { 1673 nn_err(nn, "Could not disable device: %d\n", err); 1674 return; 1675 } 1676 1677 nn->ctrl = new_ctrl; 1678 } 1679 1680 /** 1681 * nfp_net_start_vec() - Start ring vector 1682 * @nn: NFP Net device structure 1683 * @r_vec: Ring vector to be started 1684 */ 1685 static int nfp_net_start_vec(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) 1686 { 1687 unsigned int irq_vec; 1688 int err = 0; 1689 1690 irq_vec = nn->irq_entries[r_vec->irq_idx].vector; 1691 1692 disable_irq(irq_vec); 1693 1694 err = nfp_net_rx_fill_freelist(r_vec->rx_ring); 1695 if (err) { 1696 nn_err(nn, "RV%02d: couldn't allocate enough buffers\n", 1697 r_vec->irq_idx); 1698 goto out; 1699 } 1700 1701 napi_enable(&r_vec->napi); 1702 out: 1703 enable_irq(irq_vec); 1704 1705 return err; 1706 } 1707 1708 static int nfp_net_netdev_open(struct net_device *netdev) 1709 { 1710 struct nfp_net *nn = netdev_priv(netdev); 1711 int err, r; 1712 u32 update = 0; 1713 u32 new_ctrl; 1714 1715 if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) { 1716 nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl); 1717 return -EBUSY; 1718 } 1719 1720 new_ctrl = nn->ctrl; 1721 1722 /* Step 1: Allocate resources for rings and the like 1723 * - Request interrupts 1724 * - Allocate RX and TX ring resources 1725 * - Setup initial RSS table 1726 */ 1727 err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn", 1728 nn->exn_name, sizeof(nn->exn_name), 1729 NFP_NET_IRQ_EXN_IDX, nn->exn_handler); 1730 if (err) 1731 return err; 1732 1733 err = nfp_net_alloc_rings(nn); 1734 if (err) 1735 goto err_free_exn; 1736 1737 err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); 1738 if (err) 1739 goto err_free_rings; 1740 1741 err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings); 1742 if (err) 1743 goto err_free_rings; 1744 1745 if (nn->cap & NFP_NET_CFG_CTRL_RSS) { 1746 nfp_net_rss_write_key(nn); 1747 nfp_net_rss_write_itbl(nn); 1748 nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg); 1749 update |= NFP_NET_CFG_UPDATE_RSS; 1750 } 1751 1752 if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { 1753 nfp_net_coalesce_write_cfg(nn); 1754 1755 new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD; 1756 update |= NFP_NET_CFG_UPDATE_IRQMOD; 1757 } 1758 1759 /* Step 2: Configure the NFP 1760 * - Enable rings from 0 to tx_rings/rx_rings - 1. 
1761 * - Write MAC address (in case it changed) 1762 * - Set the MTU 1763 * - Set the Freelist buffer size 1764 * - Enable the FW 1765 */ 1766 nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? 1767 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); 1768 1769 nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ? 1770 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1); 1771 1772 nfp_net_write_mac_addr(nn, netdev->dev_addr); 1773 1774 nn_writel(nn, NFP_NET_CFG_MTU, netdev->mtu); 1775 nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); 1776 1777 /* Enable device */ 1778 new_ctrl |= NFP_NET_CFG_CTRL_ENABLE; 1779 update |= NFP_NET_CFG_UPDATE_GEN; 1780 update |= NFP_NET_CFG_UPDATE_MSIX; 1781 update |= NFP_NET_CFG_UPDATE_RING; 1782 if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG) 1783 new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; 1784 1785 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); 1786 err = nfp_net_reconfig(nn, update); 1787 if (err) 1788 goto err_clear_config; 1789 1790 nn->ctrl = new_ctrl; 1791 1792 /* Since reconfiguration requests while NFP is down are ignored we 1793 * have to wipe the entire VXLAN configuration and reinitialize it. 1794 */ 1795 if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) { 1796 memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports)); 1797 memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt)); 1798 vxlan_get_rx_port(netdev); 1799 } 1800 1801 /* Step 3: Enable for kernel 1802 * - put some freelist descriptors on each RX ring 1803 * - enable NAPI on each ring 1804 * - enable all TX queues 1805 * - set link state 1806 */ 1807 for (r = 0; r < nn->num_r_vecs; r++) { 1808 err = nfp_net_start_vec(nn, &nn->r_vecs[r]); 1809 if (err) 1810 goto err_disable_napi; 1811 } 1812 1813 netif_tx_wake_all_queues(netdev); 1814 1815 err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc", 1816 nn->lsc_name, sizeof(nn->lsc_name), 1817 NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); 1818 if (err) 1819 goto err_stop_tx; 1820 nfp_net_read_link_status(nn); 1821 1822 return 0; 1823 1824 err_stop_tx: 1825 netif_tx_disable(netdev); 1826 for (r = 0; r < nn->num_r_vecs; r++) 1827 nfp_net_tx_flush(nn->r_vecs[r].tx_ring); 1828 err_disable_napi: 1829 while (r--) { 1830 napi_disable(&nn->r_vecs[r].napi); 1831 nfp_net_rx_flush(nn->r_vecs[r].rx_ring); 1832 } 1833 err_clear_config: 1834 nfp_net_clear_config_and_disable(nn); 1835 err_free_rings: 1836 nfp_net_free_rings(nn); 1837 err_free_exn: 1838 nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); 1839 return err; 1840 } 1841 1842 /** 1843 * nfp_net_netdev_close() - Called when the device is downed 1844 * @netdev: netdev structure 1845 */ 1846 static int nfp_net_netdev_close(struct net_device *netdev) 1847 { 1848 struct nfp_net *nn = netdev_priv(netdev); 1849 int r; 1850 1851 if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) { 1852 nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl); 1853 return 0; 1854 } 1855 1856 /* Step 1: Disable RX and TX rings from the Linux kernel perspective 1857 */ 1858 nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX); 1859 netif_carrier_off(netdev); 1860 nn->link_up = false; 1861 1862 for (r = 0; r < nn->num_r_vecs; r++) 1863 napi_disable(&nn->r_vecs[r].napi); 1864 1865 netif_tx_disable(netdev); 1866 1867 /* Step 2: Tell NFP 1868 */ 1869 nfp_net_clear_config_and_disable(nn); 1870 1871 /* Step 3: Free resources 1872 */ 1873 for (r = 0; r < nn->num_r_vecs; r++) { 1874 nfp_net_rx_flush(nn->r_vecs[r].rx_ring); 1875 nfp_net_tx_flush(nn->r_vecs[r].tx_ring); 1876 } 1877 1878 nfp_net_free_rings(nn); 1879 
nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX); 1880 1881 nn_dbg(nn, "%s down", netdev->name); 1882 return 0; 1883 } 1884 1885 static void nfp_net_set_rx_mode(struct net_device *netdev) 1886 { 1887 struct nfp_net *nn = netdev_priv(netdev); 1888 u32 new_ctrl; 1889 1890 new_ctrl = nn->ctrl; 1891 1892 if (netdev->flags & IFF_PROMISC) { 1893 if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) 1894 new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; 1895 else 1896 nn_warn(nn, "FW does not support promiscuous mode\n"); 1897 } else { 1898 new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; 1899 } 1900 1901 if (new_ctrl == nn->ctrl) 1902 return; 1903 1904 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); 1905 if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN)) 1906 return; 1907 1908 nn->ctrl = new_ctrl; 1909 } 1910 1911 static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) 1912 { 1913 struct nfp_net *nn = netdev_priv(netdev); 1914 u32 tmp; 1915 1916 nn_dbg(nn, "New MTU = %d\n", new_mtu); 1917 1918 if (new_mtu < 68 || new_mtu > nn->max_mtu) { 1919 nn_err(nn, "New MTU (%d) is not valid\n", new_mtu); 1920 return -EINVAL; 1921 } 1922 1923 netdev->mtu = new_mtu; 1924 1925 /* Freelist buffer size rounded up to the nearest 1K */ 1926 tmp = new_mtu + ETH_HLEN + VLAN_HLEN + NFP_NET_MAX_PREPEND; 1927 nn->fl_bufsz = roundup(tmp, 1024); 1928 1929 /* restart if running */ 1930 if (netif_running(netdev)) { 1931 nfp_net_netdev_close(netdev); 1932 nfp_net_netdev_open(netdev); 1933 } 1934 1935 return 0; 1936 } 1937 1938 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, 1939 struct rtnl_link_stats64 *stats) 1940 { 1941 struct nfp_net *nn = netdev_priv(netdev); 1942 int r; 1943 1944 for (r = 0; r < nn->num_r_vecs; r++) { 1945 struct nfp_net_r_vector *r_vec = &nn->r_vecs[r]; 1946 u64 data[3]; 1947 unsigned int start; 1948 1949 do { 1950 start = u64_stats_fetch_begin(&r_vec->rx_sync); 1951 data[0] = r_vec->rx_pkts; 1952 data[1] = r_vec->rx_bytes; 1953 data[2] = r_vec->rx_drops; 1954 } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); 1955 stats->rx_packets += data[0]; 1956 stats->rx_bytes += data[1]; 1957 stats->rx_dropped += data[2]; 1958 1959 do { 1960 start = u64_stats_fetch_begin(&r_vec->tx_sync); 1961 data[0] = r_vec->tx_pkts; 1962 data[1] = r_vec->tx_bytes; 1963 data[2] = r_vec->tx_errors; 1964 } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); 1965 stats->tx_packets += data[0]; 1966 stats->tx_bytes += data[1]; 1967 stats->tx_errors += data[2]; 1968 } 1969 1970 return stats; 1971 } 1972 1973 static int nfp_net_set_features(struct net_device *netdev, 1974 netdev_features_t features) 1975 { 1976 netdev_features_t changed = netdev->features ^ features; 1977 struct nfp_net *nn = netdev_priv(netdev); 1978 u32 new_ctrl; 1979 int err; 1980 1981 /* Assume this is not called with features we have not advertised */ 1982 1983 new_ctrl = nn->ctrl; 1984 1985 if (changed & NETIF_F_RXCSUM) { 1986 if (features & NETIF_F_RXCSUM) 1987 new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM; 1988 else 1989 new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM; 1990 } 1991 1992 if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { 1993 if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) 1994 new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM; 1995 else 1996 new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM; 1997 } 1998 1999 if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) { 2000 if (features & (NETIF_F_TSO | NETIF_F_TSO6)) 2001 new_ctrl |= NFP_NET_CFG_CTRL_LSO; 2002 else 2003 new_ctrl &= ~NFP_NET_CFG_CTRL_LSO; 2004 } 2005 2006 if (changed & NETIF_F_HW_VLAN_CTAG_RX) { 2007 if 
static int nfp_net_set_features(struct net_device *netdev,
				netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;
	struct nfp_net *nn = netdev_priv(netdev);
	u32 new_ctrl;
	int err;

	/* Assume this is not called with features we have not advertised */

	new_ctrl = nn->ctrl;

	if (changed & NETIF_F_RXCSUM) {
		if (features & NETIF_F_RXCSUM)
			new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM;
	}

	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
		if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
			new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
	}

	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
			new_ctrl |= NFP_NET_CFG_CTRL_LSO;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_LSO;
	}

	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
		if (features & NETIF_F_HW_VLAN_CTAG_RX)
			new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
	}

	if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
		if (features & NETIF_F_HW_VLAN_CTAG_TX)
			new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
	}

	if (changed & NETIF_F_SG) {
		if (features & NETIF_F_SG)
			new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
	}

	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
	       netdev->features, features, changed);

	if (new_ctrl == nn->ctrl)
		return 0;

	nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl);
	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
	if (err)
		return err;

	nn->ctrl = new_ctrl;

	return 0;
}

static netdev_features_t
nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
		       netdev_features_t features)
{
	u8 l4_hdr;

	/* We can't do TSO over double tagged packets (802.1AD) */
	features &= vlan_features_check(skb, features);

	if (!skb->encapsulation)
		return features;

	/* Ensure that inner L4 header offset fits into TX descriptor field */
	if (skb_is_gso(skb)) {
		u32 hdrlen;

		hdrlen = skb_inner_transport_header(skb) - skb->data +
			inner_tcp_hdrlen(skb);

		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
			features &= ~NETIF_F_GSO_MASK;
	}

	/* VXLAN/GRE check */
	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IP):
		l4_hdr = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
	    skb->inner_protocol != htons(ETH_P_TEB) ||
	    (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
	    (l4_hdr == IPPROTO_UDP &&
	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	return features;
}

/**
 * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
 * @nn:   NFP Net device to reconfigure
 * @idx:  Index into the port table where new port should be written
 * @port: UDP port to configure (pass zero to remove VXLAN port)
 */
static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
{
	int i;

	nn->vxlan_ports[idx] = port;

	if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN))
		return;

	BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
		nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
			  be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
			  be16_to_cpu(nn->vxlan_ports[i]));

	nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN);
}
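
/* The firmware's VXLAN port table holds NFP_NET_N_VXLAN_PORTS 16-bit UDP
 * ports packed two per 32-bit register.  For example (illustrative values
 * only), with vxlan_ports[0] = 4789 and vxlan_ports[1] = 4790 the first
 * register is written as (4790 << 16) | 4789.  The helpers below
 * reference-count each offloaded port, so the table is only rewritten on
 * the 0 -> 1 and 1 -> 0 transitions of the use count.
 */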
/**
 * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
 * @nn:   NFP Network structure
 * @port: UDP port to look for
 *
 * Return: if the port is already in the table -- its position;
 *         if the port is not in the table -- free position to use;
 *         if the table is full -- -ENOSPC.
 */
static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
{
	int i, free_idx = -ENOSPC;

	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) {
		if (nn->vxlan_ports[i] == port)
			return i;
		if (!nn->vxlan_usecnt[i])
			free_idx = i;
	}

	return free_idx;
}

static void nfp_net_add_vxlan_port(struct net_device *netdev,
				   sa_family_t sa_family, __be16 port)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int idx;

	idx = nfp_net_find_vxlan_idx(nn, port);
	if (idx == -ENOSPC)
		return;

	if (!nn->vxlan_usecnt[idx]++)
		nfp_net_set_vxlan_port(nn, idx, port);
}

static void nfp_net_del_vxlan_port(struct net_device *netdev,
				   sa_family_t sa_family, __be16 port)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int idx;

	idx = nfp_net_find_vxlan_idx(nn, port);
	if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
		return;

	if (!--nn->vxlan_usecnt[idx])
		nfp_net_set_vxlan_port(nn, idx, 0);
}

static const struct net_device_ops nfp_net_netdev_ops = {
	.ndo_open		= nfp_net_netdev_open,
	.ndo_stop		= nfp_net_netdev_close,
	.ndo_start_xmit		= nfp_net_tx,
	.ndo_get_stats64	= nfp_net_stat64,
	.ndo_tx_timeout		= nfp_net_tx_timeout,
	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
	.ndo_change_mtu		= nfp_net_change_mtu,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_set_features	= nfp_net_set_features,
	.ndo_features_check	= nfp_net_features_check,
	.ndo_add_vxlan_port	= nfp_net_add_vxlan_port,
	.ndo_del_vxlan_port	= nfp_net_del_vxlan_port,
};

/**
 * nfp_net_info() - Print general info about the NIC
 * @nn: NFP Net device to print info about
 */
void nfp_net_info(struct nfp_net *nn)
{
	nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
		nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx",
		nn->is_vf ? "VF " : "",
		nn->num_tx_rings, nn->max_tx_rings,
		nn->num_rx_rings, nn->max_rx_rings);
	nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
		nn->fw_ver.resv, nn->fw_ver.class,
		nn->fw_ver.major, nn->fw_ver.minor,
		nn->max_mtu);
	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
		nn->cap,
		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
		nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
		nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
		nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
		nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
		nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
		nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
		nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
		nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO "      : "",
		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS "      : "",
		nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "    : "");
}

/**
 * nfp_net_netdev_alloc() - Allocate netdev and related structure
 * @pdev:         PCI device
 * @max_tx_rings: Maximum number of TX rings supported by device
 * @max_rx_rings: Maximum number of RX rings supported by device
 *
 * This function allocates a netdev device and fills in the initial
 * part of the nfp_net structure.
 *
 * Return: NFP Net device structure, or ERR_PTR on error.
 */
struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
				     int max_tx_rings, int max_rx_rings)
{
	struct net_device *netdev;
	struct nfp_net *nn;
	int nqs;

	netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
				    max_tx_rings, max_rx_rings);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	SET_NETDEV_DEV(netdev, &pdev->dev);
	nn = netdev_priv(netdev);

	nn->netdev = netdev;
	nn->pdev = pdev;

	nn->max_tx_rings = max_tx_rings;
	nn->max_rx_rings = max_rx_rings;

	nqs = netif_get_num_default_rss_queues();
	nn->num_tx_rings = min_t(int, nqs, max_tx_rings);
	nn->num_rx_rings = min_t(int, nqs, max_rx_rings);

	nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;

	spin_lock_init(&nn->reconfig_lock);
	spin_lock_init(&nn->link_status_lock);

	return nn;
}

/**
 * nfp_net_netdev_free() - Undo what nfp_net_netdev_alloc() did
 * @nn: NFP Net device to free
 */
void nfp_net_netdev_free(struct nfp_net *nn)
{
	free_netdev(nn->netdev);
}

/**
 * nfp_net_rss_init() - Set the initial RSS parameters
 * @nn: NFP Net device to configure
 */
static void nfp_net_rss_init(struct nfp_net *nn)
{
	int i;

	netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);

	for (i = 0; i < sizeof(nn->rss_itbl); i++)
		nn->rss_itbl[i] =
			ethtool_rxfh_indir_default(i, nn->num_rx_rings);

	/* Enable IPv4/IPv6 TCP by default */
	nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
		      NFP_NET_CFG_RSS_IPV6_TCP |
		      NFP_NET_CFG_RSS_TOEPLITZ |
		      NFP_NET_CFG_RSS_MASK;
}

/**
 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
 * @nn: NFP Net device to configure
 */
static void nfp_net_irqmod_init(struct nfp_net *nn)
{
	nn->rx_coalesce_usecs = 50;
	nn->rx_coalesce_max_frames = 64;
	nn->tx_coalesce_usecs = 50;
	nn->tx_coalesce_max_frames = 64;
}

/**
 * nfp_net_netdev_init() - Initialise/finalise the netdev structure
 * @netdev: netdev structure
 *
 * Return: 0 on success or negative errno on error.
 */
int nfp_net_netdev_init(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int err;

	/* Get some of the read-only fields from the BAR */
	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);

	nfp_net_write_mac_addr(nn, nn->netdev->dev_addr);

	/* Set default MTU and Freelist buffer size */
	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
		netdev->mtu = nn->max_mtu;
	else
		netdev->mtu = NFP_NET_DEFAULT_MTU;
	nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ;
	/* Advertise/enable offloads based on capabilities
	 *
	 * Note: netdev->features shows the currently enabled features
	 * and netdev->hw_features advertises which features are
	 * supported.  By default we enable most features.
	 */
	netdev->hw_features = NETIF_F_HIGHDMA;
	if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) {
		netdev->hw_features |= NETIF_F_RXCSUM;
		nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
		nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
		netdev->hw_features |= NETIF_F_SG;
		nn->ctrl |= NFP_NET_CFG_CTRL_GATHER;
	}
	if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		nn->ctrl |= NFP_NET_CFG_CTRL_LSO;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
		netdev->hw_features |= NETIF_F_RXHASH;
		nfp_net_rss_init(nn);
		nn->ctrl |= NFP_NET_CFG_CTRL_RSS;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
	    nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
			netdev->hw_features |= NETIF_F_GSO_GRE |
					       NETIF_F_GSO_UDP_TUNNEL;
		nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;

		netdev->hw_enc_features = netdev->hw_features;
	}

	netdev->vlan_features = netdev->hw_features;

	if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
		nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
		nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
	}

	netdev->features = netdev->hw_features;

	/* Advertise but disable TSO by default. */
	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);

	/* Allow L2 Broadcast and Multicast through by default, if supported */
	if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
		nn->ctrl |= NFP_NET_CFG_CTRL_L2BC;
	if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
		nn->ctrl |= NFP_NET_CFG_CTRL_L2MC;

	/* Allow IRQ moderation, if supported */
	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
		nfp_net_irqmod_init(nn);
		nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
	}

	/* On NFP-3200 enable MSI-X auto-masking, if supported and the
	 * interrupts are not shared.
	 */
	if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO)
		nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO;

	/* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */
	if (nn->fw_ver.major >= 2)
		nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
	else
		nn->rx_offset = NFP_NET_RX_OFFSET;

	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;

	/* Make sure the FW knows the netdev is supposed to be disabled here */
	nn_writel(nn, NFP_NET_CFG_CTRL, 0);
	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
				   NFP_NET_CFG_UPDATE_GEN);
	if (err)
		return err;

	/* Finalise the netdev setup */
	ether_setup(netdev);
	netdev->netdev_ops = &nfp_net_netdev_ops;
	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
	netif_carrier_off(netdev);

	nfp_net_set_ethtool_ops(netdev);
	nfp_net_irqs_assign(netdev);

	return register_netdev(netdev);
}
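
/* A rough sketch (not taken from this file) of how the PF/VF specific code
 * is expected to use the helpers above: allocate with nfp_net_netdev_alloc(),
 * map the control and queue BARs into the nfp_net structure, then call
 * nfp_net_netdev_init(); teardown is nfp_net_netdev_clean() followed by
 * nfp_net_netdev_free().
 */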
/**
 * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did.
 * @netdev: netdev structure
 */
void nfp_net_netdev_clean(struct net_device *netdev)
{
	unregister_netdev(netdev);
}