/*
 * Copyright 2008-2010 Cisco Systems, Inc. All rights reserved.
 * Copyright 2007 Nuova Systems, Inc. All rights reserved.
 *
 * This program is free software; you may redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/rtnetlink.h>
#include <linux/prefetch.h>
#include <net/ip6_checksum.h>
#include <linux/ktime.h>
#include <linux/numa.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include <linux/crash_dump.h>
#include <net/busy_poll.h>
#include <net/vxlan.h>

#include "cq_enet_desc.h"
#include "vnic_dev.h"
#include "vnic_intr.h"
#include "vnic_stats.h"
#include "vnic_vic.h"
#include "enic_res.h"
#include "enic.h"
#include "enic_dev.h"
#include "enic_pp.h"
#include "enic_clsf.h"

#define ENIC_NOTIFY_TIMER_PERIOD	(2 * HZ)
#define WQ_ENET_MAX_DESC_LEN		(1 << WQ_ENET_LEN_BITS)
#define MAX_TSO				(1 << 16)
#define ENIC_DESC_MAX_SPLITS		(MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)

#define PCI_DEVICE_ID_CISCO_VIC_ENET		0x0043	/* ethernet vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN	0x0044	/* enet dynamic vnic */
#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF		0x0071	/* enet SRIOV VF */

#define RX_COPYBREAK_DEFAULT		256

/* Supported devices */
static const struct pci_device_id enic_id_table[] = {
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) },
	{ PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) },
	{ 0, }	/* end of table */
};

MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Scott Feldman <scofeldm@cisco.com>");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, enic_id_table);

#define ENIC_LARGE_PKT_THRESHOLD	1000
#define ENIC_MAX_COALESCE_TIMERS	10
/* Interrupt moderation table, which will be used to decide the
 * coalescing timer values
 * {rx_rate in Mbps, mapping percentage of the range}
 */
static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
	{4000,  0},
	{4400, 10},
	{5060, 20},
	{5230, 30},
	{5540, 40},
	{5820, 50},
	{6120, 60},
	{6435, 70},
	{6745, 80},
	{7000, 90},
	{0xFFFFFFFF, 100}
};
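
/* Example: a measured rx rate of ~5000 Mbps falls under the 5060 Mbps row
 * above and therefore maps to 20% of the coalescing timer range; the range
 * itself (small/large packet start values, end at ENIC_RX_COALESCE_RANGE_END)
 * comes from mod_range below, picked by link speed in
 * enic_set_rx_coal_setting(). See enic_calc_int_moderation() for the
 * interpolation.
 */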

/* This table helps the driver to pick different ranges for rx coalescing
 * timer depending on the link speed.
 */
static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
	{0,  0}, /* 0  - 4  Gbps */
	{0,  3}, /* 4  - 10 Gbps */
	{3,  6}, /* 10 - 40 Gbps */
};

static void enic_init_affinity_hint(struct enic *enic)
{
	int numa_node = dev_to_node(&enic->pdev->dev);
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i) ||
		    (cpumask_available(enic->msix[i].affinity_mask) &&
		     !cpumask_empty(enic->msix[i].affinity_mask)))
			continue;
		if (zalloc_cpumask_var(&enic->msix[i].affinity_mask,
				       GFP_KERNEL))
			cpumask_set_cpu(cpumask_local_spread(i, numa_node),
					enic->msix[i].affinity_mask);
	}
}

static void enic_free_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) || enic_is_notify_intr(enic, i))
			continue;
		free_cpumask_var(enic->msix[i].affinity_mask);
	}
}

static void enic_set_affinity_hint(struct enic *enic)
{
	int i;
	int err;

	for (i = 0; i < enic->intr_count; i++) {
		if (enic_is_err_intr(enic, i) ||
		    enic_is_notify_intr(enic, i) ||
		    !cpumask_available(enic->msix[i].affinity_mask) ||
		    cpumask_empty(enic->msix[i].affinity_mask))
			continue;
		err = irq_set_affinity_hint(enic->msix_entry[i].vector,
					    enic->msix[i].affinity_mask);
		if (err)
			netdev_warn(enic->netdev, "irq_set_affinity_hint failed, err %d\n",
				    err);
	}

	for (i = 0; i < enic->wq_count; i++) {
		int wq_intr = enic_msix_wq_intr(enic, i);

		if (cpumask_available(enic->msix[wq_intr].affinity_mask) &&
		    !cpumask_empty(enic->msix[wq_intr].affinity_mask))
			netif_set_xps_queue(enic->netdev,
					    enic->msix[wq_intr].affinity_mask,
					    i);
	}
}

static void enic_unset_affinity_hint(struct enic *enic)
{
	int i;

	for (i = 0; i < enic->intr_count; i++)
		irq_set_affinity_hint(enic->msix_entry[i].vector, NULL);
}
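
/* The firmware offloads a single VXLAN UDP port at a time.
 * enic_udp_tunnel_add() below rejects a second port, rejects IPv6 outer
 * headers unless ENIC_VXLAN_OUTER_IPV6 is advertised, and rejects multiple
 * WQs unless ENIC_VXLAN_MULTI_WQ is set.
 */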

static void enic_udp_tunnel_add(struct net_device *netdev,
				struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	__be16 port = ti->port;
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	if (ti->type != UDP_TUNNEL_TYPE_VXLAN) {
		netdev_info(netdev, "udp_tnl: only vxlan tunnel offload supported");
		goto error;
	}

	switch (ti->sa_family) {
	case AF_INET6:
		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6)) {
			netdev_info(netdev, "vxlan: only IPv4 offload supported");
			goto error;
		}
		/* Fall through */
	case AF_INET:
		break;
	default:
		goto error;
	}

	if (enic->vxlan.vxlan_udp_port_number) {
		if (ntohs(port) == enic->vxlan.vxlan_udp_port_number)
			netdev_warn(netdev, "vxlan: udp port already offloaded");
		else
			netdev_info(netdev, "vxlan: offload supported for only one UDP port");

		goto error;
	}
	if ((vnic_dev_get_res_count(enic->vdev, RES_TYPE_WQ) != 1) &&
	    !(enic->vxlan.flags & ENIC_VXLAN_MULTI_WQ)) {
		netdev_info(netdev, "vxlan: vxlan offload with multi wq not supported on this adapter");
		goto error;
	}

	err = vnic_dev_overlay_offload_cfg(enic->vdev,
					   OVERLAY_CFG_VXLAN_PORT_UPDATE,
					   ntohs(port));
	if (err)
		goto error;

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    enic->vxlan.patch_level);
	if (err)
		goto error;

	enic->vxlan.vxlan_udp_port_number = ntohs(port);

	netdev_info(netdev, "vxlan fw-vers-%d: offload enabled for udp port: %d, sa_family: %d ",
		    (int)enic->vxlan.patch_level, ntohs(port), ti->sa_family);

	goto unlock;

error:
	netdev_info(netdev, "failed to offload udp port: %d, sa_family: %d, type: %d",
		    ntohs(port), ti->sa_family, ti->type);
unlock:
	spin_unlock_bh(&enic->devcmd_lock);
}

static void enic_udp_tunnel_del(struct net_device *netdev,
				struct udp_tunnel_info *ti)
{
	struct enic *enic = netdev_priv(netdev);
	int err;

	spin_lock_bh(&enic->devcmd_lock);

	if ((ntohs(ti->port) != enic->vxlan.vxlan_udp_port_number) ||
	    ti->type != UDP_TUNNEL_TYPE_VXLAN) {
		netdev_info(netdev, "udp_tnl: port:%d, sa_family: %d, type: %d not offloaded",
			    ntohs(ti->port), ti->sa_family, ti->type);
		goto unlock;
	}

	err = vnic_dev_overlay_offload_ctrl(enic->vdev, OVERLAY_FEATURE_VXLAN,
					    OVERLAY_OFFLOAD_DISABLE);
	if (err) {
		netdev_err(netdev, "vxlan: del offload udp port: %d failed",
			   ntohs(ti->port));
		goto unlock;
	}

	enic->vxlan.vxlan_udp_port_number = 0;

	netdev_info(netdev, "vxlan: del offload udp port %d, family %d\n",
		    ntohs(ti->port), ti->sa_family);

unlock:
	spin_unlock_bh(&enic->devcmd_lock);
}
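
/* enic_features_check() keeps checksum/TSO features for an encapsulated skb
 * only when the outer and inner protocols are ones the adapter can parse and
 * the outer UDP destination port matches the offloaded VXLAN port; otherwise
 * NETIF_F_CSUM_MASK and NETIF_F_GSO_MASK are cleared so the stack falls back
 * to software.
 */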

static netdev_features_t enic_features_check(struct sk_buff *skb,
					     struct net_device *dev,
					     netdev_features_t features)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);
	struct enic *enic = netdev_priv(dev);
	struct udphdr *udph;
	u16 port = 0;
	u8 proto;

	if (!skb->encapsulation)
		return features;

	features = vxlan_features_check(skb, features);

	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_OUTER_IPV6))
			goto out;
		proto = ipv6_hdr(skb)->nexthdr;
		break;
	case htons(ETH_P_IP):
		proto = ip_hdr(skb)->protocol;
		break;
	default:
		goto out;
	}

	switch (eth->h_proto) {
	case ntohs(ETH_P_IPV6):
		if (!(enic->vxlan.flags & ENIC_VXLAN_INNER_IPV6))
			goto out;
		/* Fall through */
	case ntohs(ETH_P_IP):
		break;
	default:
		goto out;
	}

	if (proto == IPPROTO_UDP) {
		udph = udp_hdr(skb);
		port = be16_to_cpu(udph->dest);
	}

	/* HW supports offload of only one UDP port. Remove CSUM and GSO MASK
	 * for other UDP port tunnels
	 */
	if (port != enic->vxlan.vxlan_udp_port_number)
		goto out;

	return features;

out:
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}

int enic_is_dynamic(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_DYN;
}

int enic_sriov_enabled(struct enic *enic)
{
	return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0;
}

static int enic_is_sriov_vf(struct enic *enic)
{
	return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF;
}

int enic_is_valid_vf(struct enic *enic, int vf)
{
#ifdef CONFIG_PCI_IOV
	return vf >= 0 && vf < enic->num_vfs;
#else
	return 0;
#endif
}

static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
{
	struct enic *enic = vnic_dev_priv(wq->vdev);

	if (buf->sop)
		pci_unmap_single(enic->pdev, buf->dma_addr,
				 buf->len, PCI_DMA_TODEVICE);
	else
		pci_unmap_page(enic->pdev, buf->dma_addr,
			       buf->len, PCI_DMA_TODEVICE);

	if (buf->os_buf)
		dev_kfree_skb_any(buf->os_buf);
}

static void enic_wq_free_buf(struct vnic_wq *wq,
			     struct cq_desc *cq_desc, struct vnic_wq_buf *buf,
			     void *opaque)
{
	enic_free_wq_buf(wq, buf);
}

static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
			   u8 type, u16 q_number, u16 completed_index,
			   void *opaque)
{
	struct enic *enic = vnic_dev_priv(vdev);

	spin_lock(&enic->wq_lock[q_number]);

	vnic_wq_service(&enic->wq[q_number], cq_desc,
			completed_index, enic_wq_free_buf,
			opaque);

	if (netif_tx_queue_stopped(netdev_get_tx_queue(enic->netdev, q_number)) &&
	    vnic_wq_desc_avail(&enic->wq[q_number]) >=
	    (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS))
		netif_wake_subqueue(enic->netdev, q_number);

	spin_unlock(&enic->wq_lock[q_number]);

	return 0;
}
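
/* The wake threshold above mirrors the stop threshold in
 * enic_hard_start_xmit(): a queue is restarted only when at least
 * MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS descriptors are free, enough for a
 * worst-case fragmented TSO frame.
 */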

static bool enic_log_q_error(struct enic *enic)
{
	unsigned int i;
	u32 error_status;
	bool err = false;

	for (i = 0; i < enic->wq_count; i++) {
		error_status = vnic_wq_error_status(&enic->wq[i]);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "WQ[%d] error_status %d\n",
				   i, error_status);
	}

	for (i = 0; i < enic->rq_count; i++) {
		error_status = vnic_rq_error_status(&enic->rq[i]);
		err |= error_status;
		if (error_status)
			netdev_err(enic->netdev, "RQ[%d] error_status %d\n",
				   i, error_status);
	}

	return err;
}

static void enic_msglvl_check(struct enic *enic)
{
	u32 msg_enable = vnic_dev_msg_lvl(enic->vdev);

	if (msg_enable != enic->msg_enable) {
		netdev_info(enic->netdev, "msg lvl changed from 0x%x to 0x%x\n",
			    enic->msg_enable, msg_enable);
		enic->msg_enable = msg_enable;
	}
}

static void enic_mtu_check(struct enic *enic)
{
	u32 mtu = vnic_dev_mtu(enic->vdev);
	struct net_device *netdev = enic->netdev;

	if (mtu && mtu != enic->port_mtu) {
		enic->port_mtu = mtu;
		if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
			mtu = max_t(int, ENIC_MIN_MTU,
				    min_t(int, ENIC_MAX_MTU, mtu));
			if (mtu != netdev->mtu)
				schedule_work(&enic->change_mtu_work);
		} else {
			if (mtu < netdev->mtu)
				netdev_warn(netdev,
					    "interface MTU (%d) set higher "
					    "than switch port MTU (%d)\n",
					    netdev->mtu, mtu);
		}
	}
}

static void enic_link_check(struct enic *enic)
{
	int link_status = vnic_dev_link_status(enic->vdev);
	int carrier_ok = netif_carrier_ok(enic->netdev);

	if (link_status && !carrier_ok) {
		netdev_info(enic->netdev, "Link UP\n");
		netif_carrier_on(enic->netdev);
	} else if (!link_status && carrier_ok) {
		netdev_info(enic->netdev, "Link DOWN\n");
		netif_carrier_off(enic->netdev);
	}
}

static void enic_notify_check(struct enic *enic)
{
	enic_msglvl_check(enic);
	enic_mtu_check(enic);
	enic_link_check(enic);
}

#define ENIC_TEST_INTR(pba, i) (pba & (1 << i))

static irqreturn_t enic_isr_legacy(int irq, void *data)
{
	struct net_device *netdev = data;
	struct enic *enic = netdev_priv(netdev);
	unsigned int io_intr = enic_legacy_io_intr();
	unsigned int err_intr = enic_legacy_err_intr();
	unsigned int notify_intr = enic_legacy_notify_intr();
	u32 pba;

	vnic_intr_mask(&enic->intr[io_intr]);

	pba = vnic_intr_legacy_pba(enic->legacy_pba);
	if (!pba) {
		vnic_intr_unmask(&enic->intr[io_intr]);
		return IRQ_NONE;	/* not our interrupt */
	}

	if (ENIC_TEST_INTR(pba, notify_intr)) {
		enic_notify_check(enic);
		vnic_intr_return_all_credits(&enic->intr[notify_intr]);
	}

	if (ENIC_TEST_INTR(pba, err_intr)) {
		vnic_intr_return_all_credits(&enic->intr[err_intr]);
		enic_log_q_error(enic);
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);
		return IRQ_HANDLED;
	}

	if (ENIC_TEST_INTR(pba, io_intr))
		napi_schedule_irqoff(&enic->napi[0]);
	else
		vnic_intr_unmask(&enic->intr[io_intr]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msi(int irq, void *data)
{
	struct enic *enic = data;

	/* With MSI, there is no sharing of interrupts, so this is
	 * our interrupt and there is no need to ack it. The device
	 * is not providing per-vector masking, so the OS will not
	 * write to PCI config space to mask/unmask the interrupt.
	 * We're using mask_on_assertion for MSI, so the device
	 * automatically masks the interrupt when the interrupt is
	 * generated. Later, when exiting polling, the interrupt
	 * will be unmasked (see enic_poll).
	 *
	 * Also, the device uses the same PCIe Traffic Class (TC)
	 * for Memory Write data and MSI, so there are no ordering
	 * issues; the MSI will always arrive at the Root Complex
	 * _after_ corresponding Memory Writes (i.e. descriptor
	 * writes).
	 */

	napi_schedule_irqoff(&enic->napi[0]);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix(int irq, void *data)
{
	struct napi_struct *napi = data;

	napi_schedule_irqoff(napi);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_err(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_err_intr(enic);

	vnic_intr_return_all_credits(&enic->intr[intr]);

	if (enic_log_q_error(enic))
		/* schedule recovery from WQ/RQ error */
		schedule_work(&enic->reset);

	return IRQ_HANDLED;
}

static irqreturn_t enic_isr_msix_notify(int irq, void *data)
{
	struct enic *enic = data;
	unsigned int intr = enic_msix_notify_intr(enic);

	enic_notify_check(enic);
	vnic_intr_return_all_credits(&enic->intr[intr]);

	return IRQ_HANDLED;
}
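
/* TX descriptor build path: each enic_queue_wq_skb_*() variant below posts
 * a first (SOP) descriptor for the skb head carrying the offload mode
 * (plain VLAN, L4 csum, encap or TSO), then continuation descriptors for
 * the remaining fragments (split into WQ_ENET_MAX_DESC_LEN chunks in the
 * TSO case), with the final descriptor flagged EOP.
 */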

static int enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, unsigned int len_left,
				  int loopback)
{
	const skb_frag_t *frag;
	dma_addr_t dma_addr;

	/* Queue additional data fragments */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag, 0,
					    skb_frag_size(frag),
					    DMA_TO_DEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_cont(wq, skb, dma_addr, skb_frag_size(frag),
					(len_left == 0),	/* EOP? */
					loopback);
	}

	return 0;
}

static int enic_queue_wq_skb_vlan(struct enic *enic, struct vnic_wq *wq,
				  struct sk_buff *skb, int vlan_tag_insert,
				  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = pci_map_single(enic->pdev, skb->data, head_len,
				  PCI_DMA_TODEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc(wq, skb, dma_addr, head_len, vlan_tag_insert,
			   vlan_tag, eop, loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	return err;
}

static int enic_queue_wq_skb_csum_l4(struct enic *enic, struct vnic_wq *wq,
				     struct sk_buff *skb, int vlan_tag_insert,
				     unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	unsigned int hdr_len = skb_checksum_start_offset(skb);
	unsigned int csum_offset = hdr_len + skb->csum_offset;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = pci_map_single(enic->pdev, skb->data, head_len,
				  PCI_DMA_TODEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	/* Queue the main skb fragment. The fragments are no larger
	 * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
	 * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
	 * per fragment is queued.
	 */
	enic_queue_wq_desc_csum_l4(wq, skb, dma_addr, head_len, csum_offset,
				   hdr_len, vlan_tag_insert, vlan_tag, eop,
				   loopback);

	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	return err;
}

static void enic_preload_tcp_csum_encap(struct sk_buff *skb)
{
	const struct ethhdr *eth = (struct ethhdr *)skb_inner_mac_header(skb);

	switch (eth->h_proto) {
	case ntohs(ETH_P_IP):
		inner_ip_hdr(skb)->check = 0;
		inner_tcp_hdr(skb)->check =
			~csum_tcpudp_magic(inner_ip_hdr(skb)->saddr,
					   inner_ip_hdr(skb)->daddr, 0,
					   IPPROTO_TCP, 0);
		break;
	case ntohs(ETH_P_IPV6):
		inner_tcp_hdr(skb)->check =
			~csum_ipv6_magic(&inner_ipv6_hdr(skb)->saddr,
					 &inner_ipv6_hdr(skb)->daddr, 0,
					 IPPROTO_TCP, 0);
		break;
	default:
		WARN_ONCE(1, "Non ipv4/ipv6 inner pkt for encap offload");
		break;
	}
}

static void enic_preload_tcp_csum(struct sk_buff *skb)
{
	/* Preload TCP csum field with IP pseudo hdr calculated
	 * with IP length set to zero. HW will later add in length
	 * to each TCP segment resulting from the TSO.
	 */

	if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
		ip_hdr(skb)->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
			ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
	} else if (skb->protocol == cpu_to_be16(ETH_P_IPV6)) {
		tcp_v6_gso_csum_prep(skb);
	}
}

static int enic_queue_wq_skb_tso(struct enic *enic, struct vnic_wq *wq,
				 struct sk_buff *skb, unsigned int mss,
				 int vlan_tag_insert, unsigned int vlan_tag,
				 int loopback)
{
	unsigned int frag_len_left = skb_headlen(skb);
	unsigned int len_left = skb->len - frag_len_left;
	int eop = (len_left == 0);
	unsigned int offset = 0;
	unsigned int hdr_len;
	dma_addr_t dma_addr;
	unsigned int len;
	skb_frag_t *frag;

	if (skb->encapsulation) {
		hdr_len = skb_inner_transport_header(skb) - skb->data;
		hdr_len += inner_tcp_hdrlen(skb);
		enic_preload_tcp_csum_encap(skb);
	} else {
		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
		enic_preload_tcp_csum(skb);
	}

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for the main skb fragment
	 */
	while (frag_len_left) {
		len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
		dma_addr = pci_map_single(enic->pdev, skb->data + offset, len,
					  PCI_DMA_TODEVICE);
		if (unlikely(enic_dma_map_check(enic, dma_addr)))
			return -ENOMEM;
		enic_queue_wq_desc_tso(wq, skb, dma_addr, len, mss, hdr_len,
				       vlan_tag_insert, vlan_tag,
				       eop && (len == frag_len_left), loopback);
		frag_len_left -= len;
		offset += len;
	}

	if (eop)
		return 0;

	/* Queue WQ_ENET_MAX_DESC_LEN length descriptors
	 * for additional data fragments
	 */
	for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
		len_left -= skb_frag_size(frag);
		frag_len_left = skb_frag_size(frag);
		offset = 0;

		while (frag_len_left) {
			len = min(frag_len_left,
				  (unsigned int)WQ_ENET_MAX_DESC_LEN);
			dma_addr = skb_frag_dma_map(&enic->pdev->dev, frag,
						    offset, len,
						    DMA_TO_DEVICE);
			if (unlikely(enic_dma_map_check(enic, dma_addr)))
				return -ENOMEM;
			enic_queue_wq_desc_cont(wq, skb, dma_addr, len,
						(len_left == 0) &&
						(len == frag_len_left),	/*EOP*/
						loopback);
			frag_len_left -= len;
			offset += len;
		}
	}

	return 0;
}

static inline int enic_queue_wq_skb_encap(struct enic *enic, struct vnic_wq *wq,
					  struct sk_buff *skb,
					  int vlan_tag_insert,
					  unsigned int vlan_tag, int loopback)
{
	unsigned int head_len = skb_headlen(skb);
	unsigned int len_left = skb->len - head_len;
	/* Hardware will overwrite the checksum fields, calculating from
	 * scratch and ignoring the value placed by software.
	 * Offload mode = 00
	 * mss[2], mss[1], mss[0] bits are set
	 */
	unsigned int mss_or_csum = 7;
	int eop = (len_left == 0);
	dma_addr_t dma_addr;
	int err = 0;

	dma_addr = pci_map_single(enic->pdev, skb->data, head_len,
				  PCI_DMA_TODEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr)))
		return -ENOMEM;

	enic_queue_wq_desc_ex(wq, skb, dma_addr, head_len, mss_or_csum, 0,
			      vlan_tag_insert, vlan_tag,
			      WQ_ENET_OFFLOAD_MODE_CSUM, eop, 1 /* SOP */, eop,
			      loopback);
	if (!eop)
		err = enic_queue_wq_skb_cont(enic, wq, skb, len_left, loopback);

	return err;
}

static inline void enic_queue_wq_skb(struct enic *enic,
	struct vnic_wq *wq, struct sk_buff *skb)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int vlan_tag = 0;
	int vlan_tag_insert = 0;
	int loopback = 0;
	int err;

	if (skb_vlan_tag_present(skb)) {
		/* VLAN tag from trunking driver */
		vlan_tag_insert = 1;
		vlan_tag = skb_vlan_tag_get(skb);
	} else if (enic->loop_enable) {
		vlan_tag = enic->loop_tag;
		loopback = 1;
	}

	if (mss)
		err = enic_queue_wq_skb_tso(enic, wq, skb, mss,
					    vlan_tag_insert, vlan_tag,
					    loopback);
	else if (skb->encapsulation)
		err = enic_queue_wq_skb_encap(enic, wq, skb, vlan_tag_insert,
					      vlan_tag, loopback);
	else if (skb->ip_summed == CHECKSUM_PARTIAL)
		err = enic_queue_wq_skb_csum_l4(enic, wq, skb, vlan_tag_insert,
						vlan_tag, loopback);
	else
		err = enic_queue_wq_skb_vlan(enic, wq, skb, vlan_tag_insert,
					     vlan_tag, loopback);
	if (unlikely(err)) {
		struct vnic_wq_buf *buf;

		buf = wq->to_use->prev;
		/* while not EOP of previous pkt && queue not empty.
		 * For all non EOP bufs, os_buf is NULL.
		 */
		while (!buf->os_buf && (buf->next != wq->to_clean)) {
			enic_free_wq_buf(wq, buf);
			wq->ring.desc_avail++;
			buf = buf->prev;
		}
		wq->to_use = buf->next;
		dev_kfree_skb(skb);
	}
}
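
/* enic_hard_start_xmit() below linearizes a non-TSO skb that would need
 * more than ENIC_NON_TSO_MAX_DESC descriptors, and stops the tx queue
 * whenever fewer than MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS descriptors
 * remain so the next frame can always be posted in one piece.
 */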

/* netif_tx_lock held, process context with BHs disabled, or BH */
static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
					struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_wq *wq;
	unsigned int txq_map;
	struct netdev_queue *txq;

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	txq_map = skb_get_queue_mapping(skb) % enic->wq_count;
	wq = &enic->wq[txq_map];
	txq = netdev_get_tx_queue(netdev, txq_map);

	/* Non-TSO sends must fit within ENIC_NON_TSO_MAX_DESC descs,
	 * which is very likely. In the off chance it's going to take
	 * more than ENIC_NON_TSO_MAX_DESC, linearize the skb.
	 */

	if (skb_shinfo(skb)->gso_size == 0 &&
	    skb_shinfo(skb)->nr_frags + 1 > ENIC_NON_TSO_MAX_DESC &&
	    skb_linearize(skb)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	spin_lock(&enic->wq_lock[txq_map]);

	if (vnic_wq_desc_avail(wq) <
	    skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
		netif_tx_stop_queue(txq);
		/* This is a hard error, log it */
		netdev_err(netdev, "BUG! Tx ring full when queue awake!\n");
		spin_unlock(&enic->wq_lock[txq_map]);
		return NETDEV_TX_BUSY;
	}

	enic_queue_wq_skb(enic, wq, skb);

	if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
		netif_tx_stop_queue(txq);
	skb_tx_timestamp(skb);
	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
		vnic_wq_doorbell(wq);

	spin_unlock(&enic->wq_lock[txq_map]);

	return NETDEV_TX_OK;
}

/* dev_base_lock rwlock held, nominally process context */
static void enic_get_stats(struct net_device *netdev,
			   struct rtnl_link_stats64 *net_stats)
{
	struct enic *enic = netdev_priv(netdev);
	struct vnic_stats *stats;
	int err;

	err = enic_dev_stats_dump(enic, &stats);
	/* return only when pci_zalloc_consistent fails in vnic_dev_stats_dump
	 * For other failures, like devcmd failure, we return previously
	 * recorded stats.
	 */
	if (err == -ENOMEM)
		return;

	net_stats->tx_packets = stats->tx.tx_frames_ok;
	net_stats->tx_bytes = stats->tx.tx_bytes_ok;
	net_stats->tx_errors = stats->tx.tx_errors;
	net_stats->tx_dropped = stats->tx.tx_drops;

	net_stats->rx_packets = stats->rx.rx_frames_ok;
	net_stats->rx_bytes = stats->rx.rx_bytes_ok;
	net_stats->rx_errors = stats->rx.rx_errors;
	net_stats->multicast = stats->rx.rx_multicast_frames_ok;
	net_stats->rx_over_errors = enic->rq_truncated_pkts;
	net_stats->rx_crc_errors = enic->rq_bad_fcs;
	net_stats->rx_dropped = stats->rx.rx_no_bufs + stats->rx.rx_drop;
}
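
/* The address sync/unsync helpers below program the adapter's perfect
 * match filters. At most ENIC_UNICAST_PERFECT_FILTERS unicast and
 * ENIC_MULTICAST_PERFECT_FILTERS multicast addresses fit; once a list
 * overflows, enic_set_rx_mode() switches to promiscuous or all-multicast
 * mode instead.
 */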

static int enic_mc_sync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->mc_count == ENIC_MULTICAST_PERFECT_FILTERS) {
		unsigned int mc_count = netdev_mc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d multicast addresses\n",
			    ENIC_MULTICAST_PERFECT_FILTERS, mc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, mc_addr);
	enic->mc_count++;

	return 0;
}

static int enic_mc_unsync(struct net_device *netdev, const u8 *mc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, mc_addr);
	enic->mc_count--;

	return 0;
}

static int enic_uc_sync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic->uc_count == ENIC_UNICAST_PERFECT_FILTERS) {
		unsigned int uc_count = netdev_uc_count(netdev);

		netdev_warn(netdev, "Registering only %d out of %d unicast addresses\n",
			    ENIC_UNICAST_PERFECT_FILTERS, uc_count);

		return -ENOSPC;
	}

	enic_dev_add_addr(enic, uc_addr);
	enic->uc_count++;

	return 0;
}

static int enic_uc_unsync(struct net_device *netdev, const u8 *uc_addr)
{
	struct enic *enic = netdev_priv(netdev);

	enic_dev_del_addr(enic, uc_addr);
	enic->uc_count--;

	return 0;
}

void enic_reset_addr_lists(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;

	__dev_uc_unsync(netdev, NULL);
	__dev_mc_unsync(netdev, NULL);

	enic->mc_count = 0;
	enic->uc_count = 0;
	enic->flags = 0;
}

static int enic_set_mac_addr(struct net_device *netdev, char *addr)
{
	struct enic *enic = netdev_priv(netdev);

	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) {
		if (!is_valid_ether_addr(addr) && !is_zero_ether_addr(addr))
			return -EADDRNOTAVAIL;
	} else {
		if (!is_valid_ether_addr(addr))
			return -EADDRNOTAVAIL;
	}

	memcpy(netdev->dev_addr, addr, netdev->addr_len);

	return 0;
}

static int enic_set_mac_address_dynamic(struct net_device *netdev, void *p)
{
	struct enic *enic = netdev_priv(netdev);
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	int err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_del_station_addr(enic);
		if (err)
			return err;
	}

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	if (netif_running(enic->netdev)) {
		err = enic_dev_add_station_addr(enic);
		if (err)
			return err;
	}

	return err;
}

static int enic_set_mac_address(struct net_device *netdev, void *p)
{
	struct sockaddr *saddr = p;
	char *addr = saddr->sa_data;
	struct enic *enic = netdev_priv(netdev);
	int err;

	err = enic_dev_del_station_addr(enic);
	if (err)
		return err;

	err = enic_set_mac_addr(netdev, addr);
	if (err)
		return err;

	return enic_dev_add_station_addr(enic);
}

/* netif_tx_lock held, BHs disabled */
static void enic_set_rx_mode(struct net_device *netdev)
{
	struct enic *enic = netdev_priv(netdev);
	int directed = 1;
	int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
	int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
	int promisc = (netdev->flags & IFF_PROMISC) ||
		netdev_uc_count(netdev) > ENIC_UNICAST_PERFECT_FILTERS;
	int allmulti = (netdev->flags & IFF_ALLMULTI) ||
		netdev_mc_count(netdev) > ENIC_MULTICAST_PERFECT_FILTERS;
	unsigned int flags = netdev->flags |
		(allmulti ? IFF_ALLMULTI : 0) |
		(promisc ? IFF_PROMISC : 0);

	if (enic->flags != flags) {
		enic->flags = flags;
		enic_dev_packet_filter(enic, directed,
				       multicast, broadcast, promisc, allmulti);
	}

	if (!promisc) {
		__dev_uc_sync(netdev, enic_uc_sync, enic_uc_unsync);
		if (!allmulti)
			__dev_mc_sync(netdev, enic_mc_sync, enic_mc_unsync);
	}
}

/* netif_tx_lock held, BHs disabled */
static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
	struct enic *enic = netdev_priv(netdev);
	schedule_work(&enic->tx_hang_reset);
}

static int enic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (is_valid_ether_addr(mac) || is_zero_ether_addr(mac)) {
		if (vf == PORT_SELF_VF) {
			memcpy(pp->vf_mac, mac, ETH_ALEN);
			return 0;
		} else {
			/*
			 * For sriov vf's set the mac in hw
			 */
			ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
				vnic_dev_set_mac_addr, mac);
			return enic_dev_status_to_errno(err);
		}
	} else
		return -EINVAL;
}

static int enic_set_vf_port(struct net_device *netdev, int vf,
			    struct nlattr *port[])
{
	struct enic *enic = netdev_priv(netdev);
	struct enic_port_profile prev_pp;
	struct enic_port_profile *pp;
	int err = 0, restore_pp = 1;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!port[IFLA_PORT_REQUEST])
		return -EOPNOTSUPP;

	memcpy(&prev_pp, pp, sizeof(*enic->pp));
	memset(pp, 0, sizeof(*enic->pp));

	pp->set |= ENIC_SET_REQUEST;
	pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);

	if (port[IFLA_PORT_PROFILE]) {
		pp->set |= ENIC_SET_NAME;
		memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
			PORT_PROFILE_MAX);
	}

	if (port[IFLA_PORT_INSTANCE_UUID]) {
		pp->set |= ENIC_SET_INSTANCE;
		memcpy(pp->instance_uuid,
			nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
	}

	if (port[IFLA_PORT_HOST_UUID]) {
		pp->set |= ENIC_SET_HOST;
		memcpy(pp->host_uuid,
			nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
	}

	if (vf == PORT_SELF_VF) {
		/* Special case handling: mac came from IFLA_VF_MAC */
		if (!is_zero_ether_addr(prev_pp.vf_mac))
			memcpy(pp->mac_addr, prev_pp.vf_mac, ETH_ALEN);

		if (is_zero_ether_addr(netdev->dev_addr))
			eth_hw_addr_random(netdev);
	} else {
		/* SR-IOV VF: get mac from adapter */
		ENIC_DEVCMD_PROXY_BY_INDEX(vf, err, enic,
			vnic_dev_get_mac_addr, pp->mac_addr);
		if (err) {
			netdev_err(netdev, "Error getting mac for vf %d\n", vf);
			memcpy(pp, &prev_pp, sizeof(*pp));
			return enic_dev_status_to_errno(err);
		}
	}

	err = enic_process_set_pp_request(enic, vf, &prev_pp, &restore_pp);
	if (err) {
		if (restore_pp) {
			/* Things are still the way they were: Implicit
			 * DISASSOCIATE failed
			 */
			memcpy(pp, &prev_pp, sizeof(*pp));
		} else {
			memset(pp, 0, sizeof(*pp));
			if (vf == PORT_SELF_VF)
				eth_zero_addr(netdev->dev_addr);
		}
	} else {
		/* Set flag to indicate that the port assoc/disassoc
		 * request has been sent out to fw
		 */
		pp->set |= ENIC_PORT_REQUEST_APPLIED;

		/* If DISASSOCIATE, clean up all assigned/saved macaddresses */
		if (pp->request == PORT_REQUEST_DISASSOCIATE) {
			eth_zero_addr(pp->mac_addr);
			if (vf == PORT_SELF_VF)
				eth_zero_addr(netdev->dev_addr);
		}
	}

	if (vf == PORT_SELF_VF)
		eth_zero_addr(pp->vf_mac);

	return err;
}

static int enic_get_vf_port(struct net_device *netdev, int vf,
			    struct sk_buff *skb)
{
	struct enic *enic = netdev_priv(netdev);
	u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
	struct enic_port_profile *pp;
	int err;

	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
	if (err)
		return err;

	if (!(pp->set & ENIC_PORT_REQUEST_APPLIED))
		return -ENODATA;

	err = enic_process_get_pp_request(enic, vf, pp->request, &response);
	if (err)
		return err;

	if (nla_put_u16(skb, IFLA_PORT_REQUEST, pp->request) ||
	    nla_put_u16(skb, IFLA_PORT_RESPONSE, response) ||
	    ((pp->set & ENIC_SET_NAME) &&
	     nla_put(skb, IFLA_PORT_PROFILE, PORT_PROFILE_MAX, pp->name)) ||
	    ((pp->set & ENIC_SET_INSTANCE) &&
	     nla_put(skb, IFLA_PORT_INSTANCE_UUID, PORT_UUID_MAX,
		     pp->instance_uuid)) ||
	    ((pp->set & ENIC_SET_HOST) &&
	     nla_put(skb, IFLA_PORT_HOST_UUID, PORT_UUID_MAX, pp->host_uuid)))
		goto nla_put_failure;
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

static void enic_free_rq_buf(struct vnic_rq *rq, struct vnic_rq_buf *buf)
{
	struct enic *enic = vnic_dev_priv(rq->vdev);

	if (!buf->os_buf)
		return;

	pci_unmap_single(enic->pdev, buf->dma_addr,
			 buf->len, PCI_DMA_FROMDEVICE);
	dev_kfree_skb_any(buf->os_buf);
	buf->os_buf = NULL;
}

static int enic_rq_alloc_buf(struct vnic_rq *rq)
{
	struct enic *enic = vnic_dev_priv(rq->vdev);
	struct net_device *netdev = enic->netdev;
	struct sk_buff *skb;
	unsigned int len = netdev->mtu + VLAN_ETH_HLEN;
	unsigned int os_buf_index = 0;
	dma_addr_t dma_addr;
	struct vnic_rq_buf *buf = rq->to_use;

	if (buf->os_buf) {
		enic_queue_rq_desc(rq, buf->os_buf, os_buf_index, buf->dma_addr,
				   buf->len);

		return 0;
	}
	skb = netdev_alloc_skb_ip_align(netdev, len);
	if (!skb)
		return -ENOMEM;

	dma_addr = pci_map_single(enic->pdev, skb->data, len,
				  PCI_DMA_FROMDEVICE);
	if (unlikely(enic_dma_map_check(enic, dma_addr))) {
		dev_kfree_skb(skb);
		return -ENOMEM;
	}

	enic_queue_rq_desc(rq, skb, os_buf_index,
			   dma_addr, len);

	return 0;
}

static void enic_intr_update_pkt_size(struct vnic_rx_bytes_counter *pkt_size,
				      u32 pkt_len)
{
	if (ENIC_LARGE_PKT_THRESHOLD <= pkt_len)
		pkt_size->large_pkt_bytes_cnt += pkt_len;
	else
		pkt_size->small_pkt_bytes_cnt += pkt_len;
}

static bool enic_rxcopybreak(struct net_device *netdev, struct sk_buff **skb,
			     struct vnic_rq_buf *buf, u16 len)
{
	struct enic *enic = netdev_priv(netdev);
	struct sk_buff *new_skb;

	if (len > enic->rx_copybreak)
		return false;
	new_skb = netdev_alloc_skb_ip_align(netdev, len);
	if (!new_skb)
		return false;
	pci_dma_sync_single_for_cpu(enic->pdev, buf->dma_addr, len,
				    DMA_FROM_DEVICE);
	memcpy(new_skb->data, (*skb)->data, len);
	*skb = new_skb;

	return true;
}
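
/* enic_rq_indicate_buf() below handles one rx completion: it accounts
 * CRC/truncation errors, optionally copies short frames (rx_copybreak),
 * records the RSS hash, detects VXLAN encapsulation according to the
 * firmware patch level, sets CHECKSUM_UNNECESSARY when the adapter
 * validated the checksums, restores the stripped VLAN tag and hands the
 * skb to GRO/netif_receive_skb().
 */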

static void enic_rq_indicate_buf(struct vnic_rq *rq,
				 struct cq_desc *cq_desc, struct vnic_rq_buf *buf,
				 int skipped, void *opaque)
{
	struct enic *enic = vnic_dev_priv(rq->vdev);
	struct net_device *netdev = enic->netdev;
	struct sk_buff *skb;
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];

	u8 type, color, eop, sop, ingress_port, vlan_stripped;
	u8 fcoe, fcoe_sof, fcoe_fc_crc_ok, fcoe_enc_error, fcoe_eof;
	u8 tcp_udp_csum_ok, udp, tcp, ipv4_csum_ok;
	u8 ipv6, ipv4, ipv4_fragment, fcs_ok, rss_type, csum_not_calc;
	u8 packet_error;
	u16 q_number, completed_index, bytes_written, vlan_tci, checksum;
	u32 rss_hash;
	bool outer_csum_ok = true, encap = false;

	if (skipped)
		return;

	skb = buf->os_buf;

	cq_enet_rq_desc_dec((struct cq_enet_rq_desc *)cq_desc,
			    &type, &color, &q_number, &completed_index,
			    &ingress_port, &fcoe, &eop, &sop, &rss_type,
			    &csum_not_calc, &rss_hash, &bytes_written,
			    &packet_error, &vlan_stripped, &vlan_tci, &checksum,
			    &fcoe_sof, &fcoe_fc_crc_ok, &fcoe_enc_error,
			    &fcoe_eof, &tcp_udp_csum_ok, &udp, &tcp,
			    &ipv4_csum_ok, &ipv6, &ipv4, &ipv4_fragment,
			    &fcs_ok);

	if (packet_error) {

		if (!fcs_ok) {
			if (bytes_written > 0)
				enic->rq_bad_fcs++;
			else if (bytes_written == 0)
				enic->rq_truncated_pkts++;
		}

		pci_unmap_single(enic->pdev, buf->dma_addr, buf->len,
				 PCI_DMA_FROMDEVICE);
		dev_kfree_skb_any(skb);
		buf->os_buf = NULL;

		return;
	}

	if (eop && bytes_written > 0) {

		/* Good receive
		 */

		if (!enic_rxcopybreak(netdev, &skb, buf, bytes_written)) {
			buf->os_buf = NULL;
			pci_unmap_single(enic->pdev, buf->dma_addr, buf->len,
					 PCI_DMA_FROMDEVICE);
		}
		prefetch(skb->data - NET_IP_ALIGN);

		skb_put(skb, bytes_written);
		skb->protocol = eth_type_trans(skb, netdev);
		skb_record_rx_queue(skb, q_number);
		if ((netdev->features & NETIF_F_RXHASH) && rss_hash &&
		    (type == 3)) {
			switch (rss_type) {
			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
				skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
				break;
			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
				skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
				break;
			}
		}
		if (enic->vxlan.vxlan_udp_port_number) {
			switch (enic->vxlan.patch_level) {
			case 0:
				if (fcoe) {
					encap = true;
					outer_csum_ok = fcoe_fc_crc_ok;
				}
				break;
			case 2:
				if ((type == 7) &&
				    (rss_hash & BIT(0))) {
					encap = true;
					outer_csum_ok = (rss_hash & BIT(1)) &&
							(rss_hash & BIT(2));
				}
				break;
			}
		}

		/* Hardware does not provide the whole-packet checksum. It
		 * only validates the packet checksum and does not give us
		 * the checksum value, so use CHECKSUM_UNNECESSARY.
		 *
		 * For encapsulated packets, tcp_udp_csum_ok reports the
		 * inner csum status; outer_csum_ok is set by hw when the
		 * outer udp csum is correct or is zero.
		 */
		if ((netdev->features & NETIF_F_RXCSUM) && !csum_not_calc &&
		    tcp_udp_csum_ok && outer_csum_ok &&
		    (ipv4_csum_ok || ipv6)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			skb->csum_level = encap;
		}

		if (vlan_stripped)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);

		skb_mark_napi_id(skb, &enic->napi[rq->index]);
		if (!(netdev->features & NETIF_F_GRO))
			netif_receive_skb(skb);
		else
			napi_gro_receive(&enic->napi[q_number], skb);
		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_intr_update_pkt_size(&cq->pkt_size_counter,
						  bytes_written);
	} else {

		/* Buffer overflow
		 */

		pci_unmap_single(enic->pdev, buf->dma_addr, buf->len,
				 PCI_DMA_FROMDEVICE);
		dev_kfree_skb_any(skb);
		buf->os_buf = NULL;
	}
}

static int enic_rq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
			   u8 type, u16 q_number, u16 completed_index,
			   void *opaque)
{
	struct enic *enic = vnic_dev_priv(vdev);

	vnic_rq_service(&enic->rq[q_number], cq_desc,
			completed_index, VNIC_RQ_RETURN_DESC,
			enic_rq_indicate_buf, opaque);

	return 0;
}

static void enic_set_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	unsigned int intr = enic_msix_rq_intr(enic, rq->index);
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	u32 timer = cq->tobe_rx_coal_timeval;

	if (cq->tobe_rx_coal_timeval != cq->cur_rx_coal_timeval) {
		vnic_intr_coalescing_timer_set(&enic->intr[intr], timer);
		cq->cur_rx_coal_timeval = cq->tobe_rx_coal_timeval;
	}
}
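
/* enic_calc_int_moderation() below estimates the rx rate as
 * bytes * 8 / delta_us, which is numerically the rate in Mbps (bits per
 * microsecond equals megabits per second). The rate indexes mod_table,
 * and the resulting percentage interpolates between the small/large
 * packet range_start and range_end; the new timer value is averaged with
 * the previous one as damping.
 */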

static void enic_calc_int_moderation(struct enic *enic, struct vnic_rq *rq)
{
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
	struct vnic_cq *cq = &enic->cq[enic_cq_rq(enic, rq->index)];
	struct vnic_rx_bytes_counter *pkt_size_counter = &cq->pkt_size_counter;
	int index;
	u32 timer;
	u32 range_start;
	u32 traffic;
	u64 delta;
	ktime_t now = ktime_get();

	delta = ktime_us_delta(now, cq->prev_ts);
	if (delta < ENIC_AIC_TS_BREAK)
		return;
	cq->prev_ts = now;

	traffic = pkt_size_counter->large_pkt_bytes_cnt +
		  pkt_size_counter->small_pkt_bytes_cnt;
	/* The table takes Mbps
	 * traffic *= 8    => bits
	 * traffic *= (10^6 / delta)    => bps
	 * traffic /= 10^6     => Mbps
	 *
	 * Combining, traffic *= (8 / delta)
	 */

	traffic <<= 3;
	traffic = delta > UINT_MAX ? 0 : traffic / (u32)delta;

	for (index = 0; index < ENIC_MAX_COALESCE_TIMERS; index++)
		if (traffic < mod_table[index].rx_rate)
			break;
	range_start = (pkt_size_counter->small_pkt_bytes_cnt >
		       pkt_size_counter->large_pkt_bytes_cnt << 1) ?
		      rx_coal->small_pkt_range_start :
		      rx_coal->large_pkt_range_start;
	timer = range_start + ((rx_coal->range_end - range_start) *
			       mod_table[index].range_percent / 100);
	/* Damping */
	cq->tobe_rx_coal_timeval = (timer + cq->tobe_rx_coal_timeval) >> 1;

	pkt_size_counter->large_pkt_bytes_cnt = 0;
	pkt_size_counter->small_pkt_bytes_cnt = 0;
}

static int enic_poll(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int cq_rq = enic_cq_rq(enic, 0);
	unsigned int cq_wq = enic_cq_wq(enic, 0);
	unsigned int intr = enic_legacy_io_intr();
	unsigned int rq_work_to_do = budget;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int work_done, rq_work_done = 0, wq_work_done;
	int err;

	wq_work_done = vnic_cq_service(&enic->cq[cq_wq], wq_work_to_do,
				       enic_wq_service, NULL);

	if (budget > 0)
		rq_work_done = vnic_cq_service(&enic->cq[cq_rq],
					       rq_work_to_do, enic_rq_service, NULL);

	/* Accumulate intr event credits for this polling
	 * cycle. An intr event is the completion of a
	 * WQ or RQ packet.
	 */

	work_done = rq_work_done + wq_work_done;

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
					 work_done,
					 0 /* don't unmask intr */,
					 0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling
	 * mode so we can try to fill the ring again.
	 */

	if (err)
		rq_work_done = rq_work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[0]);

	if ((rq_work_done < budget) && napi_complete_done(napi, rq_work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[0]);
		vnic_intr_unmask(&enic->intr[intr]);
	}

	return rq_work_done;
}

#ifdef CONFIG_RFS_ACCEL
static void enic_free_rx_cpu_rmap(struct enic *enic)
{
	free_irq_cpu_rmap(enic->netdev->rx_cpu_rmap);
	enic->netdev->rx_cpu_rmap = NULL;
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
	int i, res;

	if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) {
		enic->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(enic->rq_count);
		if (unlikely(!enic->netdev->rx_cpu_rmap))
			return;
		for (i = 0; i < enic->rq_count; i++) {
			res = irq_cpu_rmap_add(enic->netdev->rx_cpu_rmap,
					       enic->msix_entry[i].vector);
			if (unlikely(res)) {
				enic_free_rx_cpu_rmap(enic);
				return;
			}
		}
	}
}

#else

static void enic_free_rx_cpu_rmap(struct enic *enic)
{
}

static void enic_set_rx_cpu_rmap(struct enic *enic)
{
}

#endif /* CONFIG_RFS_ACCEL */

static int enic_poll_msix_wq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int wq_index = (napi - &enic->napi[0]) - enic->rq_count;
	struct vnic_wq *wq = &enic->wq[wq_index];
	unsigned int cq;
	unsigned int intr;
	unsigned int wq_work_to_do = ENIC_WQ_NAPI_BUDGET;
	unsigned int wq_work_done;
	unsigned int wq_irq;

	wq_irq = wq->index;
	cq = enic_cq_wq(enic, wq_irq);
	intr = enic_msix_wq_intr(enic, wq_irq);
	wq_work_done = vnic_cq_service(&enic->cq[cq], wq_work_to_do,
				       enic_wq_service, NULL);

	vnic_intr_return_credits(&enic->intr[intr], wq_work_done,
				 0 /* don't unmask intr */,
				 1 /* reset intr timer */);
	if (!wq_work_done) {
		napi_complete(napi);
		vnic_intr_unmask(&enic->intr[intr]);
		return 0;
	}

	return budget;
}

static int enic_poll_msix_rq(struct napi_struct *napi, int budget)
{
	struct net_device *netdev = napi->dev;
	struct enic *enic = netdev_priv(netdev);
	unsigned int rq = (napi - &enic->napi[0]);
	unsigned int cq = enic_cq_rq(enic, rq);
	unsigned int intr = enic_msix_rq_intr(enic, rq);
	unsigned int work_to_do = budget;
	unsigned int work_done = 0;
	int err;

	/* Service RQ
	 */

	if (budget > 0)
		work_done = vnic_cq_service(&enic->cq[cq],
					    work_to_do, enic_rq_service, NULL);

	/* Return intr event credits for this polling
	 * cycle. An intr event is the completion of a
	 * RQ packet.
	 */

	if (work_done > 0)
		vnic_intr_return_credits(&enic->intr[intr],
					 work_done,
					 0 /* don't unmask intr */,
					 0 /* don't reset intr timer */);

	err = vnic_rq_fill(&enic->rq[rq], enic_rq_alloc_buf);

	/* Buffer allocation failed. Stay in polling mode
	 * so we can try to fill the ring again.
	 */

	if (err)
		work_done = work_to_do;
	if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
		/* Call the function which refreshes the intr coalescing timer
		 * value based on the traffic.
		 */
		enic_calc_int_moderation(enic, &enic->rq[rq]);

	if ((work_done < budget) && napi_complete_done(napi, work_done)) {

		/* Some work done, but not enough to stay in polling,
		 * exit polling
		 */

		if (enic->rx_coalesce_setting.use_adaptive_rx_coalesce)
			enic_set_int_moderation(enic, &enic->rq[rq]);
		vnic_intr_unmask(&enic->intr[intr]);
	}

	return work_done;
}

static void enic_notify_timer(struct timer_list *t)
{
	struct enic *enic = from_timer(enic, t, notify_timer);

	enic_notify_check(enic);

	mod_timer(&enic->notify_timer,
		  round_jiffies(jiffies + ENIC_NOTIFY_TIMER_PERIOD));
}

static void enic_free_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i;

	enic_free_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		free_irq(enic->pdev->irq, netdev);
		break;
	case VNIC_DEV_INTR_MODE_MSI:
		free_irq(enic->pdev->irq, enic);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
			if (enic->msix[i].requested)
				free_irq(enic->msix_entry[i].vector,
					 enic->msix[i].devid);
		break;
	default:
		break;
	}
}

static int enic_request_intr(struct enic *enic)
{
	struct net_device *netdev = enic->netdev;
	unsigned int i, intr;
	int err = 0;

	enic_set_rx_cpu_rmap(enic);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {

	case VNIC_DEV_INTR_MODE_INTX:

		err = request_irq(enic->pdev->irq, enic_isr_legacy,
				  IRQF_SHARED, netdev->name, netdev);
		break;

	case VNIC_DEV_INTR_MODE_MSI:

		err = request_irq(enic->pdev->irq, enic_isr_msi,
				  0, netdev->name, enic);
		break;

	case VNIC_DEV_INTR_MODE_MSIX:

		for (i = 0; i < enic->rq_count; i++) {
			intr = enic_msix_rq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				 sizeof(enic->msix[intr].devname),
				 "%s-rx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[i];
		}

		for (i = 0; i < enic->wq_count; i++) {
			int wq = enic_cq_wq(enic, i);

			intr = enic_msix_wq_intr(enic, i);
			snprintf(enic->msix[intr].devname,
				 sizeof(enic->msix[intr].devname),
				 "%s-tx-%u", netdev->name, i);
			enic->msix[intr].isr = enic_isr_msix;
			enic->msix[intr].devid = &enic->napi[wq];
		}

		intr = enic_msix_err_intr(enic);
		snprintf(enic->msix[intr].devname,
			 sizeof(enic->msix[intr].devname),
			 "%s-err", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_err;
		enic->msix[intr].devid = enic;

		intr = enic_msix_notify_intr(enic);
		snprintf(enic->msix[intr].devname,
			 sizeof(enic->msix[intr].devname),
			 "%s-notify", netdev->name);
		enic->msix[intr].isr = enic_isr_msix_notify;
		enic->msix[intr].devid = enic;

		for (i = 0; i < ARRAY_SIZE(enic->msix); i++)
			enic->msix[i].requested = 0;

		for (i = 0; i < enic->intr_count; i++) {
			err = request_irq(enic->msix_entry[i].vector,
					  enic->msix[i].isr, 0,
					  enic->msix[i].devname,
					  enic->msix[i].devid);
			if (err) {
				enic_free_intr(enic);
				break;
			}
			enic->msix[i].requested = 1;
		}

		break;

	default:
		break;
	}

	return err;
}

static void enic_synchronize_irqs(struct enic *enic)
{
	unsigned int i;

	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
	case VNIC_DEV_INTR_MODE_MSI:
		synchronize_irq(enic->pdev->irq);
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		for (i = 0; i < enic->intr_count; i++)
			synchronize_irq(enic->msix_entry[i].vector);
		break;
	default:
		break;
	}
}

static void enic_set_rx_coal_setting(struct enic *enic)
{
	unsigned int speed;
	int index = -1;
	struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;

	/* 1. Read the link speed from fw
	 * 2. Pick the default range for the speed
	 * 3. Update it in enic->rx_coalesce_setting
	 */
	speed = vnic_dev_port_speed(enic->vdev);
	if (ENIC_LINK_SPEED_10G < speed)
		index = ENIC_LINK_40G_INDEX;
	else if (ENIC_LINK_SPEED_4G < speed)
		index = ENIC_LINK_10G_INDEX;
	else
		index = ENIC_LINK_4G_INDEX;

	rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
	rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
	rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;

	/* Start with the value provided by UCSM */
	for (index = 0; index < enic->rq_count; index++)
		enic->cq[index].cur_rx_coal_timeval =
			enic->config.intr_timer_usec;

	rx_coal->use_adaptive_rx_coalesce = 1;
}

static int enic_dev_notify_set(struct enic *enic)
{
	int err;

	spin_lock_bh(&enic->devcmd_lock);
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_INTX:
		err = vnic_dev_notify_set(enic->vdev,
					  enic_legacy_notify_intr());
		break;
	case VNIC_DEV_INTR_MODE_MSIX:
		err = vnic_dev_notify_set(enic->vdev,
					  enic_msix_notify_intr(enic));
		break;
	default:
		err = vnic_dev_notify_set(enic->vdev, -1 /* no intr */);
		break;
	}
	spin_unlock_bh(&enic->devcmd_lock);

	return err;
}

static void enic_notify_timer_start(struct enic *enic)
{
	switch (vnic_dev_get_intr_mode(enic->vdev)) {
	case VNIC_DEV_INTR_MODE_MSI:
		mod_timer(&enic->notify_timer, jiffies);
		break;
	default:
		/* Using intr for notification for INTx/MSI-X */
		break;
	}
}
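
/* enic_open() below brings the interface up in this order: request IRQs
 * and set affinity hints, set up the notify area, enable and fill the RQs,
 * enable the WQs, program the station address and rx mode, wake the tx
 * queues and NAPI contexts, enable the device and finally unmask
 * interrupts. enic_stop() unwinds in roughly the reverse order.
 */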
netif_tx_wake_all_queues(netdev); 1960 1961 for (i = 0; i < enic->rq_count; i++) 1962 napi_enable(&enic->napi[i]); 1963 1964 if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) 1965 for (i = 0; i < enic->wq_count; i++) 1966 napi_enable(&enic->napi[enic_cq_wq(enic, i)]); 1967 enic_dev_enable(enic); 1968 1969 for (i = 0; i < enic->intr_count; i++) 1970 vnic_intr_unmask(&enic->intr[i]); 1971 1972 enic_notify_timer_start(enic); 1973 enic_rfs_timer_start(enic); 1974 1975 return 0; 1976 1977 err_out_free_rq: 1978 for (i = 0; i < enic->rq_count; i++) { 1979 ret = vnic_rq_disable(&enic->rq[i]); 1980 if (!ret) 1981 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); 1982 } 1983 enic_dev_notify_unset(enic); 1984 err_out_free_intr: 1985 enic_unset_affinity_hint(enic); 1986 enic_free_intr(enic); 1987 1988 return err; 1989 } 1990 1991 /* rtnl lock is held, process context */ 1992 static int enic_stop(struct net_device *netdev) 1993 { 1994 struct enic *enic = netdev_priv(netdev); 1995 unsigned int i; 1996 int err; 1997 1998 for (i = 0; i < enic->intr_count; i++) { 1999 vnic_intr_mask(&enic->intr[i]); 2000 (void)vnic_intr_masked(&enic->intr[i]); /* flush write */ 2001 } 2002 2003 enic_synchronize_irqs(enic); 2004 2005 del_timer_sync(&enic->notify_timer); 2006 enic_rfs_flw_tbl_free(enic); 2007 2008 enic_dev_disable(enic); 2009 2010 for (i = 0; i < enic->rq_count; i++) 2011 napi_disable(&enic->napi[i]); 2012 2013 netif_carrier_off(netdev); 2014 if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) 2015 for (i = 0; i < enic->wq_count; i++) 2016 napi_disable(&enic->napi[enic_cq_wq(enic, i)]); 2017 netif_tx_disable(netdev); 2018 2019 if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) 2020 enic_dev_del_station_addr(enic); 2021 2022 for (i = 0; i < enic->wq_count; i++) { 2023 err = vnic_wq_disable(&enic->wq[i]); 2024 if (err) 2025 return err; 2026 } 2027 for (i = 0; i < enic->rq_count; i++) { 2028 err = vnic_rq_disable(&enic->rq[i]); 2029 if (err) 2030 return err; 2031 } 2032 2033 enic_dev_notify_unset(enic); 2034 enic_unset_affinity_hint(enic); 2035 enic_free_intr(enic); 2036 2037 for (i = 0; i < enic->wq_count; i++) 2038 vnic_wq_clean(&enic->wq[i], enic_free_wq_buf); 2039 for (i = 0; i < enic->rq_count; i++) 2040 vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); 2041 for (i = 0; i < enic->cq_count; i++) 2042 vnic_cq_clean(&enic->cq[i]); 2043 for (i = 0; i < enic->intr_count; i++) 2044 vnic_intr_clean(&enic->intr[i]); 2045 2046 return 0; 2047 } 2048 2049 static int _enic_change_mtu(struct net_device *netdev, int new_mtu) 2050 { 2051 bool running = netif_running(netdev); 2052 int err = 0; 2053 2054 ASSERT_RTNL(); 2055 if (running) { 2056 err = enic_stop(netdev); 2057 if (err) 2058 return err; 2059 } 2060 2061 netdev->mtu = new_mtu; 2062 2063 if (running) { 2064 err = enic_open(netdev); 2065 if (err) 2066 return err; 2067 } 2068 2069 return 0; 2070 } 2071 2072 static int enic_change_mtu(struct net_device *netdev, int new_mtu) 2073 { 2074 struct enic *enic = netdev_priv(netdev); 2075 2076 if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) 2077 return -EOPNOTSUPP; 2078 2079 if (netdev->mtu > enic->port_mtu) 2080 netdev_warn(netdev, 2081 "interface MTU (%d) set higher than port MTU (%d)\n", 2082 netdev->mtu, enic->port_mtu); 2083 2084 return _enic_change_mtu(netdev, new_mtu); 2085 } 2086 2087 static void enic_change_mtu_work(struct work_struct *work) 2088 { 2089 struct enic *enic = container_of(work, struct enic, change_mtu_work); 2090 struct net_device *netdev = enic->netdev; 2091 int 
new_mtu = vnic_dev_mtu(enic->vdev); 2092 2093 rtnl_lock(); 2094 (void)_enic_change_mtu(netdev, new_mtu); 2095 rtnl_unlock(); 2096 2097 netdev_info(netdev, "interface MTU set as %d\n", netdev->mtu); 2098 } 2099 2100 #ifdef CONFIG_NET_POLL_CONTROLLER 2101 static void enic_poll_controller(struct net_device *netdev) 2102 { 2103 struct enic *enic = netdev_priv(netdev); 2104 struct vnic_dev *vdev = enic->vdev; 2105 unsigned int i, intr; 2106 2107 switch (vnic_dev_get_intr_mode(vdev)) { 2108 case VNIC_DEV_INTR_MODE_MSIX: 2109 for (i = 0; i < enic->rq_count; i++) { 2110 intr = enic_msix_rq_intr(enic, i); 2111 enic_isr_msix(enic->msix_entry[intr].vector, 2112 &enic->napi[i]); 2113 } 2114 2115 for (i = 0; i < enic->wq_count; i++) { 2116 intr = enic_msix_wq_intr(enic, i); 2117 enic_isr_msix(enic->msix_entry[intr].vector, 2118 &enic->napi[enic_cq_wq(enic, i)]); 2119 } 2120 2121 break; 2122 case VNIC_DEV_INTR_MODE_MSI: 2123 enic_isr_msi(enic->pdev->irq, enic); 2124 break; 2125 case VNIC_DEV_INTR_MODE_INTX: 2126 enic_isr_legacy(enic->pdev->irq, netdev); 2127 break; 2128 default: 2129 break; 2130 } 2131 } 2132 #endif 2133 2134 static int enic_dev_wait(struct vnic_dev *vdev, 2135 int (*start)(struct vnic_dev *, int), 2136 int (*finished)(struct vnic_dev *, int *), 2137 int arg) 2138 { 2139 unsigned long time; 2140 int done; 2141 int err; 2142 2143 BUG_ON(in_interrupt()); 2144 2145 err = start(vdev, arg); 2146 if (err) 2147 return err; 2148 2149 /* Wait for func to complete...2 seconds max 2150 */ 2151 2152 time = jiffies + (HZ * 2); 2153 do { 2154 2155 err = finished(vdev, &done); 2156 if (err) 2157 return err; 2158 2159 if (done) 2160 return 0; 2161 2162 schedule_timeout_uninterruptible(HZ / 10); 2163 2164 } while (time_after(time, jiffies)); 2165 2166 return -ETIMEDOUT; 2167 } 2168 2169 static int enic_dev_open(struct enic *enic) 2170 { 2171 int err; 2172 u32 flags = CMD_OPENF_IG_DESCCACHE; 2173 2174 err = enic_dev_wait(enic->vdev, vnic_dev_open, 2175 vnic_dev_open_done, flags); 2176 if (err) 2177 dev_err(enic_get_dev(enic), "vNIC device open failed, err %d\n", 2178 err); 2179 2180 return err; 2181 } 2182 2183 static int enic_dev_soft_reset(struct enic *enic) 2184 { 2185 int err; 2186 2187 err = enic_dev_wait(enic->vdev, vnic_dev_soft_reset, 2188 vnic_dev_soft_reset_done, 0); 2189 if (err) 2190 netdev_err(enic->netdev, "vNIC soft reset failed, err %d\n", 2191 err); 2192 2193 return err; 2194 } 2195 2196 static int enic_dev_hang_reset(struct enic *enic) 2197 { 2198 int err; 2199 2200 err = enic_dev_wait(enic->vdev, vnic_dev_hang_reset, 2201 vnic_dev_hang_reset_done, 0); 2202 if (err) 2203 netdev_err(enic->netdev, "vNIC hang reset failed, err %d\n", 2204 err); 2205 2206 return err; 2207 } 2208 2209 int __enic_set_rsskey(struct enic *enic) 2210 { 2211 union vnic_rss_key *rss_key_buf_va; 2212 dma_addr_t rss_key_buf_pa; 2213 int i, kidx, bidx, err; 2214 2215 rss_key_buf_va = pci_zalloc_consistent(enic->pdev, 2216 sizeof(union vnic_rss_key), 2217 &rss_key_buf_pa); 2218 if (!rss_key_buf_va) 2219 return -ENOMEM; 2220 2221 for (i = 0; i < ENIC_RSS_LEN; i++) { 2222 kidx = i / ENIC_RSS_BYTES_PER_KEY; 2223 bidx = i % ENIC_RSS_BYTES_PER_KEY; 2224 rss_key_buf_va->key[kidx].b[bidx] = enic->rss_key[i]; 2225 } 2226 spin_lock_bh(&enic->devcmd_lock); 2227 err = enic_set_rss_key(enic, 2228 rss_key_buf_pa, 2229 sizeof(union vnic_rss_key)); 2230 spin_unlock_bh(&enic->devcmd_lock); 2231 2232 pci_free_consistent(enic->pdev, sizeof(union vnic_rss_key), 2233 rss_key_buf_va, rss_key_buf_pa); 2234 2235 return err; 2236 } 2237 2238 
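/* Key layout note: __enic_set_rsskey() above scatters the flat rss_key into the union vnic_rss_key sub-keys, with byte i going to key[i / ENIC_RSS_BYTES_PER_KEY].b[i % ENIC_RSS_BYTES_PER_KEY]; e.g. assuming the usual 40-byte key split into 10-byte sub-keys, byte 13 lands in key[1].b[3]. */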
static int enic_set_rsskey(struct enic *enic) 2239 { 2240 netdev_rss_key_fill(enic->rss_key, ENIC_RSS_LEN); 2241 2242 return __enic_set_rsskey(enic); 2243 } 2244 2245 static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) 2246 { 2247 dma_addr_t rss_cpu_buf_pa; 2248 union vnic_rss_cpu *rss_cpu_buf_va = NULL; 2249 unsigned int i; 2250 int err; 2251 2252 rss_cpu_buf_va = pci_alloc_consistent(enic->pdev, 2253 sizeof(union vnic_rss_cpu), &rss_cpu_buf_pa); 2254 if (!rss_cpu_buf_va) 2255 return -ENOMEM; 2256 2257 for (i = 0; i < (1 << rss_hash_bits); i++) 2258 (*rss_cpu_buf_va).cpu[i/4].b[i%4] = i % enic->rq_count; 2259 2260 spin_lock_bh(&enic->devcmd_lock); 2261 err = enic_set_rss_cpu(enic, 2262 rss_cpu_buf_pa, 2263 sizeof(union vnic_rss_cpu)); 2264 spin_unlock_bh(&enic->devcmd_lock); 2265 2266 pci_free_consistent(enic->pdev, sizeof(union vnic_rss_cpu), 2267 rss_cpu_buf_va, rss_cpu_buf_pa); 2268 2269 return err; 2270 } 2271 2272 static int enic_set_niccfg(struct enic *enic, u8 rss_default_cpu, 2273 u8 rss_hash_type, u8 rss_hash_bits, u8 rss_base_cpu, u8 rss_enable) 2274 { 2275 const u8 tso_ipid_split_en = 0; 2276 const u8 ig_vlan_strip_en = 1; 2277 int err; 2278 2279 /* Enable VLAN tag stripping. 2280 */ 2281 2282 spin_lock_bh(&enic->devcmd_lock); 2283 err = enic_set_nic_cfg(enic, 2284 rss_default_cpu, rss_hash_type, 2285 rss_hash_bits, rss_base_cpu, 2286 rss_enable, tso_ipid_split_en, 2287 ig_vlan_strip_en); 2288 spin_unlock_bh(&enic->devcmd_lock); 2289 2290 return err; 2291 } 2292 2293 static int enic_set_rss_nic_cfg(struct enic *enic) 2294 { 2295 struct device *dev = enic_get_dev(enic); 2296 const u8 rss_default_cpu = 0; 2297 const u8 rss_hash_bits = 7; 2298 const u8 rss_base_cpu = 0; 2299 u8 rss_hash_type; 2300 int res; 2301 u8 rss_enable = ENIC_SETTING(enic, RSS) && (enic->rq_count > 1); 2302 2303 spin_lock_bh(&enic->devcmd_lock); 2304 res = vnic_dev_capable_rss_hash_type(enic->vdev, &rss_hash_type); 2305 spin_unlock_bh(&enic->devcmd_lock); 2306 if (res) { 2307 /* defaults for old adapters 2308 */ 2309 rss_hash_type = NIC_CFG_RSS_HASH_TYPE_IPV4 | 2310 NIC_CFG_RSS_HASH_TYPE_TCP_IPV4 | 2311 NIC_CFG_RSS_HASH_TYPE_IPV6 | 2312 NIC_CFG_RSS_HASH_TYPE_TCP_IPV6; 2313 } 2314 2315 if (rss_enable) { 2316 if (!enic_set_rsskey(enic)) { 2317 if (enic_set_rsscpu(enic, rss_hash_bits)) { 2318 rss_enable = 0; 2319 dev_warn(dev, "RSS disabled, " 2320 "Failed to set RSS cpu indirection table."); 2321 } 2322 } else { 2323 rss_enable = 0; 2324 dev_warn(dev, "RSS disabled, Failed to set RSS key.\n"); 2325 } 2326 } 2327 2328 return enic_set_niccfg(enic, rss_default_cpu, rss_hash_type, 2329 rss_hash_bits, rss_base_cpu, rss_enable); 2330 } 2331 2332 static void enic_reset(struct work_struct *work) 2333 { 2334 struct enic *enic = container_of(work, struct enic, reset); 2335 2336 if (!netif_running(enic->netdev)) 2337 return; 2338 2339 rtnl_lock(); 2340 2341 spin_lock(&enic->enic_api_lock); 2342 enic_stop(enic->netdev); 2343 enic_dev_soft_reset(enic); 2344 enic_reset_addr_lists(enic); 2345 enic_init_vnic_resources(enic); 2346 enic_set_rss_nic_cfg(enic); 2347 enic_dev_set_ig_vlan_rewrite_mode(enic); 2348 enic_open(enic->netdev); 2349 spin_unlock(&enic->enic_api_lock); 2350 call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); 2351 2352 rtnl_unlock(); 2353 } 2354 2355 static void enic_tx_hang_reset(struct work_struct *work) 2356 { 2357 struct enic *enic = container_of(work, struct enic, tx_hang_reset); 2358 2359 rtnl_lock(); 2360 2361 spin_lock(&enic->enic_api_lock); 2362 enic_dev_hang_notify(enic); 2363 
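/* Recover from the TX hang: stop the interface, hang-reset the vNIC, rebuild the address lists and vNIC resources, reprogram RSS and the ingress VLAN rewrite mode, then reopen and signal NETDEV_REBOOT. */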
enic_stop(enic->netdev); 2364 enic_dev_hang_reset(enic); 2365 enic_reset_addr_lists(enic); 2366 enic_init_vnic_resources(enic); 2367 enic_set_rss_nic_cfg(enic); 2368 enic_dev_set_ig_vlan_rewrite_mode(enic); 2369 enic_open(enic->netdev); 2370 spin_unlock(&enic->enic_api_lock); 2371 call_netdevice_notifiers(NETDEV_REBOOT, enic->netdev); 2372 2373 rtnl_unlock(); 2374 } 2375 2376 static int enic_set_intr_mode(struct enic *enic) 2377 { 2378 unsigned int n = min_t(unsigned int, enic->rq_count, ENIC_RQ_MAX); 2379 unsigned int m = min_t(unsigned int, enic->wq_count, ENIC_WQ_MAX); 2380 unsigned int i; 2381 2382 /* Set interrupt mode (INTx, MSI, MSI-X) depending 2383 * on system capabilities. 2384 * 2385 * Try MSI-X first 2386 * 2387 * We need n RQs, m WQs, n+m CQs, and n+m+2 INTRs 2388 * (the second to last INTR is used for WQ/RQ errors) 2389 * (the last INTR is used for notifications) 2390 */ 2391 2392 BUG_ON(ARRAY_SIZE(enic->msix_entry) < n + m + 2); 2393 for (i = 0; i < n + m + 2; i++) 2394 enic->msix_entry[i].entry = i; 2395 2396 /* Use multiple RQs if RSS is enabled 2397 */ 2398 2399 if (ENIC_SETTING(enic, RSS) && 2400 enic->config.intr_mode < 1 && 2401 enic->rq_count >= n && 2402 enic->wq_count >= m && 2403 enic->cq_count >= n + m && 2404 enic->intr_count >= n + m + 2) { 2405 2406 if (pci_enable_msix_range(enic->pdev, enic->msix_entry, 2407 n + m + 2, n + m + 2) > 0) { 2408 2409 enic->rq_count = n; 2410 enic->wq_count = m; 2411 enic->cq_count = n + m; 2412 enic->intr_count = n + m + 2; 2413 2414 vnic_dev_set_intr_mode(enic->vdev, 2415 VNIC_DEV_INTR_MODE_MSIX); 2416 2417 return 0; 2418 } 2419 } 2420 2421 if (enic->config.intr_mode < 1 && 2422 enic->rq_count >= 1 && 2423 enic->wq_count >= m && 2424 enic->cq_count >= 1 + m && 2425 enic->intr_count >= 1 + m + 2) { 2426 if (pci_enable_msix_range(enic->pdev, enic->msix_entry, 2427 1 + m + 2, 1 + m + 2) > 0) { 2428 2429 enic->rq_count = 1; 2430 enic->wq_count = m; 2431 enic->cq_count = 1 + m; 2432 enic->intr_count = 1 + m + 2; 2433 2434 vnic_dev_set_intr_mode(enic->vdev, 2435 VNIC_DEV_INTR_MODE_MSIX); 2436 2437 return 0; 2438 } 2439 } 2440 2441 /* Next try MSI 2442 * 2443 * We need 1 RQ, 1 WQ, 2 CQs, and 1 INTR 2444 */ 2445 2446 if (enic->config.intr_mode < 2 && 2447 enic->rq_count >= 1 && 2448 enic->wq_count >= 1 && 2449 enic->cq_count >= 2 && 2450 enic->intr_count >= 1 && 2451 !pci_enable_msi(enic->pdev)) { 2452 2453 enic->rq_count = 1; 2454 enic->wq_count = 1; 2455 enic->cq_count = 2; 2456 enic->intr_count = 1; 2457 2458 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_MSI); 2459 2460 return 0; 2461 } 2462 2463 /* Next try INTx 2464 * 2465 * We need 1 RQ, 1 WQ, 2 CQs, and 3 INTRs 2466 * (the first INTR is used for WQ/RQ) 2467 * (the second INTR is used for WQ/RQ errors) 2468 * (the last INTR is used for notifications) 2469 */ 2470 2471 if (enic->config.intr_mode < 3 && 2472 enic->rq_count >= 1 && 2473 enic->wq_count >= 1 && 2474 enic->cq_count >= 2 && 2475 enic->intr_count >= 3) { 2476 2477 enic->rq_count = 1; 2478 enic->wq_count = 1; 2479 enic->cq_count = 2; 2480 enic->intr_count = 3; 2481 2482 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_INTX); 2483 2484 return 0; 2485 } 2486 2487 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); 2488 2489 return -EINVAL; 2490 } 2491 2492 static void enic_clear_intr_mode(struct enic *enic) 2493 { 2494 switch (vnic_dev_get_intr_mode(enic->vdev)) { 2495 case VNIC_DEV_INTR_MODE_MSIX: 2496 pci_disable_msix(enic->pdev); 2497 break; 2498 case VNIC_DEV_INTR_MODE_MSI: 2499 
pci_disable_msi(enic->pdev); 2500 break; 2501 default: 2502 break; 2503 } 2504 2505 vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN); 2506 } 2507 2508 static const struct net_device_ops enic_netdev_dynamic_ops = { 2509 .ndo_open = enic_open, 2510 .ndo_stop = enic_stop, 2511 .ndo_start_xmit = enic_hard_start_xmit, 2512 .ndo_get_stats64 = enic_get_stats, 2513 .ndo_validate_addr = eth_validate_addr, 2514 .ndo_set_rx_mode = enic_set_rx_mode, 2515 .ndo_set_mac_address = enic_set_mac_address_dynamic, 2516 .ndo_change_mtu = enic_change_mtu, 2517 .ndo_vlan_rx_add_vid = enic_vlan_rx_add_vid, 2518 .ndo_vlan_rx_kill_vid = enic_vlan_rx_kill_vid, 2519 .ndo_tx_timeout = enic_tx_timeout, 2520 .ndo_set_vf_port = enic_set_vf_port, 2521 .ndo_get_vf_port = enic_get_vf_port, 2522 .ndo_set_vf_mac = enic_set_vf_mac, 2523 #ifdef CONFIG_NET_POLL_CONTROLLER 2524 .ndo_poll_controller = enic_poll_controller, 2525 #endif 2526 #ifdef CONFIG_RFS_ACCEL 2527 .ndo_rx_flow_steer = enic_rx_flow_steer, 2528 #endif 2529 .ndo_udp_tunnel_add = enic_udp_tunnel_add, 2530 .ndo_udp_tunnel_del = enic_udp_tunnel_del, 2531 .ndo_features_check = enic_features_check, 2532 }; 2533 2534 static const struct net_device_ops enic_netdev_ops = { 2535 .ndo_open = enic_open, 2536 .ndo_stop = enic_stop, 2537 .ndo_start_xmit = enic_hard_start_xmit, 2538 .ndo_get_stats64 = enic_get_stats, 2539 .ndo_validate_addr = eth_validate_addr, 2540 .ndo_set_mac_address = enic_set_mac_address, 2541 .ndo_set_rx_mode = enic_set_rx_mode, 2542 .ndo_change_mtu = enic_change_mtu, 2543 .ndo_vlan_rx_add_vid = enic_vlan_rx_add_vid, 2544 .ndo_vlan_rx_kill_vid = enic_vlan_rx_kill_vid, 2545 .ndo_tx_timeout = enic_tx_timeout, 2546 .ndo_set_vf_port = enic_set_vf_port, 2547 .ndo_get_vf_port = enic_get_vf_port, 2548 .ndo_set_vf_mac = enic_set_vf_mac, 2549 #ifdef CONFIG_NET_POLL_CONTROLLER 2550 .ndo_poll_controller = enic_poll_controller, 2551 #endif 2552 #ifdef CONFIG_RFS_ACCEL 2553 .ndo_rx_flow_steer = enic_rx_flow_steer, 2554 #endif 2555 .ndo_udp_tunnel_add = enic_udp_tunnel_add, 2556 .ndo_udp_tunnel_del = enic_udp_tunnel_del, 2557 .ndo_features_check = enic_features_check, 2558 }; 2559 2560 static void enic_dev_deinit(struct enic *enic) 2561 { 2562 unsigned int i; 2563 2564 for (i = 0; i < enic->rq_count; i++) { 2565 napi_hash_del(&enic->napi[i]); 2566 netif_napi_del(&enic->napi[i]); 2567 } 2568 if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) 2569 for (i = 0; i < enic->wq_count; i++) 2570 netif_napi_del(&enic->napi[enic_cq_wq(enic, i)]); 2571 2572 enic_free_vnic_resources(enic); 2573 enic_clear_intr_mode(enic); 2574 enic_free_affinity_hint(enic); 2575 } 2576 2577 static void enic_kdump_kernel_config(struct enic *enic) 2578 { 2579 if (is_kdump_kernel()) { 2580 dev_info(enic_get_dev(enic), "Running from within kdump kernel. 
Using minimal resources\n"); 2581 enic->rq_count = 1; 2582 enic->wq_count = 1; 2583 enic->config.rq_desc_count = ENIC_MIN_RQ_DESCS; 2584 enic->config.wq_desc_count = ENIC_MIN_WQ_DESCS; 2585 enic->config.mtu = min_t(u16, 1500, enic->config.mtu); 2586 } 2587 } 2588 2589 static int enic_dev_init(struct enic *enic) 2590 { 2591 struct device *dev = enic_get_dev(enic); 2592 struct net_device *netdev = enic->netdev; 2593 unsigned int i; 2594 int err; 2595 2596 /* Get interrupt coalesce timer info */ 2597 err = enic_dev_intr_coal_timer_info(enic); 2598 if (err) { 2599 dev_warn(dev, "Using default conversion factor for " 2600 "interrupt coalesce timer\n"); 2601 vnic_dev_intr_coal_timer_info_default(enic->vdev); 2602 } 2603 2604 /* Get vNIC configuration 2605 */ 2606 2607 err = enic_get_vnic_config(enic); 2608 if (err) { 2609 dev_err(dev, "Get vNIC configuration failed, aborting\n"); 2610 return err; 2611 } 2612 2613 /* Get available resource counts 2614 */ 2615 2616 enic_get_res_counts(enic); 2617 2618 /* modify resource count if we are in kdump_kernel 2619 */ 2620 enic_kdump_kernel_config(enic); 2621 2622 /* Set interrupt mode based on resource counts and system 2623 * capabilities 2624 */ 2625 2626 err = enic_set_intr_mode(enic); 2627 if (err) { 2628 dev_err(dev, "Failed to set intr mode based on resource " 2629 "counts and system capabilities, aborting\n"); 2630 return err; 2631 } 2632 2633 /* Allocate and configure vNIC resources 2634 */ 2635 2636 err = enic_alloc_vnic_resources(enic); 2637 if (err) { 2638 dev_err(dev, "Failed to alloc vNIC resources, aborting\n"); 2639 goto err_out_free_vnic_resources; 2640 } 2641 2642 enic_init_vnic_resources(enic); 2643 2644 err = enic_set_rss_nic_cfg(enic); 2645 if (err) { 2646 dev_err(dev, "Failed to config nic, aborting\n"); 2647 goto err_out_free_vnic_resources; 2648 } 2649 2650 switch (vnic_dev_get_intr_mode(enic->vdev)) { 2651 default: 2652 netif_napi_add(netdev, &enic->napi[0], enic_poll, 64); 2653 break; 2654 case VNIC_DEV_INTR_MODE_MSIX: 2655 for (i = 0; i < enic->rq_count; i++) { 2656 netif_napi_add(netdev, &enic->napi[i], 2657 enic_poll_msix_rq, NAPI_POLL_WEIGHT); 2658 } 2659 for (i = 0; i < enic->wq_count; i++) 2660 netif_napi_add(netdev, &enic->napi[enic_cq_wq(enic, i)], 2661 enic_poll_msix_wq, NAPI_POLL_WEIGHT); 2662 break; 2663 } 2664 2665 return 0; 2666 2667 err_out_free_vnic_resources: 2668 enic_free_affinity_hint(enic); 2669 enic_clear_intr_mode(enic); 2670 enic_free_vnic_resources(enic); 2671 2672 return err; 2673 } 2674 2675 static void enic_iounmap(struct enic *enic) 2676 { 2677 unsigned int i; 2678 2679 for (i = 0; i < ARRAY_SIZE(enic->bar); i++) 2680 if (enic->bar[i].vaddr) 2681 iounmap(enic->bar[i].vaddr); 2682 } 2683 2684 static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 2685 { 2686 struct device *dev = &pdev->dev; 2687 struct net_device *netdev; 2688 struct enic *enic; 2689 int using_dac = 0; 2690 unsigned int i; 2691 int err; 2692 #ifdef CONFIG_PCI_IOV 2693 int pos = 0; 2694 #endif 2695 int num_pps = 1; 2696 2697 /* Allocate net device structure and initialize. Private 2698 * instance data is initialized to zero. 
2699 */ 2700 2701 netdev = alloc_etherdev_mqs(sizeof(struct enic), 2702 ENIC_RQ_MAX, ENIC_WQ_MAX); 2703 if (!netdev) 2704 return -ENOMEM; 2705 2706 pci_set_drvdata(pdev, netdev); 2707 2708 SET_NETDEV_DEV(netdev, &pdev->dev); 2709 2710 enic = netdev_priv(netdev); 2711 enic->netdev = netdev; 2712 enic->pdev = pdev; 2713 2714 /* Setup PCI resources 2715 */ 2716 2717 err = pci_enable_device_mem(pdev); 2718 if (err) { 2719 dev_err(dev, "Cannot enable PCI device, aborting\n"); 2720 goto err_out_free_netdev; 2721 } 2722 2723 err = pci_request_regions(pdev, DRV_NAME); 2724 if (err) { 2725 dev_err(dev, "Cannot request PCI regions, aborting\n"); 2726 goto err_out_disable_device; 2727 } 2728 2729 pci_set_master(pdev); 2730 2731 /* Query PCI controller on system for DMA addressing 2732 * limitation for the device. Try 47-bit first, and 2733 * fail to 32-bit. 2734 */ 2735 2736 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(47)); 2737 if (err) { 2738 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 2739 if (err) { 2740 dev_err(dev, "No usable DMA configuration, aborting\n"); 2741 goto err_out_release_regions; 2742 } 2743 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 2744 if (err) { 2745 dev_err(dev, "Unable to obtain %u-bit DMA " 2746 "for consistent allocations, aborting\n", 32); 2747 goto err_out_release_regions; 2748 } 2749 } else { 2750 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(47)); 2751 if (err) { 2752 dev_err(dev, "Unable to obtain %u-bit DMA " 2753 "for consistent allocations, aborting\n", 47); 2754 goto err_out_release_regions; 2755 } 2756 using_dac = 1; 2757 } 2758 2759 /* Map vNIC resources from BAR0-5 2760 */ 2761 2762 for (i = 0; i < ARRAY_SIZE(enic->bar); i++) { 2763 if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) 2764 continue; 2765 enic->bar[i].len = pci_resource_len(pdev, i); 2766 enic->bar[i].vaddr = pci_iomap(pdev, i, enic->bar[i].len); 2767 if (!enic->bar[i].vaddr) { 2768 dev_err(dev, "Cannot memory-map BAR %d, aborting\n", i); 2769 err = -ENODEV; 2770 goto err_out_iounmap; 2771 } 2772 enic->bar[i].bus_addr = pci_resource_start(pdev, i); 2773 } 2774 2775 /* Register vNIC device 2776 */ 2777 2778 enic->vdev = vnic_dev_register(NULL, enic, pdev, enic->bar, 2779 ARRAY_SIZE(enic->bar)); 2780 if (!enic->vdev) { 2781 dev_err(dev, "vNIC registration failed, aborting\n"); 2782 err = -ENODEV; 2783 goto err_out_iounmap; 2784 } 2785 2786 err = vnic_devcmd_init(enic->vdev); 2787 2788 if (err) 2789 goto err_out_vnic_unregister; 2790 2791 #ifdef CONFIG_PCI_IOV 2792 /* Get number of subvnics */ 2793 pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); 2794 if (pos) { 2795 pci_read_config_word(pdev, pos + PCI_SRIOV_TOTAL_VF, 2796 &enic->num_vfs); 2797 if (enic->num_vfs) { 2798 err = pci_enable_sriov(pdev, enic->num_vfs); 2799 if (err) { 2800 dev_err(dev, "SRIOV enable failed, aborting." 
2801 " pci_enable_sriov() returned %d\n", 2802 err); 2803 goto err_out_vnic_unregister; 2804 } 2805 enic->priv_flags |= ENIC_SRIOV_ENABLED; 2806 num_pps = enic->num_vfs; 2807 } 2808 } 2809 #endif 2810 2811 /* Allocate structure for port profiles */ 2812 enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL); 2813 if (!enic->pp) { 2814 err = -ENOMEM; 2815 goto err_out_disable_sriov_pp; 2816 } 2817 2818 /* Issue device open to get device in known state 2819 */ 2820 2821 err = enic_dev_open(enic); 2822 if (err) { 2823 dev_err(dev, "vNIC dev open failed, aborting\n"); 2824 goto err_out_disable_sriov; 2825 } 2826 2827 /* Setup devcmd lock 2828 */ 2829 2830 spin_lock_init(&enic->devcmd_lock); 2831 spin_lock_init(&enic->enic_api_lock); 2832 2833 /* 2834 * Set ingress vlan rewrite mode before vnic initialization 2835 */ 2836 2837 err = enic_dev_set_ig_vlan_rewrite_mode(enic); 2838 if (err) { 2839 dev_err(dev, 2840 "Failed to set ingress vlan rewrite mode, aborting.\n"); 2841 goto err_out_dev_close; 2842 } 2843 2844 /* Issue device init to initialize the vnic-to-switch link. 2845 * We'll start with carrier off and wait for link UP 2846 * notification later to turn on carrier. We don't need 2847 * to wait here for the vnic-to-switch link initialization 2848 * to complete; link UP notification is the indication that 2849 * the process is complete. 2850 */ 2851 2852 netif_carrier_off(netdev); 2853 2854 /* Do not call dev_init for a dynamic vnic. 2855 * For a dynamic vnic, init_prov_info will be 2856 * called later by an upper layer. 2857 */ 2858 2859 if (!enic_is_dynamic(enic)) { 2860 err = vnic_dev_init(enic->vdev, 0); 2861 if (err) { 2862 dev_err(dev, "vNIC dev init failed, aborting\n"); 2863 goto err_out_dev_close; 2864 } 2865 } 2866 2867 err = enic_dev_init(enic); 2868 if (err) { 2869 dev_err(dev, "Device initialization failed, aborting\n"); 2870 goto err_out_dev_close; 2871 } 2872 2873 netif_set_real_num_tx_queues(netdev, enic->wq_count); 2874 netif_set_real_num_rx_queues(netdev, enic->rq_count); 2875 2876 /* Setup notification timer, HW reset task, and wq locks 2877 */ 2878 2879 timer_setup(&enic->notify_timer, enic_notify_timer, 0); 2880 2881 enic_rfs_flw_tbl_init(enic); 2882 enic_set_rx_coal_setting(enic); 2883 INIT_WORK(&enic->reset, enic_reset); 2884 INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); 2885 INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work); 2886 2887 for (i = 0; i < enic->wq_count; i++) 2888 spin_lock_init(&enic->wq_lock[i]); 2889 2890 /* Register net device 2891 */ 2892 2893 enic->port_mtu = enic->config.mtu; 2894 2895 err = enic_set_mac_addr(netdev, enic->mac_addr); 2896 if (err) { 2897 dev_err(dev, "Invalid MAC address, aborting\n"); 2898 goto err_out_dev_deinit; 2899 } 2900 2901 enic->tx_coalesce_usecs = enic->config.intr_timer_usec; 2902 /* rx coalesce time already got initialized. 
This gets used 2903 * if adaptive coal is turned off 2904 */ 2905 enic->rx_coalesce_usecs = enic->tx_coalesce_usecs; 2906 2907 if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) 2908 netdev->netdev_ops = &enic_netdev_dynamic_ops; 2909 else 2910 netdev->netdev_ops = &enic_netdev_ops; 2911 2912 netdev->watchdog_timeo = 2 * HZ; 2913 enic_set_ethtool_ops(netdev); 2914 2915 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; 2916 if (ENIC_SETTING(enic, LOOP)) { 2917 netdev->features &= ~NETIF_F_HW_VLAN_CTAG_TX; 2918 enic->loop_enable = 1; 2919 enic->loop_tag = enic->config.loop_tag; 2920 dev_info(dev, "loopback tag=0x%04x\n", enic->loop_tag); 2921 } 2922 if (ENIC_SETTING(enic, TXCSUM)) 2923 netdev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM; 2924 if (ENIC_SETTING(enic, TSO)) 2925 netdev->hw_features |= NETIF_F_TSO | 2926 NETIF_F_TSO6 | NETIF_F_TSO_ECN; 2927 if (ENIC_SETTING(enic, RSS)) 2928 netdev->hw_features |= NETIF_F_RXHASH; 2929 if (ENIC_SETTING(enic, RXCSUM)) 2930 netdev->hw_features |= NETIF_F_RXCSUM; 2931 if (ENIC_SETTING(enic, VXLAN)) { 2932 u64 patch_level; 2933 u64 a1 = 0; 2934 2935 netdev->hw_enc_features |= NETIF_F_RXCSUM | 2936 NETIF_F_TSO | 2937 NETIF_F_TSO6 | 2938 NETIF_F_TSO_ECN | 2939 NETIF_F_GSO_UDP_TUNNEL | 2940 NETIF_F_HW_CSUM | 2941 NETIF_F_GSO_UDP_TUNNEL_CSUM; 2942 netdev->hw_features |= netdev->hw_enc_features; 2943 /* get bit mask from hw about supported offload bit level 2944 * BIT(0) = fw supports patch_level 0 2945 * fcoe bit = encap 2946 * fcoe_fc_crc_ok = outer csum ok 2947 * BIT(1) = always set by fw 2948 * BIT(2) = fw supports patch_level 2 2949 * BIT(0) in rss_hash = encap 2950 * BIT(1,2) in rss_hash = outer_ip_csum_ok/ 2951 * outer_tcp_csum_ok 2952 * used in enic_rq_indicate_buf 2953 */ 2954 err = vnic_dev_get_supported_feature_ver(enic->vdev, 2955 VIC_FEATURE_VXLAN, 2956 &patch_level, &a1); 2957 if (err) 2958 patch_level = 0; 2959 enic->vxlan.flags = (u8)a1; 2960 /* mask bits that are supported by driver 2961 */ 2962 patch_level &= BIT_ULL(0) | BIT_ULL(2); 2963 patch_level = fls(patch_level); 2964 patch_level = patch_level ? 
patch_level - 1 : 0; 2965 enic->vxlan.patch_level = patch_level; 2966 } 2967 2968 netdev->features |= netdev->hw_features; 2969 netdev->vlan_features |= netdev->features; 2970 2971 #ifdef CONFIG_RFS_ACCEL 2972 netdev->hw_features |= NETIF_F_NTUPLE; 2973 #endif 2974 2975 if (using_dac) 2976 netdev->features |= NETIF_F_HIGHDMA; 2977 2978 netdev->priv_flags |= IFF_UNICAST_FLT; 2979 2980 /* MTU range: 68 - 9000 */ 2981 netdev->min_mtu = ENIC_MIN_MTU; 2982 netdev->max_mtu = ENIC_MAX_MTU; 2983 netdev->mtu = enic->port_mtu; 2984 2985 err = register_netdev(netdev); 2986 if (err) { 2987 dev_err(dev, "Cannot register net device, aborting\n"); 2988 goto err_out_dev_deinit; 2989 } 2990 enic->rx_copybreak = RX_COPYBREAK_DEFAULT; 2991 2992 return 0; 2993 2994 err_out_dev_deinit: 2995 enic_dev_deinit(enic); 2996 err_out_dev_close: 2997 vnic_dev_close(enic->vdev); 2998 err_out_disable_sriov: 2999 kfree(enic->pp); 3000 err_out_disable_sriov_pp: 3001 #ifdef CONFIG_PCI_IOV 3002 if (enic_sriov_enabled(enic)) { 3003 pci_disable_sriov(pdev); 3004 enic->priv_flags &= ~ENIC_SRIOV_ENABLED; 3005 } 3006 #endif 3007 err_out_vnic_unregister: 3008 vnic_dev_unregister(enic->vdev); 3009 err_out_iounmap: 3010 enic_iounmap(enic); 3011 err_out_release_regions: 3012 pci_release_regions(pdev); 3013 err_out_disable_device: 3014 pci_disable_device(pdev); 3015 err_out_free_netdev: 3016 free_netdev(netdev); 3017 3018 return err; 3019 } 3020 3021 static void enic_remove(struct pci_dev *pdev) 3022 { 3023 struct net_device *netdev = pci_get_drvdata(pdev); 3024 3025 if (netdev) { 3026 struct enic *enic = netdev_priv(netdev); 3027 3028 cancel_work_sync(&enic->reset); 3029 cancel_work_sync(&enic->change_mtu_work); 3030 unregister_netdev(netdev); 3031 enic_dev_deinit(enic); 3032 vnic_dev_close(enic->vdev); 3033 #ifdef CONFIG_PCI_IOV 3034 if (enic_sriov_enabled(enic)) { 3035 pci_disable_sriov(pdev); 3036 enic->priv_flags &= ~ENIC_SRIOV_ENABLED; 3037 } 3038 #endif 3039 kfree(enic->pp); 3040 vnic_dev_unregister(enic->vdev); 3041 enic_iounmap(enic); 3042 pci_release_regions(pdev); 3043 pci_disable_device(pdev); 3044 free_netdev(netdev); 3045 } 3046 } 3047 3048 static struct pci_driver enic_driver = { 3049 .name = DRV_NAME, 3050 .id_table = enic_id_table, 3051 .probe = enic_probe, 3052 .remove = enic_remove, 3053 }; 3054 3055 static int __init enic_init_module(void) 3056 { 3057 return pci_register_driver(&enic_driver); 3058 } 3059 3060 static void __exit enic_cleanup_module(void) 3061 { 3062 pci_unregister_driver(&enic_driver); 3063 } 3064 3065 module_init(enic_init_module); 3066 module_exit(enic_cleanup_module); 3067