1 /* 2 * Copyright(c) 2017 - 2018 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 /* 49 * This file contains HFI1 support for VNIC functionality 50 */ 51 52 #include <linux/io.h> 53 #include <linux/if_vlan.h> 54 55 #include "vnic.h" 56 57 #define HFI_TX_TIMEOUT_MS 1000 58 59 #define HFI1_VNIC_RCV_Q_SIZE 1024 60 61 #define HFI1_VNIC_UP 0 62 63 static DEFINE_SPINLOCK(vport_cntr_lock); 64 65 static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) 66 { 67 unsigned int rcvctrl_ops = 0; 68 int ret; 69 70 uctxt->do_interrupt = &handle_receive_interrupt; 71 72 /* Now allocate the RcvHdr queue and eager buffers. */ 73 ret = hfi1_create_rcvhdrq(dd, uctxt); 74 if (ret) 75 goto done; 76 77 ret = hfi1_setup_eagerbufs(uctxt); 78 if (ret) 79 goto done; 80 81 if (uctxt->rcvhdrtail_kvaddr) 82 clear_rcvhdrtail(uctxt); 83 84 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 85 rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; 86 87 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 88 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; 89 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 90 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; 91 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 92 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; 93 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) 94 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; 95 96 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); 97 done: 98 return ret; 99 } 100 101 static int allocate_vnic_ctxt(struct hfi1_devdata *dd, 102 struct hfi1_ctxtdata **vnic_ctxt) 103 { 104 struct hfi1_ctxtdata *uctxt; 105 int ret; 106 107 if (dd->flags & HFI1_FROZEN) 108 return -EIO; 109 110 ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); 111 if (ret < 0) { 112 dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); 113 return -ENOMEM; 114 } 115 116 uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | 117 HFI1_CAP_KGET(NODROP_RHQ_FULL) | 118 HFI1_CAP_KGET(NODROP_EGR_FULL) | 119 HFI1_CAP_KGET(DMA_RTAIL); 120 uctxt->seq_cnt = 1; 121 uctxt->is_vnic = true; 122 123 msix_request_rcd_irq(uctxt); 124 125 hfi1_stats.sps_ctxts++; 126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 127 *vnic_ctxt = uctxt; 128 129 return 0; 130 } 131 132 static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, 133 struct hfi1_ctxtdata *uctxt) 134 { 135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 136 flush_wc(); 137 138 /* 139 * Disable receive context and interrupt available, reset all 140 * RcvCtxtCtrl bits to default values. 141 */ 142 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | 143 HFI1_RCVCTRL_TIDFLOW_DIS | 144 HFI1_RCVCTRL_INTRAVAIL_DIS | 145 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 146 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 147 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 148 149 /* msix_intr will always be > 0, only clean up if this is true */ 150 if (uctxt->msix_intr) 151 msix_free_irq(dd, uctxt->msix_intr); 152 153 uctxt->event_flags = 0; 154 155 hfi1_clear_tids(uctxt); 156 hfi1_clear_ctxt_pkey(dd, uctxt); 157 158 hfi1_stats.sps_ctxts--; 159 160 hfi1_free_ctxt(uctxt); 161 } 162 163 void hfi1_vnic_setup(struct hfi1_devdata *dd) 164 { 165 idr_init(&dd->vnic.vesw_idr); 166 } 167 168 void hfi1_vnic_cleanup(struct hfi1_devdata *dd) 169 { 170 idr_destroy(&dd->vnic.vesw_idr); 171 } 172 173 #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ 174 u64 *src64, *dst64; \ 175 for (src64 = &qstats->x_grp.unicast, \ 176 dst64 = &stats->x_grp.unicast; \ 177 dst64 <= &stats->x_grp.s_1519_max;) { \ 178 *dst64++ += *src64++; \ 179 } \ 180 } while (0) 181 182 /* hfi1_vnic_update_stats - update statistics */ 183 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, 184 struct opa_vnic_stats *stats) 185 { 186 struct net_device *netdev = vinfo->netdev; 187 u8 i; 188 189 /* add tx counters on different queues */ 190 for (i = 0; i < vinfo->num_tx_q; i++) { 191 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 192 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 193 194 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; 195 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; 196 stats->tx_drop_state += qstats->tx_drop_state; 197 stats->tx_dlid_zero += qstats->tx_dlid_zero; 198 199 SUM_GRP_COUNTERS(stats, qstats, tx_grp); 200 stats->netstats.tx_packets += qnstats->tx_packets; 201 stats->netstats.tx_bytes += qnstats->tx_bytes; 202 } 203 204 /* add rx counters on different queues */ 205 for (i = 0; i < vinfo->num_rx_q; i++) { 206 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 207 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 208 209 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; 210 stats->netstats.rx_nohandler += qnstats->rx_nohandler; 211 stats->rx_drop_state += qstats->rx_drop_state; 212 stats->rx_oversize += qstats->rx_oversize; 213 stats->rx_runt += qstats->rx_runt; 214 215 SUM_GRP_COUNTERS(stats, qstats, rx_grp); 216 stats->netstats.rx_packets += qnstats->rx_packets; 217 stats->netstats.rx_bytes += qnstats->rx_bytes; 218 } 219 220 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + 221 stats->netstats.tx_carrier_errors + 222 stats->tx_drop_state + stats->tx_dlid_zero; 223 stats->netstats.tx_dropped = stats->netstats.tx_errors; 224 225 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + 226 stats->netstats.rx_nohandler + 227 stats->rx_drop_state + stats->rx_oversize + 228 stats->rx_runt; 229 stats->netstats.rx_dropped = stats->netstats.rx_errors; 230 231 netdev->stats.tx_packets = stats->netstats.tx_packets; 232 netdev->stats.tx_bytes = stats->netstats.tx_bytes; 233 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; 234 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; 235 netdev->stats.tx_errors = stats->netstats.tx_errors; 236 netdev->stats.tx_dropped = stats->netstats.tx_dropped; 237 238 netdev->stats.rx_packets = stats->netstats.rx_packets; 239 netdev->stats.rx_bytes = stats->netstats.rx_bytes; 240 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; 241 netdev->stats.multicast = stats->rx_grp.mcastbcast; 242 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; 243 netdev->stats.rx_errors = stats->netstats.rx_errors; 244 netdev->stats.rx_dropped = stats->netstats.rx_dropped; 245 } 246 247 /* update_len_counters - update pkt's len histogram counters */ 248 static inline void update_len_counters(struct opa_vnic_grp_stats *grp, 249 int len) 250 { 251 /* account for 4 byte FCS */ 252 if (len >= 1515) 253 grp->s_1519_max++; 254 else if (len >= 1020) 255 grp->s_1024_1518++; 256 else if (len >= 508) 257 grp->s_512_1023++; 258 else if (len >= 252) 259 grp->s_256_511++; 260 else if (len >= 124) 261 grp->s_128_255++; 262 else if (len >= 61) 263 grp->s_65_127++; 264 else 265 grp->s_64++; 266 } 267 268 /* hfi1_vnic_update_tx_counters - update transmit counters */ 269 static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, 270 u8 q_idx, struct sk_buff *skb, int err) 271 { 272 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 273 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 274 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; 275 u16 vlan_tci; 276 277 stats->netstats.tx_packets++; 278 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; 279 280 update_len_counters(tx_grp, skb->len); 281 282 /* rest of the counts are for good packets only */ 283 if (unlikely(err)) 284 return; 285 286 if (is_multicast_ether_addr(mac_hdr->h_dest)) 287 tx_grp->mcastbcast++; 288 else 289 tx_grp->unicast++; 290 291 if (!__vlan_get_tag(skb, &vlan_tci)) 292 tx_grp->vlan++; 293 else 294 tx_grp->untagged++; 295 } 296 297 /* hfi1_vnic_update_rx_counters - update receive counters */ 298 static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, 299 u8 q_idx, struct sk_buff *skb, int err) 300 { 301 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; 302 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 303 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; 304 u16 vlan_tci; 305 306 stats->netstats.rx_packets++; 307 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; 308 309 update_len_counters(rx_grp, skb->len); 310 311 /* rest of the counts are for good packets only */ 312 if (unlikely(err)) 313 return; 314 315 if (is_multicast_ether_addr(mac_hdr->h_dest)) 316 rx_grp->mcastbcast++; 317 else 318 rx_grp->unicast++; 319 320 if (!__vlan_get_tag(skb, &vlan_tci)) 321 rx_grp->vlan++; 322 else 323 rx_grp->untagged++; 324 } 325 326 /* This function is overloaded for opa_vnic specific implementation */ 327 static void hfi1_vnic_get_stats64(struct net_device *netdev, 328 struct rtnl_link_stats64 *stats) 329 { 330 struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; 331 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 332 333 hfi1_vnic_update_stats(vinfo, vstats); 334 } 335 336 static u64 create_bypass_pbc(u32 vl, u32 dw_len) 337 { 338 u64 pbc; 339 340 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) 341 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN 342 | PBC_PACKET_BYPASS 343 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) 344 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; 345 346 return pbc; 347 } 348 349 /* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ 350 static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, 351 u8 q_idx) 352 { 353 netif_stop_subqueue(vinfo->netdev, q_idx); 354 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) 355 return; 356 357 netif_start_subqueue(vinfo->netdev, q_idx); 358 } 359 360 static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, 361 struct net_device *netdev) 362 { 363 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 364 u8 pad_len, q_idx = skb->queue_mapping; 365 struct hfi1_devdata *dd = vinfo->dd; 366 struct opa_vnic_skb_mdata *mdata; 367 u32 pkt_len, total_len; 368 int err = -EINVAL; 369 u64 pbc; 370 371 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); 372 if (unlikely(!netif_oper_up(netdev))) { 373 vinfo->stats[q_idx].tx_drop_state++; 374 goto tx_finish; 375 } 376 377 /* take out meta data */ 378 mdata = (struct opa_vnic_skb_mdata *)skb->data; 379 skb_pull(skb, sizeof(*mdata)); 380 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { 381 vinfo->stats[q_idx].tx_dlid_zero++; 382 goto tx_finish; 383 } 384 385 /* add tail padding (for 8 bytes size alignment) and icrc */ 386 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; 387 pad_len += OPA_VNIC_ICRC_TAIL_LEN; 388 389 /* 390 * pkt_len is how much data we have to write, includes header and data. 391 * total_len is length of the packet in Dwords plus the PBC should not 392 * include the CRC. 393 */ 394 pkt_len = (skb->len + pad_len) >> 2; 395 total_len = pkt_len + 2; /* PBC + packet */ 396 397 pbc = create_bypass_pbc(mdata->vl, total_len); 398 399 skb_get(skb); 400 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); 401 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); 402 if (unlikely(err)) { 403 if (err == -ENOMEM) 404 vinfo->stats[q_idx].netstats.tx_fifo_errors++; 405 else if (err != -EBUSY) 406 vinfo->stats[q_idx].netstats.tx_carrier_errors++; 407 } 408 /* remove the header before updating tx counters */ 409 skb_pull(skb, OPA_VNIC_HDR_LEN); 410 411 if (unlikely(err == -EBUSY)) { 412 hfi1_vnic_maybe_stop_tx(vinfo, q_idx); 413 dev_kfree_skb_any(skb); 414 return NETDEV_TX_BUSY; 415 } 416 417 tx_finish: 418 /* update tx counters */ 419 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); 420 dev_kfree_skb_any(skb); 421 return NETDEV_TX_OK; 422 } 423 424 static u16 hfi1_vnic_select_queue(struct net_device *netdev, 425 struct sk_buff *skb, 426 struct net_device *sb_dev, 427 select_queue_fallback_t fallback) 428 { 429 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 430 struct opa_vnic_skb_mdata *mdata; 431 struct sdma_engine *sde; 432 433 mdata = (struct opa_vnic_skb_mdata *)skb->data; 434 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); 435 return sde->this_idx; 436 } 437 438 /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ 439 static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, 440 struct sk_buff *skb) 441 { 442 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 443 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; 444 int rc = -EFAULT; 445 446 skb_pull(skb, OPA_VNIC_HDR_LEN); 447 448 /* Validate Packet length */ 449 if (unlikely(skb->len > max_len)) 450 vinfo->stats[rxq->idx].rx_oversize++; 451 else if (unlikely(skb->len < ETH_ZLEN)) 452 vinfo->stats[rxq->idx].rx_runt++; 453 else 454 rc = 0; 455 return rc; 456 } 457 458 static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) 459 { 460 unsigned char *pad_info; 461 struct sk_buff *skb; 462 463 skb = skb_dequeue(&rxq->skbq); 464 if (unlikely(!skb)) 465 return NULL; 466 467 /* remove tail padding and icrc */ 468 pad_info = skb->data + skb->len - 1; 469 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - 470 ((*pad_info) & 0x7))); 471 472 return skb; 473 } 474 475 /* hfi1_vnic_handle_rx - handle skb receive */ 476 static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, 477 int *work_done, int work_to_do) 478 { 479 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 480 struct sk_buff *skb; 481 int rc; 482 483 while (1) { 484 if (*work_done >= work_to_do) 485 break; 486 487 skb = hfi1_vnic_get_skb(rxq); 488 if (unlikely(!skb)) 489 break; 490 491 rc = hfi1_vnic_decap_skb(rxq, skb); 492 /* update rx counters */ 493 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); 494 if (unlikely(rc)) { 495 dev_kfree_skb_any(skb); 496 continue; 497 } 498 499 skb_checksum_none_assert(skb); 500 skb->protocol = eth_type_trans(skb, rxq->netdev); 501 502 napi_gro_receive(&rxq->napi, skb); 503 (*work_done)++; 504 } 505 } 506 507 /* hfi1_vnic_napi - napi receive polling callback function */ 508 static int hfi1_vnic_napi(struct napi_struct *napi, int budget) 509 { 510 struct hfi1_vnic_rx_queue *rxq = container_of(napi, 511 struct hfi1_vnic_rx_queue, napi); 512 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 513 int work_done = 0; 514 515 v_dbg("napi %d budget %d\n", rxq->idx, budget); 516 hfi1_vnic_handle_rx(rxq, &work_done, budget); 517 518 v_dbg("napi %d work_done %d\n", rxq->idx, work_done); 519 if (work_done < budget) 520 napi_complete(napi); 521 522 return work_done; 523 } 524 525 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) 526 { 527 struct hfi1_devdata *dd = packet->rcd->dd; 528 struct hfi1_vnic_vport_info *vinfo = NULL; 529 struct hfi1_vnic_rx_queue *rxq; 530 struct sk_buff *skb; 531 int l4_type, vesw_id = -1; 532 u8 q_idx; 533 534 l4_type = hfi1_16B_get_l4(packet->ebuf); 535 if (likely(l4_type == OPA_16B_L4_ETHR)) { 536 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); 537 vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); 538 539 /* 540 * In case of invalid vesw id, count the error on 541 * the first available vport. 542 */ 543 if (unlikely(!vinfo)) { 544 struct hfi1_vnic_vport_info *vinfo_tmp; 545 int id_tmp = 0; 546 547 vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); 548 if (vinfo_tmp) { 549 spin_lock(&vport_cntr_lock); 550 vinfo_tmp->stats[0].netstats.rx_nohandler++; 551 spin_unlock(&vport_cntr_lock); 552 } 553 } 554 } 555 556 if (unlikely(!vinfo)) { 557 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", 558 l4_type, vesw_id, packet->rcd->ctxt); 559 return; 560 } 561 562 q_idx = packet->rcd->vnic_q_idx; 563 rxq = &vinfo->rxq[q_idx]; 564 if (unlikely(!netif_oper_up(vinfo->netdev))) { 565 vinfo->stats[q_idx].rx_drop_state++; 566 skb_queue_purge(&rxq->skbq); 567 return; 568 } 569 570 if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { 571 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 572 return; 573 } 574 575 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); 576 if (unlikely(!skb)) { 577 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 578 return; 579 } 580 581 memcpy(skb->data, packet->ebuf, packet->tlen); 582 skb_put(skb, packet->tlen); 583 skb_queue_tail(&rxq->skbq, skb); 584 585 if (napi_schedule_prep(&rxq->napi)) { 586 v_dbg("napi %d scheduling\n", q_idx); 587 __napi_schedule(&rxq->napi); 588 } 589 } 590 591 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) 592 { 593 struct hfi1_devdata *dd = vinfo->dd; 594 struct net_device *netdev = vinfo->netdev; 595 int i, rc; 596 597 /* ensure virtual eth switch id is valid */ 598 if (!vinfo->vesw_id) 599 return -EINVAL; 600 601 rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, 602 vinfo->vesw_id + 1, GFP_NOWAIT); 603 if (rc < 0) 604 return rc; 605 606 for (i = 0; i < vinfo->num_rx_q; i++) { 607 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 608 609 skb_queue_head_init(&rxq->skbq); 610 napi_enable(&rxq->napi); 611 } 612 613 netif_carrier_on(netdev); 614 netif_tx_start_all_queues(netdev); 615 set_bit(HFI1_VNIC_UP, &vinfo->flags); 616 617 return 0; 618 } 619 620 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) 621 { 622 struct hfi1_devdata *dd = vinfo->dd; 623 u8 i; 624 625 clear_bit(HFI1_VNIC_UP, &vinfo->flags); 626 netif_carrier_off(vinfo->netdev); 627 netif_tx_disable(vinfo->netdev); 628 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 629 630 /* ensure irqs see the change */ 631 msix_vnic_synchronize_irq(dd); 632 633 /* remove unread skbs */ 634 for (i = 0; i < vinfo->num_rx_q; i++) { 635 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 636 637 napi_disable(&rxq->napi); 638 skb_queue_purge(&rxq->skbq); 639 } 640 } 641 642 static int hfi1_netdev_open(struct net_device *netdev) 643 { 644 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 645 int rc; 646 647 mutex_lock(&vinfo->lock); 648 rc = hfi1_vnic_up(vinfo); 649 mutex_unlock(&vinfo->lock); 650 return rc; 651 } 652 653 static int hfi1_netdev_close(struct net_device *netdev) 654 { 655 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 656 657 mutex_lock(&vinfo->lock); 658 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) 659 hfi1_vnic_down(vinfo); 660 mutex_unlock(&vinfo->lock); 661 return 0; 662 } 663 664 static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, 665 struct hfi1_ctxtdata **vnic_ctxt) 666 { 667 int rc; 668 669 rc = allocate_vnic_ctxt(dd, vnic_ctxt); 670 if (rc) { 671 dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); 672 return rc; 673 } 674 675 rc = setup_vnic_ctxt(dd, *vnic_ctxt); 676 if (rc) { 677 dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); 678 deallocate_vnic_ctxt(dd, *vnic_ctxt); 679 *vnic_ctxt = NULL; 680 } 681 682 return rc; 683 } 684 685 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) 686 { 687 struct hfi1_devdata *dd = vinfo->dd; 688 int i, rc = 0; 689 690 mutex_lock(&hfi1_mutex); 691 if (!dd->vnic.num_vports) { 692 rc = hfi1_vnic_txreq_init(dd); 693 if (rc) 694 goto txreq_fail; 695 } 696 697 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { 698 rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); 699 if (rc) 700 break; 701 hfi1_rcd_get(dd->vnic.ctxt[i]); 702 dd->vnic.ctxt[i]->vnic_q_idx = i; 703 } 704 705 if (i < vinfo->num_rx_q) { 706 /* 707 * If required amount of contexts is not 708 * allocated successfully then remaining contexts 709 * are released. 710 */ 711 while (i-- > dd->vnic.num_ctxt) { 712 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 713 hfi1_rcd_put(dd->vnic.ctxt[i]); 714 dd->vnic.ctxt[i] = NULL; 715 } 716 goto alloc_fail; 717 } 718 719 if (dd->vnic.num_ctxt != i) { 720 dd->vnic.num_ctxt = i; 721 hfi1_init_vnic_rsm(dd); 722 } 723 724 dd->vnic.num_vports++; 725 hfi1_vnic_sdma_init(vinfo); 726 alloc_fail: 727 if (!dd->vnic.num_vports) 728 hfi1_vnic_txreq_deinit(dd); 729 txreq_fail: 730 mutex_unlock(&hfi1_mutex); 731 return rc; 732 } 733 734 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) 735 { 736 struct hfi1_devdata *dd = vinfo->dd; 737 int i; 738 739 mutex_lock(&hfi1_mutex); 740 if (--dd->vnic.num_vports == 0) { 741 for (i = 0; i < dd->vnic.num_ctxt; i++) { 742 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 743 hfi1_rcd_put(dd->vnic.ctxt[i]); 744 dd->vnic.ctxt[i] = NULL; 745 } 746 hfi1_deinit_vnic_rsm(dd); 747 dd->vnic.num_ctxt = 0; 748 hfi1_vnic_txreq_deinit(dd); 749 } 750 mutex_unlock(&hfi1_mutex); 751 } 752 753 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) 754 { 755 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 756 bool reopen = false; 757 758 /* 759 * If vesw_id is being changed, and if the vnic port is up, 760 * reset the vnic port to ensure new vesw_id gets picked up 761 */ 762 if (id != vinfo->vesw_id) { 763 mutex_lock(&vinfo->lock); 764 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { 765 hfi1_vnic_down(vinfo); 766 reopen = true; 767 } 768 769 vinfo->vesw_id = id; 770 if (reopen) 771 hfi1_vnic_up(vinfo); 772 773 mutex_unlock(&vinfo->lock); 774 } 775 } 776 777 /* netdev ops */ 778 static const struct net_device_ops hfi1_netdev_ops = { 779 .ndo_open = hfi1_netdev_open, 780 .ndo_stop = hfi1_netdev_close, 781 .ndo_start_xmit = hfi1_netdev_start_xmit, 782 .ndo_select_queue = hfi1_vnic_select_queue, 783 .ndo_get_stats64 = hfi1_vnic_get_stats64, 784 }; 785 786 static void hfi1_vnic_free_rn(struct net_device *netdev) 787 { 788 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 789 790 hfi1_vnic_deinit(vinfo); 791 mutex_destroy(&vinfo->lock); 792 free_netdev(netdev); 793 } 794 795 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, 796 u8 port_num, 797 enum rdma_netdev_t type, 798 const char *name, 799 unsigned char name_assign_type, 800 void (*setup)(struct net_device *)) 801 { 802 struct hfi1_devdata *dd = dd_from_ibdev(device); 803 struct hfi1_vnic_vport_info *vinfo; 804 struct net_device *netdev; 805 struct rdma_netdev *rn; 806 int i, size, rc; 807 808 if (!dd->num_vnic_contexts) 809 return ERR_PTR(-ENOMEM); 810 811 if (!port_num || (port_num > dd->num_pports)) 812 return ERR_PTR(-EINVAL); 813 814 if (type != RDMA_NETDEV_OPA_VNIC) 815 return ERR_PTR(-EOPNOTSUPP); 816 817 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); 818 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, 819 dd->num_sdma, dd->num_vnic_contexts); 820 if (!netdev) 821 return ERR_PTR(-ENOMEM); 822 823 rn = netdev_priv(netdev); 824 vinfo = opa_vnic_dev_priv(netdev); 825 vinfo->dd = dd; 826 vinfo->num_tx_q = dd->num_sdma; 827 vinfo->num_rx_q = dd->num_vnic_contexts; 828 vinfo->netdev = netdev; 829 rn->free_rdma_netdev = hfi1_vnic_free_rn; 830 rn->set_id = hfi1_vnic_set_vesw_id; 831 832 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; 833 netdev->hw_features = netdev->features; 834 netdev->vlan_features = netdev->features; 835 netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); 836 netdev->netdev_ops = &hfi1_netdev_ops; 837 mutex_init(&vinfo->lock); 838 839 for (i = 0; i < vinfo->num_rx_q; i++) { 840 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 841 842 rxq->idx = i; 843 rxq->vinfo = vinfo; 844 rxq->netdev = netdev; 845 netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); 846 } 847 848 rc = hfi1_vnic_init(vinfo); 849 if (rc) 850 goto init_fail; 851 852 return netdev; 853 init_fail: 854 mutex_destroy(&vinfo->lock); 855 free_netdev(netdev); 856 return ERR_PTR(rc); 857 } 858