1 /* 2 * Copyright(c) 2017 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 /* 49 * This file contains HFI1 support for VNIC functionality 50 */ 51 52 #include <linux/io.h> 53 #include <linux/if_vlan.h> 54 55 #include "vnic.h" 56 57 #define HFI_TX_TIMEOUT_MS 1000 58 59 #define HFI1_VNIC_RCV_Q_SIZE 1024 60 61 #define HFI1_VNIC_UP 0 62 63 static DEFINE_SPINLOCK(vport_cntr_lock); 64 65 static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) 66 { 67 unsigned int rcvctrl_ops = 0; 68 int ret; 69 70 hfi1_init_ctxt(uctxt->sc); 71 72 uctxt->do_interrupt = &handle_receive_interrupt; 73 74 /* Now allocate the RcvHdr queue and eager buffers. */ 75 ret = hfi1_create_rcvhdrq(dd, uctxt); 76 if (ret) 77 goto done; 78 79 ret = hfi1_setup_eagerbufs(uctxt); 80 if (ret) 81 goto done; 82 83 if (uctxt->rcvhdrtail_kvaddr) 84 clear_rcvhdrtail(uctxt); 85 86 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 87 rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; 88 89 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 90 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; 91 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 92 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; 93 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 94 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; 95 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) 96 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; 97 98 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); 99 100 uctxt->is_vnic = true; 101 done: 102 return ret; 103 } 104 105 static int allocate_vnic_ctxt(struct hfi1_devdata *dd, 106 struct hfi1_ctxtdata **vnic_ctxt) 107 { 108 struct hfi1_ctxtdata *uctxt; 109 int ret; 110 111 if (dd->flags & HFI1_FROZEN) 112 return -EIO; 113 114 ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt); 115 if (ret < 0) { 116 dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); 117 return -ENOMEM; 118 } 119 120 uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | 121 HFI1_CAP_KGET(NODROP_RHQ_FULL) | 122 HFI1_CAP_KGET(NODROP_EGR_FULL) | 123 HFI1_CAP_KGET(DMA_RTAIL); 124 uctxt->seq_cnt = 1; 125 126 /* Allocate and enable a PIO send context */ 127 uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, 128 uctxt->numa_id); 129 130 ret = uctxt->sc ? 0 : -ENOMEM; 131 if (ret) 132 goto bail; 133 134 dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", 135 uctxt->sc->sw_index, uctxt->sc->hw_context); 136 ret = sc_enable(uctxt->sc); 137 if (ret) 138 goto bail; 139 140 if (dd->num_msix_entries) 141 hfi1_set_vnic_msix_info(uctxt); 142 143 hfi1_stats.sps_ctxts++; 144 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 145 *vnic_ctxt = uctxt; 146 147 return ret; 148 bail: 149 hfi1_free_ctxt(uctxt); 150 dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); 151 return ret; 152 } 153 154 static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, 155 struct hfi1_ctxtdata *uctxt) 156 { 157 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 158 flush_wc(); 159 160 if (dd->num_msix_entries) 161 hfi1_reset_vnic_msix_info(uctxt); 162 163 /* 164 * Disable receive context and interrupt available, reset all 165 * RcvCtxtCtrl bits to default values. 166 */ 167 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | 168 HFI1_RCVCTRL_TIDFLOW_DIS | 169 HFI1_RCVCTRL_INTRAVAIL_DIS | 170 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 171 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 172 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); 173 /* 174 * VNIC contexts are allocated from user context pool. 175 * Release them back to user context pool. 176 * 177 * Reset context integrity checks to default. 178 * (writes to CSRs probably belong in chip.c) 179 */ 180 write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, 181 hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); 182 sc_disable(uctxt->sc); 183 184 dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; 185 186 uctxt->event_flags = 0; 187 188 hfi1_clear_tids(uctxt); 189 hfi1_clear_ctxt_pkey(dd, uctxt); 190 191 hfi1_stats.sps_ctxts--; 192 193 hfi1_free_ctxt(uctxt); 194 } 195 196 void hfi1_vnic_setup(struct hfi1_devdata *dd) 197 { 198 idr_init(&dd->vnic.vesw_idr); 199 } 200 201 void hfi1_vnic_cleanup(struct hfi1_devdata *dd) 202 { 203 idr_destroy(&dd->vnic.vesw_idr); 204 } 205 206 #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ 207 u64 *src64, *dst64; \ 208 for (src64 = &qstats->x_grp.unicast, \ 209 dst64 = &stats->x_grp.unicast; \ 210 dst64 <= &stats->x_grp.s_1519_max;) { \ 211 *dst64++ += *src64++; \ 212 } \ 213 } while (0) 214 215 /* hfi1_vnic_update_stats - update statistics */ 216 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, 217 struct opa_vnic_stats *stats) 218 { 219 struct net_device *netdev = vinfo->netdev; 220 u8 i; 221 222 /* add tx counters on different queues */ 223 for (i = 0; i < vinfo->num_tx_q; i++) { 224 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 225 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 226 227 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; 228 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; 229 stats->tx_drop_state += qstats->tx_drop_state; 230 stats->tx_dlid_zero += qstats->tx_dlid_zero; 231 232 SUM_GRP_COUNTERS(stats, qstats, tx_grp); 233 stats->netstats.tx_packets += qnstats->tx_packets; 234 stats->netstats.tx_bytes += qnstats->tx_bytes; 235 } 236 237 /* add rx counters on different queues */ 238 for (i = 0; i < vinfo->num_rx_q; i++) { 239 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 240 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 241 242 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; 243 stats->netstats.rx_nohandler += qnstats->rx_nohandler; 244 stats->rx_drop_state += qstats->rx_drop_state; 245 stats->rx_oversize += qstats->rx_oversize; 246 stats->rx_runt += qstats->rx_runt; 247 248 SUM_GRP_COUNTERS(stats, qstats, rx_grp); 249 stats->netstats.rx_packets += qnstats->rx_packets; 250 stats->netstats.rx_bytes += qnstats->rx_bytes; 251 } 252 253 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + 254 stats->netstats.tx_carrier_errors + 255 stats->tx_drop_state + stats->tx_dlid_zero; 256 stats->netstats.tx_dropped = stats->netstats.tx_errors; 257 258 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + 259 stats->netstats.rx_nohandler + 260 stats->rx_drop_state + stats->rx_oversize + 261 stats->rx_runt; 262 stats->netstats.rx_dropped = stats->netstats.rx_errors; 263 264 netdev->stats.tx_packets = stats->netstats.tx_packets; 265 netdev->stats.tx_bytes = stats->netstats.tx_bytes; 266 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; 267 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; 268 netdev->stats.tx_errors = stats->netstats.tx_errors; 269 netdev->stats.tx_dropped = stats->netstats.tx_dropped; 270 271 netdev->stats.rx_packets = stats->netstats.rx_packets; 272 netdev->stats.rx_bytes = stats->netstats.rx_bytes; 273 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; 274 netdev->stats.multicast = stats->rx_grp.mcastbcast; 275 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; 276 netdev->stats.rx_errors = stats->netstats.rx_errors; 277 netdev->stats.rx_dropped = stats->netstats.rx_dropped; 278 } 279 280 /* update_len_counters - update pkt's len histogram counters */ 281 static inline void update_len_counters(struct opa_vnic_grp_stats *grp, 282 int len) 283 { 284 /* account for 4 byte FCS */ 285 if (len >= 1515) 286 grp->s_1519_max++; 287 else if (len >= 1020) 288 grp->s_1024_1518++; 289 else if (len >= 508) 290 grp->s_512_1023++; 291 else if (len >= 252) 292 grp->s_256_511++; 293 else if (len >= 124) 294 grp->s_128_255++; 295 else if (len >= 61) 296 grp->s_65_127++; 297 else 298 grp->s_64++; 299 } 300 301 /* hfi1_vnic_update_tx_counters - update transmit counters */ 302 static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, 303 u8 q_idx, struct sk_buff *skb, int err) 304 { 305 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 306 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 307 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; 308 u16 vlan_tci; 309 310 stats->netstats.tx_packets++; 311 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; 312 313 update_len_counters(tx_grp, skb->len); 314 315 /* rest of the counts are for good packets only */ 316 if (unlikely(err)) 317 return; 318 319 if (is_multicast_ether_addr(mac_hdr->h_dest)) 320 tx_grp->mcastbcast++; 321 else 322 tx_grp->unicast++; 323 324 if (!__vlan_get_tag(skb, &vlan_tci)) 325 tx_grp->vlan++; 326 else 327 tx_grp->untagged++; 328 } 329 330 /* hfi1_vnic_update_rx_counters - update receive counters */ 331 static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, 332 u8 q_idx, struct sk_buff *skb, int err) 333 { 334 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; 335 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 336 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; 337 u16 vlan_tci; 338 339 stats->netstats.rx_packets++; 340 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; 341 342 update_len_counters(rx_grp, skb->len); 343 344 /* rest of the counts are for good packets only */ 345 if (unlikely(err)) 346 return; 347 348 if (is_multicast_ether_addr(mac_hdr->h_dest)) 349 rx_grp->mcastbcast++; 350 else 351 rx_grp->unicast++; 352 353 if (!__vlan_get_tag(skb, &vlan_tci)) 354 rx_grp->vlan++; 355 else 356 rx_grp->untagged++; 357 } 358 359 /* This function is overloaded for opa_vnic specific implementation */ 360 static void hfi1_vnic_get_stats64(struct net_device *netdev, 361 struct rtnl_link_stats64 *stats) 362 { 363 struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; 364 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 365 366 hfi1_vnic_update_stats(vinfo, vstats); 367 } 368 369 static u64 create_bypass_pbc(u32 vl, u32 dw_len) 370 { 371 u64 pbc; 372 373 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) 374 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN 375 | PBC_PACKET_BYPASS 376 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) 377 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; 378 379 return pbc; 380 } 381 382 /* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ 383 static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, 384 u8 q_idx) 385 { 386 netif_stop_subqueue(vinfo->netdev, q_idx); 387 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) 388 return; 389 390 netif_start_subqueue(vinfo->netdev, q_idx); 391 } 392 393 static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, 394 struct net_device *netdev) 395 { 396 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 397 u8 pad_len, q_idx = skb->queue_mapping; 398 struct hfi1_devdata *dd = vinfo->dd; 399 struct opa_vnic_skb_mdata *mdata; 400 u32 pkt_len, total_len; 401 int err = -EINVAL; 402 u64 pbc; 403 404 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); 405 if (unlikely(!netif_oper_up(netdev))) { 406 vinfo->stats[q_idx].tx_drop_state++; 407 goto tx_finish; 408 } 409 410 /* take out meta data */ 411 mdata = (struct opa_vnic_skb_mdata *)skb->data; 412 skb_pull(skb, sizeof(*mdata)); 413 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { 414 vinfo->stats[q_idx].tx_dlid_zero++; 415 goto tx_finish; 416 } 417 418 /* add tail padding (for 8 bytes size alignment) and icrc */ 419 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; 420 pad_len += OPA_VNIC_ICRC_TAIL_LEN; 421 422 /* 423 * pkt_len is how much data we have to write, includes header and data. 424 * total_len is length of the packet in Dwords plus the PBC should not 425 * include the CRC. 426 */ 427 pkt_len = (skb->len + pad_len) >> 2; 428 total_len = pkt_len + 2; /* PBC + packet */ 429 430 pbc = create_bypass_pbc(mdata->vl, total_len); 431 432 skb_get(skb); 433 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); 434 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); 435 if (unlikely(err)) { 436 if (err == -ENOMEM) 437 vinfo->stats[q_idx].netstats.tx_fifo_errors++; 438 else if (err != -EBUSY) 439 vinfo->stats[q_idx].netstats.tx_carrier_errors++; 440 } 441 /* remove the header before updating tx counters */ 442 skb_pull(skb, OPA_VNIC_HDR_LEN); 443 444 if (unlikely(err == -EBUSY)) { 445 hfi1_vnic_maybe_stop_tx(vinfo, q_idx); 446 dev_kfree_skb_any(skb); 447 return NETDEV_TX_BUSY; 448 } 449 450 tx_finish: 451 /* update tx counters */ 452 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); 453 dev_kfree_skb_any(skb); 454 return NETDEV_TX_OK; 455 } 456 457 static u16 hfi1_vnic_select_queue(struct net_device *netdev, 458 struct sk_buff *skb, 459 void *accel_priv, 460 select_queue_fallback_t fallback) 461 { 462 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 463 struct opa_vnic_skb_mdata *mdata; 464 struct sdma_engine *sde; 465 466 mdata = (struct opa_vnic_skb_mdata *)skb->data; 467 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); 468 return sde->this_idx; 469 } 470 471 /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ 472 static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, 473 struct sk_buff *skb) 474 { 475 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 476 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; 477 int rc = -EFAULT; 478 479 skb_pull(skb, OPA_VNIC_HDR_LEN); 480 481 /* Validate Packet length */ 482 if (unlikely(skb->len > max_len)) 483 vinfo->stats[rxq->idx].rx_oversize++; 484 else if (unlikely(skb->len < ETH_ZLEN)) 485 vinfo->stats[rxq->idx].rx_runt++; 486 else 487 rc = 0; 488 return rc; 489 } 490 491 static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) 492 { 493 unsigned char *pad_info; 494 struct sk_buff *skb; 495 496 skb = skb_dequeue(&rxq->skbq); 497 if (unlikely(!skb)) 498 return NULL; 499 500 /* remove tail padding and icrc */ 501 pad_info = skb->data + skb->len - 1; 502 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - 503 ((*pad_info) & 0x7))); 504 505 return skb; 506 } 507 508 /* hfi1_vnic_handle_rx - handle skb receive */ 509 static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, 510 int *work_done, int work_to_do) 511 { 512 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 513 struct sk_buff *skb; 514 int rc; 515 516 while (1) { 517 if (*work_done >= work_to_do) 518 break; 519 520 skb = hfi1_vnic_get_skb(rxq); 521 if (unlikely(!skb)) 522 break; 523 524 rc = hfi1_vnic_decap_skb(rxq, skb); 525 /* update rx counters */ 526 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); 527 if (unlikely(rc)) { 528 dev_kfree_skb_any(skb); 529 continue; 530 } 531 532 skb_checksum_none_assert(skb); 533 skb->protocol = eth_type_trans(skb, rxq->netdev); 534 535 napi_gro_receive(&rxq->napi, skb); 536 (*work_done)++; 537 } 538 } 539 540 /* hfi1_vnic_napi - napi receive polling callback function */ 541 static int hfi1_vnic_napi(struct napi_struct *napi, int budget) 542 { 543 struct hfi1_vnic_rx_queue *rxq = container_of(napi, 544 struct hfi1_vnic_rx_queue, napi); 545 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 546 int work_done = 0; 547 548 v_dbg("napi %d budget %d\n", rxq->idx, budget); 549 hfi1_vnic_handle_rx(rxq, &work_done, budget); 550 551 v_dbg("napi %d work_done %d\n", rxq->idx, work_done); 552 if (work_done < budget) 553 napi_complete(napi); 554 555 return work_done; 556 } 557 558 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) 559 { 560 struct hfi1_devdata *dd = packet->rcd->dd; 561 struct hfi1_vnic_vport_info *vinfo = NULL; 562 struct hfi1_vnic_rx_queue *rxq; 563 struct sk_buff *skb; 564 int l4_type, vesw_id = -1; 565 u8 q_idx; 566 567 l4_type = hfi1_16B_get_l4(packet->ebuf); 568 if (likely(l4_type == OPA_16B_L4_ETHR)) { 569 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); 570 vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); 571 572 /* 573 * In case of invalid vesw id, count the error on 574 * the first available vport. 575 */ 576 if (unlikely(!vinfo)) { 577 struct hfi1_vnic_vport_info *vinfo_tmp; 578 int id_tmp = 0; 579 580 vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); 581 if (vinfo_tmp) { 582 spin_lock(&vport_cntr_lock); 583 vinfo_tmp->stats[0].netstats.rx_nohandler++; 584 spin_unlock(&vport_cntr_lock); 585 } 586 } 587 } 588 589 if (unlikely(!vinfo)) { 590 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", 591 l4_type, vesw_id, packet->rcd->ctxt); 592 return; 593 } 594 595 q_idx = packet->rcd->vnic_q_idx; 596 rxq = &vinfo->rxq[q_idx]; 597 if (unlikely(!netif_oper_up(vinfo->netdev))) { 598 vinfo->stats[q_idx].rx_drop_state++; 599 skb_queue_purge(&rxq->skbq); 600 return; 601 } 602 603 if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { 604 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 605 return; 606 } 607 608 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); 609 if (unlikely(!skb)) { 610 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 611 return; 612 } 613 614 memcpy(skb->data, packet->ebuf, packet->tlen); 615 skb_put(skb, packet->tlen); 616 skb_queue_tail(&rxq->skbq, skb); 617 618 if (napi_schedule_prep(&rxq->napi)) { 619 v_dbg("napi %d scheduling\n", q_idx); 620 __napi_schedule(&rxq->napi); 621 } 622 } 623 624 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) 625 { 626 struct hfi1_devdata *dd = vinfo->dd; 627 struct net_device *netdev = vinfo->netdev; 628 int i, rc; 629 630 /* ensure virtual eth switch id is valid */ 631 if (!vinfo->vesw_id) 632 return -EINVAL; 633 634 rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, 635 vinfo->vesw_id + 1, GFP_NOWAIT); 636 if (rc < 0) 637 return rc; 638 639 for (i = 0; i < vinfo->num_rx_q; i++) { 640 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 641 642 skb_queue_head_init(&rxq->skbq); 643 napi_enable(&rxq->napi); 644 } 645 646 netif_carrier_on(netdev); 647 netif_tx_start_all_queues(netdev); 648 set_bit(HFI1_VNIC_UP, &vinfo->flags); 649 650 return 0; 651 } 652 653 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) 654 { 655 struct hfi1_devdata *dd = vinfo->dd; 656 u8 i; 657 658 clear_bit(HFI1_VNIC_UP, &vinfo->flags); 659 netif_carrier_off(vinfo->netdev); 660 netif_tx_disable(vinfo->netdev); 661 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 662 663 /* ensure irqs see the change */ 664 hfi1_vnic_synchronize_irq(dd); 665 666 /* remove unread skbs */ 667 for (i = 0; i < vinfo->num_rx_q; i++) { 668 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 669 670 napi_disable(&rxq->napi); 671 skb_queue_purge(&rxq->skbq); 672 } 673 } 674 675 static int hfi1_netdev_open(struct net_device *netdev) 676 { 677 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 678 int rc; 679 680 mutex_lock(&vinfo->lock); 681 rc = hfi1_vnic_up(vinfo); 682 mutex_unlock(&vinfo->lock); 683 return rc; 684 } 685 686 static int hfi1_netdev_close(struct net_device *netdev) 687 { 688 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 689 690 mutex_lock(&vinfo->lock); 691 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) 692 hfi1_vnic_down(vinfo); 693 mutex_unlock(&vinfo->lock); 694 return 0; 695 } 696 697 static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, 698 struct hfi1_ctxtdata **vnic_ctxt) 699 { 700 int rc; 701 702 rc = allocate_vnic_ctxt(dd, vnic_ctxt); 703 if (rc) { 704 dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); 705 return rc; 706 } 707 708 rc = setup_vnic_ctxt(dd, *vnic_ctxt); 709 if (rc) { 710 dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); 711 deallocate_vnic_ctxt(dd, *vnic_ctxt); 712 *vnic_ctxt = NULL; 713 } 714 715 return rc; 716 } 717 718 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) 719 { 720 struct hfi1_devdata *dd = vinfo->dd; 721 int i, rc = 0; 722 723 mutex_lock(&hfi1_mutex); 724 if (!dd->vnic.num_vports) { 725 rc = hfi1_vnic_txreq_init(dd); 726 if (rc) 727 goto txreq_fail; 728 729 dd->vnic.msix_idx = dd->first_dyn_msix_idx; 730 } 731 732 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { 733 rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); 734 if (rc) 735 break; 736 hfi1_rcd_get(dd->vnic.ctxt[i]); 737 dd->vnic.ctxt[i]->vnic_q_idx = i; 738 } 739 740 if (i < vinfo->num_rx_q) { 741 /* 742 * If required amount of contexts is not 743 * allocated successfully then remaining contexts 744 * are released. 745 */ 746 while (i-- > dd->vnic.num_ctxt) { 747 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 748 hfi1_rcd_put(dd->vnic.ctxt[i]); 749 dd->vnic.ctxt[i] = NULL; 750 } 751 goto alloc_fail; 752 } 753 754 if (dd->vnic.num_ctxt != i) { 755 dd->vnic.num_ctxt = i; 756 hfi1_init_vnic_rsm(dd); 757 } 758 759 dd->vnic.num_vports++; 760 hfi1_vnic_sdma_init(vinfo); 761 alloc_fail: 762 if (!dd->vnic.num_vports) 763 hfi1_vnic_txreq_deinit(dd); 764 txreq_fail: 765 mutex_unlock(&hfi1_mutex); 766 return rc; 767 } 768 769 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) 770 { 771 struct hfi1_devdata *dd = vinfo->dd; 772 int i; 773 774 mutex_lock(&hfi1_mutex); 775 if (--dd->vnic.num_vports == 0) { 776 for (i = 0; i < dd->vnic.num_ctxt; i++) { 777 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 778 hfi1_rcd_put(dd->vnic.ctxt[i]); 779 dd->vnic.ctxt[i] = NULL; 780 } 781 hfi1_deinit_vnic_rsm(dd); 782 dd->vnic.num_ctxt = 0; 783 hfi1_vnic_txreq_deinit(dd); 784 } 785 mutex_unlock(&hfi1_mutex); 786 } 787 788 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) 789 { 790 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 791 bool reopen = false; 792 793 /* 794 * If vesw_id is being changed, and if the vnic port is up, 795 * reset the vnic port to ensure new vesw_id gets picked up 796 */ 797 if (id != vinfo->vesw_id) { 798 mutex_lock(&vinfo->lock); 799 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { 800 hfi1_vnic_down(vinfo); 801 reopen = true; 802 } 803 804 vinfo->vesw_id = id; 805 if (reopen) 806 hfi1_vnic_up(vinfo); 807 808 mutex_unlock(&vinfo->lock); 809 } 810 } 811 812 /* netdev ops */ 813 static const struct net_device_ops hfi1_netdev_ops = { 814 .ndo_open = hfi1_netdev_open, 815 .ndo_stop = hfi1_netdev_close, 816 .ndo_start_xmit = hfi1_netdev_start_xmit, 817 .ndo_select_queue = hfi1_vnic_select_queue, 818 .ndo_get_stats64 = hfi1_vnic_get_stats64, 819 }; 820 821 static void hfi1_vnic_free_rn(struct net_device *netdev) 822 { 823 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 824 825 hfi1_vnic_deinit(vinfo); 826 mutex_destroy(&vinfo->lock); 827 free_netdev(netdev); 828 } 829 830 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, 831 u8 port_num, 832 enum rdma_netdev_t type, 833 const char *name, 834 unsigned char name_assign_type, 835 void (*setup)(struct net_device *)) 836 { 837 struct hfi1_devdata *dd = dd_from_ibdev(device); 838 struct hfi1_vnic_vport_info *vinfo; 839 struct net_device *netdev; 840 struct rdma_netdev *rn; 841 int i, size, rc; 842 843 if (!port_num || (port_num > dd->num_pports)) 844 return ERR_PTR(-EINVAL); 845 846 if (type != RDMA_NETDEV_OPA_VNIC) 847 return ERR_PTR(-EOPNOTSUPP); 848 849 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); 850 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, 851 dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); 852 if (!netdev) 853 return ERR_PTR(-ENOMEM); 854 855 rn = netdev_priv(netdev); 856 vinfo = opa_vnic_dev_priv(netdev); 857 vinfo->dd = dd; 858 vinfo->num_tx_q = dd->chip_sdma_engines; 859 vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; 860 vinfo->netdev = netdev; 861 rn->free_rdma_netdev = hfi1_vnic_free_rn; 862 rn->set_id = hfi1_vnic_set_vesw_id; 863 864 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; 865 netdev->hw_features = netdev->features; 866 netdev->vlan_features = netdev->features; 867 netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); 868 netdev->netdev_ops = &hfi1_netdev_ops; 869 mutex_init(&vinfo->lock); 870 871 for (i = 0; i < vinfo->num_rx_q; i++) { 872 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 873 874 rxq->idx = i; 875 rxq->vinfo = vinfo; 876 rxq->netdev = netdev; 877 netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); 878 } 879 880 rc = hfi1_vnic_init(vinfo); 881 if (rc) 882 goto init_fail; 883 884 return netdev; 885 init_fail: 886 mutex_destroy(&vinfo->lock); 887 free_netdev(netdev); 888 return ERR_PTR(rc); 889 } 890