1 /* 2 * Copyright(c) 2017 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 /* 49 * This file contains HFI1 support for VNIC functionality 50 */ 51 52 #include <linux/io.h> 53 #include <linux/if_vlan.h> 54 55 #include "vnic.h" 56 57 #define HFI_TX_TIMEOUT_MS 1000 58 59 #define HFI1_VNIC_RCV_Q_SIZE 1024 60 61 #define HFI1_VNIC_UP 0 62 63 static DEFINE_SPINLOCK(vport_cntr_lock); 64 65 static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) 66 { 67 unsigned int rcvctrl_ops = 0; 68 int ret; 69 70 hfi1_init_ctxt(uctxt->sc); 71 72 uctxt->do_interrupt = &handle_receive_interrupt; 73 74 /* Now allocate the RcvHdr queue and eager buffers. */ 75 ret = hfi1_create_rcvhdrq(dd, uctxt); 76 if (ret) 77 goto done; 78 79 ret = hfi1_setup_eagerbufs(uctxt); 80 if (ret) 81 goto done; 82 83 if (uctxt->rcvhdrtail_kvaddr) 84 clear_rcvhdrtail(uctxt); 85 86 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB; 87 rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB; 88 89 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR)) 90 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB; 91 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL)) 92 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB; 93 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL)) 94 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB; 95 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL)) 96 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; 97 98 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt); 99 100 uctxt->is_vnic = true; 101 done: 102 return ret; 103 } 104 105 static int allocate_vnic_ctxt(struct hfi1_devdata *dd, 106 struct hfi1_ctxtdata **vnic_ctxt) 107 { 108 struct hfi1_ctxtdata *uctxt; 109 unsigned int ctxt; 110 int ret; 111 112 if (dd->flags & HFI1_FROZEN) 113 return -EIO; 114 115 for (ctxt = dd->first_dyn_alloc_ctxt; 116 ctxt < dd->num_rcv_contexts; ctxt++) 117 if (!dd->rcd[ctxt]) 118 break; 119 120 if (ctxt == dd->num_rcv_contexts) 121 return -EBUSY; 122 123 uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, dd->node); 124 if (!uctxt) { 125 dd_dev_err(dd, "Unable to create ctxtdata, failing open\n"); 126 return -ENOMEM; 127 } 128 129 uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) | 130 HFI1_CAP_KGET(NODROP_RHQ_FULL) | 131 HFI1_CAP_KGET(NODROP_EGR_FULL) | 132 HFI1_CAP_KGET(DMA_RTAIL); 133 uctxt->seq_cnt = 1; 134 135 /* Allocate and enable a PIO send context */ 136 uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, 137 uctxt->numa_id); 138 139 ret = uctxt->sc ? 0 : -ENOMEM; 140 if (ret) 141 goto bail; 142 143 dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", 144 uctxt->sc->sw_index, uctxt->sc->hw_context); 145 ret = sc_enable(uctxt->sc); 146 if (ret) 147 goto bail; 148 149 if (dd->num_msix_entries) 150 hfi1_set_vnic_msix_info(uctxt); 151 152 hfi1_stats.sps_ctxts++; 153 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); 154 *vnic_ctxt = uctxt; 155 156 return ret; 157 bail: 158 /* 159 * hfi1_free_ctxtdata() also releases send_context 160 * structure if uctxt->sc is not null 161 */ 162 dd->rcd[uctxt->ctxt] = NULL; 163 hfi1_free_ctxtdata(dd, uctxt); 164 dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); 165 return ret; 166 } 167 168 static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, 169 struct hfi1_ctxtdata *uctxt) 170 { 171 unsigned long flags; 172 173 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt); 174 flush_wc(); 175 176 if (dd->num_msix_entries) 177 hfi1_reset_vnic_msix_info(uctxt); 178 179 spin_lock_irqsave(&dd->uctxt_lock, flags); 180 /* 181 * Disable receive context and interrupt available, reset all 182 * RcvCtxtCtrl bits to default values. 183 */ 184 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS | 185 HFI1_RCVCTRL_TIDFLOW_DIS | 186 HFI1_RCVCTRL_INTRAVAIL_DIS | 187 HFI1_RCVCTRL_ONE_PKT_EGR_DIS | 188 HFI1_RCVCTRL_NO_RHQ_DROP_DIS | 189 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt); 190 /* 191 * VNIC contexts are allocated from user context pool. 192 * Release them back to user context pool. 193 * 194 * Reset context integrity checks to default. 195 * (writes to CSRs probably belong in chip.c) 196 */ 197 write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, 198 hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); 199 sc_disable(uctxt->sc); 200 201 dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; 202 spin_unlock_irqrestore(&dd->uctxt_lock, flags); 203 204 dd->rcd[uctxt->ctxt] = NULL; 205 uctxt->event_flags = 0; 206 207 hfi1_clear_tids(uctxt); 208 hfi1_clear_ctxt_pkey(dd, uctxt); 209 210 hfi1_stats.sps_ctxts--; 211 hfi1_free_ctxtdata(dd, uctxt); 212 } 213 214 void hfi1_vnic_setup(struct hfi1_devdata *dd) 215 { 216 idr_init(&dd->vnic.vesw_idr); 217 } 218 219 void hfi1_vnic_cleanup(struct hfi1_devdata *dd) 220 { 221 idr_destroy(&dd->vnic.vesw_idr); 222 } 223 224 #define SUM_GRP_COUNTERS(stats, qstats, x_grp) do { \ 225 u64 *src64, *dst64; \ 226 for (src64 = &qstats->x_grp.unicast, \ 227 dst64 = &stats->x_grp.unicast; \ 228 dst64 <= &stats->x_grp.s_1519_max;) { \ 229 *dst64++ += *src64++; \ 230 } \ 231 } while (0) 232 233 /* hfi1_vnic_update_stats - update statistics */ 234 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo, 235 struct opa_vnic_stats *stats) 236 { 237 struct net_device *netdev = vinfo->netdev; 238 u8 i; 239 240 /* add tx counters on different queues */ 241 for (i = 0; i < vinfo->num_tx_q; i++) { 242 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 243 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 244 245 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors; 246 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors; 247 stats->tx_drop_state += qstats->tx_drop_state; 248 stats->tx_dlid_zero += qstats->tx_dlid_zero; 249 250 SUM_GRP_COUNTERS(stats, qstats, tx_grp); 251 stats->netstats.tx_packets += qnstats->tx_packets; 252 stats->netstats.tx_bytes += qnstats->tx_bytes; 253 } 254 255 /* add rx counters on different queues */ 256 for (i = 0; i < vinfo->num_rx_q; i++) { 257 struct opa_vnic_stats *qstats = &vinfo->stats[i]; 258 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats; 259 260 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors; 261 stats->netstats.rx_nohandler += qnstats->rx_nohandler; 262 stats->rx_drop_state += qstats->rx_drop_state; 263 stats->rx_oversize += qstats->rx_oversize; 264 stats->rx_runt += qstats->rx_runt; 265 266 SUM_GRP_COUNTERS(stats, qstats, rx_grp); 267 stats->netstats.rx_packets += qnstats->rx_packets; 268 stats->netstats.rx_bytes += qnstats->rx_bytes; 269 } 270 271 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors + 272 stats->netstats.tx_carrier_errors + 273 stats->tx_drop_state + stats->tx_dlid_zero; 274 stats->netstats.tx_dropped = stats->netstats.tx_errors; 275 276 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors + 277 stats->netstats.rx_nohandler + 278 stats->rx_drop_state + stats->rx_oversize + 279 stats->rx_runt; 280 stats->netstats.rx_dropped = stats->netstats.rx_errors; 281 282 netdev->stats.tx_packets = stats->netstats.tx_packets; 283 netdev->stats.tx_bytes = stats->netstats.tx_bytes; 284 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors; 285 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors; 286 netdev->stats.tx_errors = stats->netstats.tx_errors; 287 netdev->stats.tx_dropped = stats->netstats.tx_dropped; 288 289 netdev->stats.rx_packets = stats->netstats.rx_packets; 290 netdev->stats.rx_bytes = stats->netstats.rx_bytes; 291 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors; 292 netdev->stats.multicast = stats->rx_grp.mcastbcast; 293 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt; 294 netdev->stats.rx_errors = stats->netstats.rx_errors; 295 netdev->stats.rx_dropped = stats->netstats.rx_dropped; 296 } 297 298 /* update_len_counters - update pkt's len histogram counters */ 299 static inline void update_len_counters(struct opa_vnic_grp_stats *grp, 300 int len) 301 { 302 /* account for 4 byte FCS */ 303 if (len >= 1515) 304 grp->s_1519_max++; 305 else if (len >= 1020) 306 grp->s_1024_1518++; 307 else if (len >= 508) 308 grp->s_512_1023++; 309 else if (len >= 252) 310 grp->s_256_511++; 311 else if (len >= 124) 312 grp->s_128_255++; 313 else if (len >= 61) 314 grp->s_65_127++; 315 else 316 grp->s_64++; 317 } 318 319 /* hfi1_vnic_update_tx_counters - update transmit counters */ 320 static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo, 321 u8 q_idx, struct sk_buff *skb, int err) 322 { 323 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); 324 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 325 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp; 326 u16 vlan_tci; 327 328 stats->netstats.tx_packets++; 329 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN; 330 331 update_len_counters(tx_grp, skb->len); 332 333 /* rest of the counts are for good packets only */ 334 if (unlikely(err)) 335 return; 336 337 if (is_multicast_ether_addr(mac_hdr->h_dest)) 338 tx_grp->mcastbcast++; 339 else 340 tx_grp->unicast++; 341 342 if (!__vlan_get_tag(skb, &vlan_tci)) 343 tx_grp->vlan++; 344 else 345 tx_grp->untagged++; 346 } 347 348 /* hfi1_vnic_update_rx_counters - update receive counters */ 349 static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo, 350 u8 q_idx, struct sk_buff *skb, int err) 351 { 352 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data; 353 struct opa_vnic_stats *stats = &vinfo->stats[q_idx]; 354 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp; 355 u16 vlan_tci; 356 357 stats->netstats.rx_packets++; 358 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN; 359 360 update_len_counters(rx_grp, skb->len); 361 362 /* rest of the counts are for good packets only */ 363 if (unlikely(err)) 364 return; 365 366 if (is_multicast_ether_addr(mac_hdr->h_dest)) 367 rx_grp->mcastbcast++; 368 else 369 rx_grp->unicast++; 370 371 if (!__vlan_get_tag(skb, &vlan_tci)) 372 rx_grp->vlan++; 373 else 374 rx_grp->untagged++; 375 } 376 377 /* This function is overloaded for opa_vnic specific implementation */ 378 static void hfi1_vnic_get_stats64(struct net_device *netdev, 379 struct rtnl_link_stats64 *stats) 380 { 381 struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats; 382 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 383 384 hfi1_vnic_update_stats(vinfo, vstats); 385 } 386 387 static u64 create_bypass_pbc(u32 vl, u32 dw_len) 388 { 389 u64 pbc; 390 391 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT) 392 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN 393 | PBC_PACKET_BYPASS 394 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT) 395 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT; 396 397 return pbc; 398 } 399 400 /* hfi1_vnic_maybe_stop_tx - stop tx queue if required */ 401 static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo, 402 u8 q_idx) 403 { 404 netif_stop_subqueue(vinfo->netdev, q_idx); 405 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx)) 406 return; 407 408 netif_start_subqueue(vinfo->netdev, q_idx); 409 } 410 411 static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb, 412 struct net_device *netdev) 413 { 414 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 415 u8 pad_len, q_idx = skb->queue_mapping; 416 struct hfi1_devdata *dd = vinfo->dd; 417 struct opa_vnic_skb_mdata *mdata; 418 u32 pkt_len, total_len; 419 int err = -EINVAL; 420 u64 pbc; 421 422 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len); 423 if (unlikely(!netif_oper_up(netdev))) { 424 vinfo->stats[q_idx].tx_drop_state++; 425 goto tx_finish; 426 } 427 428 /* take out meta data */ 429 mdata = (struct opa_vnic_skb_mdata *)skb->data; 430 skb_pull(skb, sizeof(*mdata)); 431 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) { 432 vinfo->stats[q_idx].tx_dlid_zero++; 433 goto tx_finish; 434 } 435 436 /* add tail padding (for 8 bytes size alignment) and icrc */ 437 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7; 438 pad_len += OPA_VNIC_ICRC_TAIL_LEN; 439 440 /* 441 * pkt_len is how much data we have to write, includes header and data. 442 * total_len is length of the packet in Dwords plus the PBC should not 443 * include the CRC. 444 */ 445 pkt_len = (skb->len + pad_len) >> 2; 446 total_len = pkt_len + 2; /* PBC + packet */ 447 448 pbc = create_bypass_pbc(mdata->vl, total_len); 449 450 skb_get(skb); 451 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len); 452 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len); 453 if (unlikely(err)) { 454 if (err == -ENOMEM) 455 vinfo->stats[q_idx].netstats.tx_fifo_errors++; 456 else if (err != -EBUSY) 457 vinfo->stats[q_idx].netstats.tx_carrier_errors++; 458 } 459 /* remove the header before updating tx counters */ 460 skb_pull(skb, OPA_VNIC_HDR_LEN); 461 462 if (unlikely(err == -EBUSY)) { 463 hfi1_vnic_maybe_stop_tx(vinfo, q_idx); 464 dev_kfree_skb_any(skb); 465 return NETDEV_TX_BUSY; 466 } 467 468 tx_finish: 469 /* update tx counters */ 470 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err); 471 dev_kfree_skb_any(skb); 472 return NETDEV_TX_OK; 473 } 474 475 static u16 hfi1_vnic_select_queue(struct net_device *netdev, 476 struct sk_buff *skb, 477 void *accel_priv, 478 select_queue_fallback_t fallback) 479 { 480 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 481 struct opa_vnic_skb_mdata *mdata; 482 struct sdma_engine *sde; 483 484 mdata = (struct opa_vnic_skb_mdata *)skb->data; 485 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl); 486 return sde->this_idx; 487 } 488 489 /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */ 490 static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq, 491 struct sk_buff *skb) 492 { 493 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 494 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN; 495 int rc = -EFAULT; 496 497 skb_pull(skb, OPA_VNIC_HDR_LEN); 498 499 /* Validate Packet length */ 500 if (unlikely(skb->len > max_len)) 501 vinfo->stats[rxq->idx].rx_oversize++; 502 else if (unlikely(skb->len < ETH_ZLEN)) 503 vinfo->stats[rxq->idx].rx_runt++; 504 else 505 rc = 0; 506 return rc; 507 } 508 509 static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq) 510 { 511 unsigned char *pad_info; 512 struct sk_buff *skb; 513 514 skb = skb_dequeue(&rxq->skbq); 515 if (unlikely(!skb)) 516 return NULL; 517 518 /* remove tail padding and icrc */ 519 pad_info = skb->data + skb->len - 1; 520 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN - 521 ((*pad_info) & 0x7))); 522 523 return skb; 524 } 525 526 /* hfi1_vnic_handle_rx - handle skb receive */ 527 static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq, 528 int *work_done, int work_to_do) 529 { 530 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 531 struct sk_buff *skb; 532 int rc; 533 534 while (1) { 535 if (*work_done >= work_to_do) 536 break; 537 538 skb = hfi1_vnic_get_skb(rxq); 539 if (unlikely(!skb)) 540 break; 541 542 rc = hfi1_vnic_decap_skb(rxq, skb); 543 /* update rx counters */ 544 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc); 545 if (unlikely(rc)) { 546 dev_kfree_skb_any(skb); 547 continue; 548 } 549 550 skb_checksum_none_assert(skb); 551 skb->protocol = eth_type_trans(skb, rxq->netdev); 552 553 napi_gro_receive(&rxq->napi, skb); 554 (*work_done)++; 555 } 556 } 557 558 /* hfi1_vnic_napi - napi receive polling callback function */ 559 static int hfi1_vnic_napi(struct napi_struct *napi, int budget) 560 { 561 struct hfi1_vnic_rx_queue *rxq = container_of(napi, 562 struct hfi1_vnic_rx_queue, napi); 563 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo; 564 int work_done = 0; 565 566 v_dbg("napi %d budget %d\n", rxq->idx, budget); 567 hfi1_vnic_handle_rx(rxq, &work_done, budget); 568 569 v_dbg("napi %d work_done %d\n", rxq->idx, work_done); 570 if (work_done < budget) 571 napi_complete(napi); 572 573 return work_done; 574 } 575 576 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet) 577 { 578 struct hfi1_devdata *dd = packet->rcd->dd; 579 struct hfi1_vnic_vport_info *vinfo = NULL; 580 struct hfi1_vnic_rx_queue *rxq; 581 struct sk_buff *skb; 582 int l4_type, vesw_id = -1; 583 u8 q_idx; 584 585 l4_type = HFI1_GET_L4_TYPE(packet->ebuf); 586 if (likely(l4_type == OPA_VNIC_L4_ETHR)) { 587 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf); 588 vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id); 589 590 /* 591 * In case of invalid vesw id, count the error on 592 * the first available vport. 593 */ 594 if (unlikely(!vinfo)) { 595 struct hfi1_vnic_vport_info *vinfo_tmp; 596 int id_tmp = 0; 597 598 vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp); 599 if (vinfo_tmp) { 600 spin_lock(&vport_cntr_lock); 601 vinfo_tmp->stats[0].netstats.rx_nohandler++; 602 spin_unlock(&vport_cntr_lock); 603 } 604 } 605 } 606 607 if (unlikely(!vinfo)) { 608 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n", 609 l4_type, vesw_id, packet->rcd->ctxt); 610 return; 611 } 612 613 q_idx = packet->rcd->vnic_q_idx; 614 rxq = &vinfo->rxq[q_idx]; 615 if (unlikely(!netif_oper_up(vinfo->netdev))) { 616 vinfo->stats[q_idx].rx_drop_state++; 617 skb_queue_purge(&rxq->skbq); 618 return; 619 } 620 621 if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) { 622 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 623 return; 624 } 625 626 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen); 627 if (unlikely(!skb)) { 628 vinfo->stats[q_idx].netstats.rx_fifo_errors++; 629 return; 630 } 631 632 memcpy(skb->data, packet->ebuf, packet->tlen); 633 skb_put(skb, packet->tlen); 634 skb_queue_tail(&rxq->skbq, skb); 635 636 if (napi_schedule_prep(&rxq->napi)) { 637 v_dbg("napi %d scheduling\n", q_idx); 638 __napi_schedule(&rxq->napi); 639 } 640 } 641 642 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo) 643 { 644 struct hfi1_devdata *dd = vinfo->dd; 645 struct net_device *netdev = vinfo->netdev; 646 int i, rc; 647 648 /* ensure virtual eth switch id is valid */ 649 if (!vinfo->vesw_id) 650 return -EINVAL; 651 652 rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id, 653 vinfo->vesw_id + 1, GFP_NOWAIT); 654 if (rc < 0) 655 return rc; 656 657 for (i = 0; i < vinfo->num_rx_q; i++) { 658 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 659 660 skb_queue_head_init(&rxq->skbq); 661 napi_enable(&rxq->napi); 662 } 663 664 netif_carrier_on(netdev); 665 netif_tx_start_all_queues(netdev); 666 set_bit(HFI1_VNIC_UP, &vinfo->flags); 667 668 return 0; 669 } 670 671 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo) 672 { 673 struct hfi1_devdata *dd = vinfo->dd; 674 u8 i; 675 676 clear_bit(HFI1_VNIC_UP, &vinfo->flags); 677 netif_carrier_off(vinfo->netdev); 678 netif_tx_disable(vinfo->netdev); 679 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id); 680 681 /* ensure irqs see the change */ 682 hfi1_vnic_synchronize_irq(dd); 683 684 /* remove unread skbs */ 685 for (i = 0; i < vinfo->num_rx_q; i++) { 686 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 687 688 napi_disable(&rxq->napi); 689 skb_queue_purge(&rxq->skbq); 690 } 691 } 692 693 static int hfi1_netdev_open(struct net_device *netdev) 694 { 695 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 696 int rc; 697 698 mutex_lock(&vinfo->lock); 699 rc = hfi1_vnic_up(vinfo); 700 mutex_unlock(&vinfo->lock); 701 return rc; 702 } 703 704 static int hfi1_netdev_close(struct net_device *netdev) 705 { 706 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 707 708 mutex_lock(&vinfo->lock); 709 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) 710 hfi1_vnic_down(vinfo); 711 mutex_unlock(&vinfo->lock); 712 return 0; 713 } 714 715 static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd, 716 struct hfi1_ctxtdata **vnic_ctxt) 717 { 718 int rc; 719 720 rc = allocate_vnic_ctxt(dd, vnic_ctxt); 721 if (rc) { 722 dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc); 723 return rc; 724 } 725 726 rc = setup_vnic_ctxt(dd, *vnic_ctxt); 727 if (rc) { 728 dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc); 729 deallocate_vnic_ctxt(dd, *vnic_ctxt); 730 *vnic_ctxt = NULL; 731 } 732 733 return rc; 734 } 735 736 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo) 737 { 738 struct hfi1_devdata *dd = vinfo->dd; 739 int i, rc = 0; 740 741 mutex_lock(&hfi1_mutex); 742 if (!dd->vnic.num_vports) { 743 rc = hfi1_vnic_txreq_init(dd); 744 if (rc) 745 goto txreq_fail; 746 747 dd->vnic.msix_idx = dd->first_dyn_msix_idx; 748 } 749 750 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) { 751 rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]); 752 if (rc) 753 break; 754 dd->vnic.ctxt[i]->vnic_q_idx = i; 755 } 756 757 if (i < vinfo->num_rx_q) { 758 /* 759 * If required amount of contexts is not 760 * allocated successfully then remaining contexts 761 * are released. 762 */ 763 while (i-- > dd->vnic.num_ctxt) { 764 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 765 dd->vnic.ctxt[i] = NULL; 766 } 767 goto alloc_fail; 768 } 769 770 if (dd->vnic.num_ctxt != i) { 771 dd->vnic.num_ctxt = i; 772 hfi1_init_vnic_rsm(dd); 773 } 774 775 dd->vnic.num_vports++; 776 hfi1_vnic_sdma_init(vinfo); 777 alloc_fail: 778 if (!dd->vnic.num_vports) 779 hfi1_vnic_txreq_deinit(dd); 780 txreq_fail: 781 mutex_unlock(&hfi1_mutex); 782 return rc; 783 } 784 785 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo) 786 { 787 struct hfi1_devdata *dd = vinfo->dd; 788 int i; 789 790 mutex_lock(&hfi1_mutex); 791 if (--dd->vnic.num_vports == 0) { 792 for (i = 0; i < dd->vnic.num_ctxt; i++) { 793 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]); 794 dd->vnic.ctxt[i] = NULL; 795 } 796 hfi1_deinit_vnic_rsm(dd); 797 dd->vnic.num_ctxt = 0; 798 hfi1_vnic_txreq_deinit(dd); 799 } 800 mutex_unlock(&hfi1_mutex); 801 } 802 803 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id) 804 { 805 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 806 bool reopen = false; 807 808 /* 809 * If vesw_id is being changed, and if the vnic port is up, 810 * reset the vnic port to ensure new vesw_id gets picked up 811 */ 812 if (id != vinfo->vesw_id) { 813 mutex_lock(&vinfo->lock); 814 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) { 815 hfi1_vnic_down(vinfo); 816 reopen = true; 817 } 818 819 vinfo->vesw_id = id; 820 if (reopen) 821 hfi1_vnic_up(vinfo); 822 823 mutex_unlock(&vinfo->lock); 824 } 825 } 826 827 /* netdev ops */ 828 static const struct net_device_ops hfi1_netdev_ops = { 829 .ndo_open = hfi1_netdev_open, 830 .ndo_stop = hfi1_netdev_close, 831 .ndo_start_xmit = hfi1_netdev_start_xmit, 832 .ndo_select_queue = hfi1_vnic_select_queue, 833 .ndo_get_stats64 = hfi1_vnic_get_stats64, 834 }; 835 836 static void hfi1_vnic_free_rn(struct net_device *netdev) 837 { 838 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev); 839 840 hfi1_vnic_deinit(vinfo); 841 mutex_destroy(&vinfo->lock); 842 free_netdev(netdev); 843 } 844 845 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, 846 u8 port_num, 847 enum rdma_netdev_t type, 848 const char *name, 849 unsigned char name_assign_type, 850 void (*setup)(struct net_device *)) 851 { 852 struct hfi1_devdata *dd = dd_from_ibdev(device); 853 struct hfi1_vnic_vport_info *vinfo; 854 struct net_device *netdev; 855 struct rdma_netdev *rn; 856 int i, size, rc; 857 858 if (!port_num || (port_num > dd->num_pports)) 859 return ERR_PTR(-EINVAL); 860 861 if (type != RDMA_NETDEV_OPA_VNIC) 862 return ERR_PTR(-EOPNOTSUPP); 863 864 size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); 865 netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, 866 dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); 867 if (!netdev) 868 return ERR_PTR(-ENOMEM); 869 870 rn = netdev_priv(netdev); 871 vinfo = opa_vnic_dev_priv(netdev); 872 vinfo->dd = dd; 873 vinfo->num_tx_q = dd->chip_sdma_engines; 874 vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; 875 vinfo->netdev = netdev; 876 rn->free_rdma_netdev = hfi1_vnic_free_rn; 877 rn->set_id = hfi1_vnic_set_vesw_id; 878 879 netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG; 880 netdev->hw_features = netdev->features; 881 netdev->vlan_features = netdev->features; 882 netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS); 883 netdev->netdev_ops = &hfi1_netdev_ops; 884 mutex_init(&vinfo->lock); 885 886 for (i = 0; i < vinfo->num_rx_q; i++) { 887 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i]; 888 889 rxq->idx = i; 890 rxq->vinfo = vinfo; 891 rxq->netdev = netdev; 892 netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64); 893 } 894 895 rc = hfi1_vnic_init(vinfo); 896 if (rc) 897 goto init_fail; 898 899 return netdev; 900 init_fail: 901 mutex_destroy(&vinfo->lock); 902 free_netdev(netdev); 903 return ERR_PTR(rc); 904 } 905