/*
 * Copyright(c) 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

/*
 * This file contains HFI1 support for VNIC functionality
 */

#include <linux/io.h>
#include <linux/if_vlan.h>

#include "vnic.h"

#define HFI_TX_TIMEOUT_MS 1000

#define HFI1_VNIC_RCV_Q_SIZE   1024

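/* bit index of the "port up" flag in hfi1_vnic_vport_info.flags */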
#define HFI1_VNIC_UP 0

static DEFINE_SPINLOCK(vport_cntr_lock);

static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;
	int ret;

	hfi1_init_ctxt(uctxt->sc);

	uctxt->do_interrupt = &handle_receive_interrupt;

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		goto done;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto done;

	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

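	/* translate the context's capability flags into RcvCtrl enable bits */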
	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;

	if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;

	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);

	uctxt->is_vnic = true;
done:
	return ret;
}

static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
			      struct hfi1_ctxtdata **vnic_ctxt)
{
	struct hfi1_ctxtdata *uctxt;
	int ret;

	if (dd->flags & HFI1_FROZEN)
		return -EIO;

	ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
	if (ret < 0) {
		dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
		return -ENOMEM;
	}

	uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
			HFI1_CAP_KGET(NODROP_RHQ_FULL) |
			HFI1_CAP_KGET(NODROP_EGR_FULL) |
			HFI1_CAP_KGET(DMA_RTAIL);
	uctxt->seq_cnt = 1;

	/* Allocate and enable a PIO send context */
	uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize,
			     uctxt->numa_id);

	ret = uctxt->sc ? 0 : -ENOMEM;
	if (ret)
		goto bail;

	dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n",
		   uctxt->sc->sw_index, uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto bail;

	if (dd->num_msix_entries)
		hfi1_set_vnic_msix_info(uctxt);

	hfi1_stats.sps_ctxts++;
	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
	*vnic_ctxt = uctxt;

	return ret;
bail:
	hfi1_free_ctxt(uctxt);
	dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
	return ret;
}

static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
				 struct hfi1_ctxtdata *uctxt)
{
	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
	flush_wc();

	if (dd->num_msix_entries)
		hfi1_reset_vnic_msix_info(uctxt);

	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
	/*
	 * VNIC contexts are allocated from user context pool.
	 * Release them back to user context pool.
	 *
	 * Reset context integrity checks to default.
	 * (writes to CSRs probably belong in chip.c)
	 */
	write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
			hfi1_pkt_default_send_ctxt_mask(dd, SC_USER));
	sc_disable(uctxt->sc);

	dd->send_contexts[uctxt->sc->sw_index].type = SC_USER;

	uctxt->event_flags = 0;

	hfi1_clear_tids(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	hfi1_stats.sps_ctxts--;

	hfi1_free_ctxt(uctxt);
}

void hfi1_vnic_setup(struct hfi1_devdata *dd)
{
	idr_init(&dd->vnic.vesw_idr);
}

void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
{
	idr_destroy(&dd->vnic.vesw_idr);
}

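/*
 * Sum one counter group from a per-queue stats block into the vport
 * totals.  The u64 members from x_grp.unicast through x_grp.s_1519_max
 * are laid out contiguously, so a simple pointer walk covers them all.
 */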
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &qstats->x_grp.unicast,           \
			dst64 = &stats->x_grp.unicast;         \
			dst64 <= &stats->x_grp.s_1519_max;) {  \
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)

/* hfi1_vnic_update_stats - update statistics */
static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
				   struct opa_vnic_stats *stats)
{
	struct net_device *netdev = vinfo->netdev;
	u8 i;

	/* add tx counters on different queues */
	for (i = 0; i < vinfo->num_tx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
		stats->tx_drop_state += qstats->tx_drop_state;
		stats->tx_dlid_zero += qstats->tx_dlid_zero;

		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
		stats->netstats.tx_packets += qnstats->tx_packets;
		stats->netstats.tx_bytes += qnstats->tx_bytes;
	}

	/* add rx counters on different queues */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct opa_vnic_stats *qstats = &vinfo->stats[i];
		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;

		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
		stats->rx_drop_state += qstats->rx_drop_state;
		stats->rx_oversize += qstats->rx_oversize;
		stats->rx_runt += qstats->rx_runt;

		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
		stats->netstats.rx_packets += qnstats->rx_packets;
		stats->netstats.rx_bytes += qnstats->rx_bytes;
	}

	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
				    stats->netstats.tx_carrier_errors +
				    stats->tx_drop_state + stats->tx_dlid_zero;
	stats->netstats.tx_dropped = stats->netstats.tx_errors;

	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
				    stats->netstats.rx_nohandler +
				    stats->rx_drop_state + stats->rx_oversize +
				    stats->rx_runt;
	stats->netstats.rx_dropped = stats->netstats.rx_errors;

	netdev->stats.tx_packets = stats->netstats.tx_packets;
	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
	netdev->stats.tx_errors = stats->netstats.tx_errors;
	netdev->stats.tx_dropped = stats->netstats.tx_dropped;

	netdev->stats.rx_packets = stats->netstats.rx_packets;
	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
	netdev->stats.multicast = stats->rx_grp.mcastbcast;
	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
	netdev->stats.rx_errors = stats->netstats.rx_errors;
	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
}

/* update_len_counters - update pkt's len histogram counters */
static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
				       int len)
{
	/* account for 4 byte FCS */
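	/*
	 * Each threshold below is the bucket's lower bound minus the
	 * 4 byte FCS (e.g. 1519 - 4 = 1515, 65 - 4 = 61), since the
	 * passed-in length does not include the FCS.
	 */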
	if (len >= 1515)
		grp->s_1519_max++;
	else if (len >= 1020)
		grp->s_1024_1518++;
	else if (len >= 508)
		grp->s_512_1023++;
	else if (len >= 252)
		grp->s_256_511++;
	else if (len >= 124)
		grp->s_128_255++;
	else if (len >= 61)
		grp->s_65_127++;
	else
		grp->s_64++;
}

/* hfi1_vnic_update_tx_counters - update transmit counters */
static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
	u16 vlan_tci;

	stats->netstats.tx_packets++;
	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(tx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		tx_grp->mcastbcast++;
	else
		tx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		tx_grp->vlan++;
	else
		tx_grp->untagged++;
}

/* hfi1_vnic_update_rx_counters - update receive counters */
static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
					 u8 q_idx, struct sk_buff *skb, int err)
{
	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
	u16 vlan_tci;

	stats->netstats.rx_packets++;
	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;

	update_len_counters(rx_grp, skb->len);

	/* rest of the counts are for good packets only */
	if (unlikely(err))
		return;

	if (is_multicast_ether_addr(mac_hdr->h_dest))
		rx_grp->mcastbcast++;
	else
		rx_grp->unicast++;

	if (!__vlan_get_tag(skb, &vlan_tci))
		rx_grp->vlan++;
	else
		rx_grp->untagged++;
}

/*
 * This function is overloaded for the opa_vnic specific implementation,
 * which passes an opa_vnic_stats here; hence the cast below.
 */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	struct opa_vnic_stats *vstats = (struct opa_vnic_stats *)stats;
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_update_stats(vinfo, vstats);
}

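/*
 * Build the PBC (Per Buffer Control) word for a bypass packet: HCRC
 * insertion disabled, bypass ICRC insertion selected, a credit return
 * requested, and the VL and length in dwords encoded in their fields.
 */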
static u64 create_bypass_pbc(u32 vl, u32 dw_len)
{
	u64 pbc;

	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
		| PBC_PACKET_BYPASS
		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;

	return pbc;
}

/* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
				    u8 q_idx)
{
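	/*
	 * Stop the queue before checking for SDMA descriptor space and
	 * restart it if space is still available.  Stopping first avoids
	 * the race where descriptors are freed between the check and the
	 * stop, which could leave the queue stopped despite free space.
	 */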
	netif_stop_subqueue(vinfo->netdev, q_idx);
	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
		return;

	netif_start_subqueue(vinfo->netdev, q_idx);
}

static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
					  struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	u8 pad_len, q_idx = skb->queue_mapping;
	struct hfi1_devdata *dd = vinfo->dd;
	struct opa_vnic_skb_mdata *mdata;
	u32 pkt_len, total_len;
	int err = -EINVAL;
	u64 pbc;

	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
	if (unlikely(!netif_oper_up(netdev))) {
		vinfo->stats[q_idx].tx_drop_state++;
		goto tx_finish;
	}

	/* take out meta data */
	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	skb_pull(skb, sizeof(*mdata));
	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
		vinfo->stats[q_idx].tx_dlid_zero++;
		goto tx_finish;
	}

	/* add tail padding (for 8 byte alignment) and the ICRC */
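	/*
	 * -(x) & 0x7 is the number of bytes needed to round x up to a
	 * multiple of 8, so pad_len covers both the alignment padding
	 * and the ICRC/tail bytes appended after the payload.
	 */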
	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
	pad_len += OPA_VNIC_ICRC_TAIL_LEN;

	/*
	 * pkt_len is how much data we have to write (header plus data),
	 * in dwords.  total_len is the packet length in dwords plus the
	 * PBC; it must not include the CRC.
	 */
	pkt_len = (skb->len + pad_len) >> 2;
	total_len = pkt_len + 2; /* PBC + packet */

	pbc = create_bypass_pbc(mdata->vl, total_len);

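	/*
	 * Hold an extra reference so the skb stays valid for the counter
	 * update below even after the DMA send path drops its reference.
	 */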
	skb_get(skb);
	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
	if (unlikely(err)) {
		if (err == -ENOMEM)
			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
		else if (err != -EBUSY)
			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
	}
	/* remove the header before updating tx counters */
	skb_pull(skb, OPA_VNIC_HDR_LEN);

	if (unlikely(err == -EBUSY)) {
		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
		dev_kfree_skb_any(skb);
		return NETDEV_TX_BUSY;
	}

tx_finish:
	/* update tx counters */
	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

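/*
 * The tx queue index returned here is the SDMA engine index chosen from
 * the packet's entropy and VL; num_tx_q equals the number of SDMA
 * engines, so each engine maps to exactly one tx queue.
 */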
static u16 hfi1_vnic_select_queue(struct net_device *netdev,
				  struct sk_buff *skb,
				  void *accel_priv,
				  select_queue_fallback_t fallback)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	struct opa_vnic_skb_mdata *mdata;
	struct sdma_engine *sde;

	mdata = (struct opa_vnic_skb_mdata *)skb->data;
	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
	return sde->this_idx;
}

/* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
				      struct sk_buff *skb)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
	int rc = -EFAULT;

	skb_pull(skb, OPA_VNIC_HDR_LEN);

	/* Validate Packet length */
	if (unlikely(skb->len > max_len))
		vinfo->stats[rxq->idx].rx_oversize++;
	else if (unlikely(skb->len < ETH_ZLEN))
		vinfo->stats[rxq->idx].rx_runt++;
	else
		rc = 0;
	return rc;
}

static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
{
	unsigned char *pad_info;
	struct sk_buff *skb;

	skb = skb_dequeue(&rxq->skbq);
	if (unlikely(!skb))
		return NULL;

	/* remove tail padding and icrc */
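	/*
	 * The last byte of the packet carries the tail-padding length in
	 * its low 3 bits; trim that padding together with the ICRC and
	 * tail byte themselves.
	 */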
	pad_info = skb->data + skb->len - 1;
	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
		       ((*pad_info) & 0x7)));

	return skb;
}

/* hfi1_vnic_handle_rx - handle skb receive */
static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
				int *work_done, int work_to_do)
{
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	struct sk_buff *skb;
	int rc;

	while (1) {
		if (*work_done >= work_to_do)
			break;

		skb = hfi1_vnic_get_skb(rxq);
		if (unlikely(!skb))
			break;

		rc = hfi1_vnic_decap_skb(rxq, skb);
		/* update rx counters */
		hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
		if (unlikely(rc)) {
			dev_kfree_skb_any(skb);
			continue;
		}

		skb_checksum_none_assert(skb);
		skb->protocol = eth_type_trans(skb, rxq->netdev);

		napi_gro_receive(&rxq->napi, skb);
		(*work_done)++;
	}
}

/* hfi1_vnic_napi - napi receive polling callback function */
static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
{
	struct hfi1_vnic_rx_queue *rxq = container_of(napi,
					      struct hfi1_vnic_rx_queue, napi);
	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
	int work_done = 0;

	v_dbg("napi %d budget %d\n", rxq->idx, budget);
	hfi1_vnic_handle_rx(rxq, &work_done, budget);

	v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
	if (work_done < budget)
		napi_complete(napi);

	return work_done;
}

void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
{
	struct hfi1_devdata *dd = packet->rcd->dd;
	struct hfi1_vnic_vport_info *vinfo = NULL;
	struct hfi1_vnic_rx_queue *rxq;
	struct sk_buff *skb;
	int l4_type, vesw_id = -1;
	u8 q_idx;

	l4_type = hfi1_16B_get_l4(packet->ebuf);
	if (likely(l4_type == OPA_16B_L4_ETHR)) {
		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
		vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);

		/*
		 * In case of invalid vesw id, count the error on
		 * the first available vport.
		 */
		if (unlikely(!vinfo)) {
			struct hfi1_vnic_vport_info *vinfo_tmp;
			int id_tmp = 0;

			vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
			if (vinfo_tmp) {
				spin_lock(&vport_cntr_lock);
				vinfo_tmp->stats[0].netstats.rx_nohandler++;
				spin_unlock(&vport_cntr_lock);
			}
		}
	}

	if (unlikely(!vinfo)) {
		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
			    l4_type, vesw_id, packet->rcd->ctxt);
		return;
	}

	q_idx = packet->rcd->vnic_q_idx;
	rxq = &vinfo->rxq[q_idx];
	if (unlikely(!netif_oper_up(vinfo->netdev))) {
		vinfo->stats[q_idx].rx_drop_state++;
		skb_queue_purge(&rxq->skbq);
		return;
	}

	if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
	if (unlikely(!skb)) {
		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
		return;
	}

	memcpy(skb->data, packet->ebuf, packet->tlen);
	skb_put(skb, packet->tlen);
	skb_queue_tail(&rxq->skbq, skb);

	if (napi_schedule_prep(&rxq->napi)) {
		v_dbg("napi %d scheduling\n", q_idx);
		__napi_schedule(&rxq->napi);
	}
}

static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	struct net_device *netdev = vinfo->netdev;
	int i, rc;

	/* ensure virtual eth switch id is valid */
	if (!vinfo->vesw_id)
		return -EINVAL;

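	/*
	 * Register this vport at exactly vesw_id so the bypass receive
	 * path can look it up by the switch id carried in the packet.
	 */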
	rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
		       vinfo->vesw_id + 1, GFP_NOWAIT);
	if (rc < 0)
		return rc;

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		skb_queue_head_init(&rxq->skbq);
		napi_enable(&rxq->napi);
	}

	netif_carrier_on(netdev);
	netif_tx_start_all_queues(netdev);
	set_bit(HFI1_VNIC_UP, &vinfo->flags);

	return 0;
}

static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	u8 i;

	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
	netif_carrier_off(vinfo->netdev);
	netif_tx_disable(vinfo->netdev);
	idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);

	/* ensure irqs see the change */
	hfi1_vnic_synchronize_irq(dd);

	/* remove unread skbs */
	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		napi_disable(&rxq->napi);
		skb_queue_purge(&rxq->skbq);
	}
}

static int hfi1_netdev_open(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	int rc;

	mutex_lock(&vinfo->lock);
	rc = hfi1_vnic_up(vinfo);
	mutex_unlock(&vinfo->lock);
	return rc;
}

static int hfi1_netdev_close(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	mutex_lock(&vinfo->lock);
	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
		hfi1_vnic_down(vinfo);
	mutex_unlock(&vinfo->lock);
	return 0;
}

static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
				struct hfi1_ctxtdata **vnic_ctxt)
{
	int rc;

	rc = allocate_vnic_ctxt(dd, vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
		return rc;
	}

	rc = setup_vnic_ctxt(dd, *vnic_ctxt);
	if (rc) {
		dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
		deallocate_vnic_ctxt(dd, *vnic_ctxt);
		*vnic_ctxt = NULL;
	}

	return rc;
}

static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i, rc = 0;

	mutex_lock(&hfi1_mutex);
	if (!dd->vnic.num_vports) {
		rc = hfi1_vnic_txreq_init(dd);
		if (rc)
			goto txreq_fail;

		dd->vnic.msix_idx = dd->first_dyn_msix_idx;
	}

	for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
		rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
		if (rc)
			break;
		hfi1_rcd_get(dd->vnic.ctxt[i]);
		dd->vnic.ctxt[i]->vnic_q_idx = i;
	}

	if (i < vinfo->num_rx_q) {
		/*
		 * Not all of the required contexts could be allocated;
		 * release the contexts allocated above.
		 */
		while (i-- > dd->vnic.num_ctxt) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		goto alloc_fail;
	}

	if (dd->vnic.num_ctxt != i) {
		dd->vnic.num_ctxt = i;
		hfi1_init_vnic_rsm(dd);
	}

	dd->vnic.num_vports++;
	hfi1_vnic_sdma_init(vinfo);
alloc_fail:
	if (!dd->vnic.num_vports)
		hfi1_vnic_txreq_deinit(dd);
txreq_fail:
	mutex_unlock(&hfi1_mutex);
	return rc;
}

static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
{
	struct hfi1_devdata *dd = vinfo->dd;
	int i;

	mutex_lock(&hfi1_mutex);
	if (--dd->vnic.num_vports == 0) {
		for (i = 0; i < dd->vnic.num_ctxt; i++) {
			deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
			hfi1_rcd_put(dd->vnic.ctxt[i]);
			dd->vnic.ctxt[i] = NULL;
		}
		hfi1_deinit_vnic_rsm(dd);
		dd->vnic.num_ctxt = 0;
		hfi1_vnic_txreq_deinit(dd);
	}
	mutex_unlock(&hfi1_mutex);
}

static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
	bool reopen = false;

	/*
	 * If the vesw_id is being changed while the vnic port is up,
	 * bounce the port so that the new vesw_id takes effect.
	 */
	if (id != vinfo->vesw_id) {
		mutex_lock(&vinfo->lock);
		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
			hfi1_vnic_down(vinfo);
			reopen = true;
		}

		vinfo->vesw_id = id;
		if (reopen)
			hfi1_vnic_up(vinfo);

		mutex_unlock(&vinfo->lock);
	}
}

/* netdev ops */
static const struct net_device_ops hfi1_netdev_ops = {
	.ndo_open = hfi1_netdev_open,
	.ndo_stop = hfi1_netdev_close,
	.ndo_start_xmit = hfi1_netdev_start_xmit,
	.ndo_select_queue = hfi1_vnic_select_queue,
	.ndo_get_stats64 = hfi1_vnic_get_stats64,
};

static void hfi1_vnic_free_rn(struct net_device *netdev)
{
	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);

	hfi1_vnic_deinit(vinfo);
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
}

struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int i, size, rc;

	if (!port_num || (port_num > dd->num_pports))
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

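	/*
	 * The netdev private area holds the opa_vnic_rdma_netdev followed
	 * by the hfi1 vport info, hence the combined allocation size.
	 */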
	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
				  dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->num_tx_q = dd->chip_sdma_engines;
	vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT;
	vinfo->netdev = netdev;
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;
	mutex_init(&vinfo->lock);

	for (i = 0; i < vinfo->num_rx_q; i++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];

		rxq->idx = i;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (rc)
		goto init_fail;

	return netdev;
init_fail:
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}