xref: /openbmc/linux/drivers/infiniband/hw/hfi1/vnic_main.c (revision c0ecca6604b80e438b032578634c6e133c7028f6)
1 /*
2  * Copyright(c) 2017 - 2020 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47 
48 /*
49  * This file contains HFI1 support for VNIC functionality
50  */
51 
52 #include <linux/io.h>
53 #include <linux/if_vlan.h>
54 
55 #include "vnic.h"
56 #include "netdev.h"
57 
/* Tx watchdog timeout in milliseconds; converted to jiffies for netdev->watchdog_timeo */
58 #define HFI_TX_TIMEOUT_MS 1000
59 
/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed */
60 #define HFI1_VNIC_RCV_Q_SIZE   1024
61 
/* Bit number in vinfo->flags: set when the vport is brought up, cleared when it goes down */
62 #define HFI1_VNIC_UP 0
63 
/* Held around the rx_nohandler bump made on the first vport when a packet's vesw id is unknown */
64 static DEFINE_SPINLOCK(vport_cntr_lock);
65 
/*
 * SUM_GRP_COUNTERS - add one queue's group counters into a running total
 * @stats:  destination; pointer to a structure with an @x_grp member
 * @qstats: source; pointer to a structure with an @x_grp member
 * @x_grp:  name of the group member to sum (e.g. tx_grp or rx_grp)
 *
 * Walks the u64 counters from ->unicast up to and including ->s_1519_max,
 * so it relies on those members (and everything between them) being
 * contiguous u64 counters laid out in the same order in both structures.
 *
 * The pointer arguments are parenthesized so that expressions such as
 * "&total" or "base + i" expand correctly.  @stats is evaluated more than
 * once; do not pass an expression with side effects.
 */
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {            \
		u64 *src64, *dst64;                            \
		for (src64 = &(qstats)->x_grp.unicast,         \
			dst64 = &(stats)->x_grp.unicast;       \
			dst64 <= &(stats)->x_grp.s_1519_max;) {\
			*dst64++ += *src64++;                  \
		}                                              \
	} while (0)
74 
/* Only the low eight bits of a value take part in a vnic id */
#define VNIC_MASK (0xFF)
/* vnic id: bit 24 set, low byte taken from @val */
#define VNIC_ID(val) (((val) & VNIC_MASK) | (1ull << 24))
77 
78 /* hfi1_vnic_update_stats - update statistics */
79 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
80 				   struct opa_vnic_stats *stats)
81 {
82 	struct net_device *netdev = vinfo->netdev;
83 	u8 i;
84 
85 	/* add tx counters on different queues */
86 	for (i = 0; i < vinfo->num_tx_q; i++) {
87 		struct opa_vnic_stats *qstats = &vinfo->stats[i];
88 		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
89 
90 		stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
91 		stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
92 		stats->tx_drop_state += qstats->tx_drop_state;
93 		stats->tx_dlid_zero += qstats->tx_dlid_zero;
94 
95 		SUM_GRP_COUNTERS(stats, qstats, tx_grp);
96 		stats->netstats.tx_packets += qnstats->tx_packets;
97 		stats->netstats.tx_bytes += qnstats->tx_bytes;
98 	}
99 
100 	/* add rx counters on different queues */
101 	for (i = 0; i < vinfo->num_rx_q; i++) {
102 		struct opa_vnic_stats *qstats = &vinfo->stats[i];
103 		struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
104 
105 		stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
106 		stats->netstats.rx_nohandler += qnstats->rx_nohandler;
107 		stats->rx_drop_state += qstats->rx_drop_state;
108 		stats->rx_oversize += qstats->rx_oversize;
109 		stats->rx_runt += qstats->rx_runt;
110 
111 		SUM_GRP_COUNTERS(stats, qstats, rx_grp);
112 		stats->netstats.rx_packets += qnstats->rx_packets;
113 		stats->netstats.rx_bytes += qnstats->rx_bytes;
114 	}
115 
116 	stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
117 				    stats->netstats.tx_carrier_errors +
118 				    stats->tx_drop_state + stats->tx_dlid_zero;
119 	stats->netstats.tx_dropped = stats->netstats.tx_errors;
120 
121 	stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
122 				    stats->netstats.rx_nohandler +
123 				    stats->rx_drop_state + stats->rx_oversize +
124 				    stats->rx_runt;
125 	stats->netstats.rx_dropped = stats->netstats.rx_errors;
126 
127 	netdev->stats.tx_packets = stats->netstats.tx_packets;
128 	netdev->stats.tx_bytes = stats->netstats.tx_bytes;
129 	netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
130 	netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
131 	netdev->stats.tx_errors = stats->netstats.tx_errors;
132 	netdev->stats.tx_dropped = stats->netstats.tx_dropped;
133 
134 	netdev->stats.rx_packets = stats->netstats.rx_packets;
135 	netdev->stats.rx_bytes = stats->netstats.rx_bytes;
136 	netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
137 	netdev->stats.multicast = stats->rx_grp.mcastbcast;
138 	netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
139 	netdev->stats.rx_errors = stats->netstats.rx_errors;
140 	netdev->stats.rx_dropped = stats->netstats.rx_dropped;
141 }
142 
143 /* update_len_counters - update pkt's len histogram counters */
144 static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
145 				       int len)
146 {
147 	/* account for 4 byte FCS */
148 	if (len >= 1515)
149 		grp->s_1519_max++;
150 	else if (len >= 1020)
151 		grp->s_1024_1518++;
152 	else if (len >= 508)
153 		grp->s_512_1023++;
154 	else if (len >= 252)
155 		grp->s_256_511++;
156 	else if (len >= 124)
157 		grp->s_128_255++;
158 	else if (len >= 61)
159 		grp->s_65_127++;
160 	else
161 		grp->s_64++;
162 }
163 
164 /* hfi1_vnic_update_tx_counters - update transmit counters */
165 static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
166 					 u8 q_idx, struct sk_buff *skb, int err)
167 {
168 	struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
169 	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
170 	struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
171 	u16 vlan_tci;
172 
173 	stats->netstats.tx_packets++;
174 	stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
175 
176 	update_len_counters(tx_grp, skb->len);
177 
178 	/* rest of the counts are for good packets only */
179 	if (unlikely(err))
180 		return;
181 
182 	if (is_multicast_ether_addr(mac_hdr->h_dest))
183 		tx_grp->mcastbcast++;
184 	else
185 		tx_grp->unicast++;
186 
187 	if (!__vlan_get_tag(skb, &vlan_tci))
188 		tx_grp->vlan++;
189 	else
190 		tx_grp->untagged++;
191 }
192 
193 /* hfi1_vnic_update_rx_counters - update receive counters */
194 static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
195 					 u8 q_idx, struct sk_buff *skb, int err)
196 {
197 	struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
198 	struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
199 	struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
200 	u16 vlan_tci;
201 
202 	stats->netstats.rx_packets++;
203 	stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
204 
205 	update_len_counters(rx_grp, skb->len);
206 
207 	/* rest of the counts are for good packets only */
208 	if (unlikely(err))
209 		return;
210 
211 	if (is_multicast_ether_addr(mac_hdr->h_dest))
212 		rx_grp->mcastbcast++;
213 	else
214 		rx_grp->unicast++;
215 
216 	if (!__vlan_get_tag(skb, &vlan_tci))
217 		rx_grp->vlan++;
218 	else
219 		rx_grp->untagged++;
220 }
221 
/*
 * hfi1_vnic_get_stats64 - ndo_get_stats64 handler, overloaded for opa_vnic
 * @netdev: vnic net device
 * @stats:  buffer that is treated as a struct opa_vnic_stats
 *
 * hfi1_vnic_update_stats() also writes the OPA specific counters, so the
 * buffer behind @stats has to be a full struct opa_vnic_stats.
 * NOTE(review): presumably netstats is its first member - confirm against
 * the structure definition.
 */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	hfi1_vnic_update_stats(opa_vnic_dev_priv(netdev),
			       (struct opa_vnic_stats *)stats);
}
231 
232 static u64 create_bypass_pbc(u32 vl, u32 dw_len)
233 {
234 	u64 pbc;
235 
236 	pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
237 		| PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
238 		| PBC_PACKET_BYPASS
239 		| ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
240 		| (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
241 
242 	return pbc;
243 }
244 
245 /* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
246 static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
247 				    u8 q_idx)
248 {
249 	netif_stop_subqueue(vinfo->netdev, q_idx);
250 	if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
251 		return;
252 
253 	netif_start_subqueue(vinfo->netdev, q_idx);
254 }
255 
256 static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
257 					  struct net_device *netdev)
258 {
259 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
260 	u8 pad_len, q_idx = skb->queue_mapping;
261 	struct hfi1_devdata *dd = vinfo->dd;
262 	struct opa_vnic_skb_mdata *mdata;
263 	u32 pkt_len, total_len;
264 	int err = -EINVAL;
265 	u64 pbc;
266 
267 	v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
268 	if (unlikely(!netif_oper_up(netdev))) {
269 		vinfo->stats[q_idx].tx_drop_state++;
270 		goto tx_finish;
271 	}
272 
273 	/* take out meta data */
274 	mdata = (struct opa_vnic_skb_mdata *)skb->data;
275 	skb_pull(skb, sizeof(*mdata));
276 	if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
277 		vinfo->stats[q_idx].tx_dlid_zero++;
278 		goto tx_finish;
279 	}
280 
281 	/* add tail padding (for 8 bytes size alignment) and icrc */
282 	pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
283 	pad_len += OPA_VNIC_ICRC_TAIL_LEN;
284 
285 	/*
286 	 * pkt_len is how much data we have to write, includes header and data.
287 	 * total_len is length of the packet in Dwords plus the PBC should not
288 	 * include the CRC.
289 	 */
290 	pkt_len = (skb->len + pad_len) >> 2;
291 	total_len = pkt_len + 2; /* PBC + packet */
292 
293 	pbc = create_bypass_pbc(mdata->vl, total_len);
294 
295 	skb_get(skb);
296 	v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
297 	err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
298 	if (unlikely(err)) {
299 		if (err == -ENOMEM)
300 			vinfo->stats[q_idx].netstats.tx_fifo_errors++;
301 		else if (err != -EBUSY)
302 			vinfo->stats[q_idx].netstats.tx_carrier_errors++;
303 	}
304 	/* remove the header before updating tx counters */
305 	skb_pull(skb, OPA_VNIC_HDR_LEN);
306 
307 	if (unlikely(err == -EBUSY)) {
308 		hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
309 		dev_kfree_skb_any(skb);
310 		return NETDEV_TX_BUSY;
311 	}
312 
313 tx_finish:
314 	/* update tx counters */
315 	hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
316 	dev_kfree_skb_any(skb);
317 	return NETDEV_TX_OK;
318 }
319 
320 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
321 				  struct sk_buff *skb,
322 				  struct net_device *sb_dev)
323 {
324 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
325 	struct opa_vnic_skb_mdata *mdata;
326 	struct sdma_engine *sde;
327 
328 	mdata = (struct opa_vnic_skb_mdata *)skb->data;
329 	sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
330 	return sde->this_idx;
331 }
332 
333 /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
334 static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
335 				      struct sk_buff *skb)
336 {
337 	struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
338 	int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
339 	int rc = -EFAULT;
340 
341 	skb_pull(skb, OPA_VNIC_HDR_LEN);
342 
343 	/* Validate Packet length */
344 	if (unlikely(skb->len > max_len))
345 		vinfo->stats[rxq->idx].rx_oversize++;
346 	else if (unlikely(skb->len < ETH_ZLEN))
347 		vinfo->stats[rxq->idx].rx_runt++;
348 	else
349 		rc = 0;
350 	return rc;
351 }
352 
/*
 * get_vnic_port - look up the vnic port registered for a vesw id
 * @dd:      device data
 * @vesw_id: virtual ethernet switch id; only its low byte enters the key
 *
 * Return: whatever hfi1_netdev_get_data() returns for VNIC_ID(@vesw_id);
 * callers treat NULL as "no such port".
 */
static struct hfi1_vnic_vport_info *get_vnic_port(struct hfi1_devdata *dd,
						  int vesw_id)
{
	int key = VNIC_ID(vesw_id);

	return hfi1_netdev_get_data(dd, key);
}
360 
361 static struct hfi1_vnic_vport_info *get_first_vnic_port(struct hfi1_devdata *dd)
362 {
363 	struct hfi1_vnic_vport_info *vinfo;
364 	int next_id = VNIC_ID(0);
365 
366 	vinfo = hfi1_netdev_get_first_data(dd, &next_id);
367 
368 	if (next_id > VNIC_ID(VNIC_MASK))
369 		return NULL;
370 
371 	return vinfo;
372 }
373 
374 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
375 {
376 	struct hfi1_devdata *dd = packet->rcd->dd;
377 	struct hfi1_vnic_vport_info *vinfo = NULL;
378 	struct hfi1_vnic_rx_queue *rxq;
379 	struct sk_buff *skb;
380 	int l4_type, vesw_id = -1, rc;
381 	u8 q_idx;
382 	unsigned char *pad_info;
383 
384 	l4_type = hfi1_16B_get_l4(packet->ebuf);
385 	if (likely(l4_type == OPA_16B_L4_ETHR)) {
386 		vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
387 		vinfo = get_vnic_port(dd, vesw_id);
388 
389 		/*
390 		 * In case of invalid vesw id, count the error on
391 		 * the first available vport.
392 		 */
393 		if (unlikely(!vinfo)) {
394 			struct hfi1_vnic_vport_info *vinfo_tmp;
395 
396 			vinfo_tmp = get_first_vnic_port(dd);
397 			if (vinfo_tmp) {
398 				spin_lock(&vport_cntr_lock);
399 				vinfo_tmp->stats[0].netstats.rx_nohandler++;
400 				spin_unlock(&vport_cntr_lock);
401 			}
402 		}
403 	}
404 
405 	if (unlikely(!vinfo)) {
406 		dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
407 			    l4_type, vesw_id, packet->rcd->ctxt);
408 		return;
409 	}
410 
411 	q_idx = packet->rcd->vnic_q_idx;
412 	rxq = &vinfo->rxq[q_idx];
413 	if (unlikely(!netif_oper_up(vinfo->netdev))) {
414 		vinfo->stats[q_idx].rx_drop_state++;
415 		return;
416 	}
417 
418 	skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
419 	if (unlikely(!skb)) {
420 		vinfo->stats[q_idx].netstats.rx_fifo_errors++;
421 		return;
422 	}
423 
424 	memcpy(skb->data, packet->ebuf, packet->tlen);
425 	skb_put(skb, packet->tlen);
426 
427 	pad_info = skb->data + skb->len - 1;
428 	skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
429 		       ((*pad_info) & 0x7)));
430 
431 	rc = hfi1_vnic_decap_skb(rxq, skb);
432 
433 	/* update rx counters */
434 	hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
435 	if (unlikely(rc)) {
436 		dev_kfree_skb_any(skb);
437 		return;
438 	}
439 
440 	skb_checksum_none_assert(skb);
441 	skb->protocol = eth_type_trans(skb, rxq->netdev);
442 
443 	napi_gro_receive(&rxq->napi, skb);
444 }
445 
446 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
447 {
448 	struct hfi1_devdata *dd = vinfo->dd;
449 	struct net_device *netdev = vinfo->netdev;
450 	int rc;
451 
452 	/* ensure virtual eth switch id is valid */
453 	if (!vinfo->vesw_id)
454 		return -EINVAL;
455 
456 	rc = hfi1_netdev_add_data(dd, VNIC_ID(vinfo->vesw_id), vinfo);
457 	if (rc < 0)
458 		return rc;
459 
460 	rc = hfi1_netdev_rx_init(dd);
461 	if (rc)
462 		goto err_remove;
463 
464 	netif_carrier_on(netdev);
465 	netif_tx_start_all_queues(netdev);
466 	set_bit(HFI1_VNIC_UP, &vinfo->flags);
467 
468 	return 0;
469 
470 err_remove:
471 	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
472 	return rc;
473 }
474 
475 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
476 {
477 	struct hfi1_devdata *dd = vinfo->dd;
478 
479 	clear_bit(HFI1_VNIC_UP, &vinfo->flags);
480 	netif_carrier_off(vinfo->netdev);
481 	netif_tx_disable(vinfo->netdev);
482 	hfi1_netdev_remove_data(dd, VNIC_ID(vinfo->vesw_id));
483 
484 	hfi1_netdev_rx_destroy(dd);
485 }
486 
487 static int hfi1_netdev_open(struct net_device *netdev)
488 {
489 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
490 	int rc;
491 
492 	mutex_lock(&vinfo->lock);
493 	rc = hfi1_vnic_up(vinfo);
494 	mutex_unlock(&vinfo->lock);
495 	return rc;
496 }
497 
498 static int hfi1_netdev_close(struct net_device *netdev)
499 {
500 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
501 
502 	mutex_lock(&vinfo->lock);
503 	if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
504 		hfi1_vnic_down(vinfo);
505 	mutex_unlock(&vinfo->lock);
506 	return 0;
507 }
508 
509 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
510 {
511 	struct hfi1_devdata *dd = vinfo->dd;
512 	int rc = 0;
513 
514 	mutex_lock(&hfi1_mutex);
515 	if (!dd->vnic_num_vports) {
516 		rc = hfi1_vnic_txreq_init(dd);
517 		if (rc)
518 			goto txreq_fail;
519 	}
520 
521 	rc = hfi1_netdev_rx_init(dd);
522 	if (rc) {
523 		dd_dev_err(dd, "Unable to initialize netdev contexts\n");
524 		goto alloc_fail;
525 	}
526 
527 	hfi1_init_vnic_rsm(dd);
528 
529 	dd->vnic_num_vports++;
530 	hfi1_vnic_sdma_init(vinfo);
531 
532 alloc_fail:
533 	if (!dd->vnic_num_vports)
534 		hfi1_vnic_txreq_deinit(dd);
535 txreq_fail:
536 	mutex_unlock(&hfi1_mutex);
537 	return rc;
538 }
539 
540 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
541 {
542 	struct hfi1_devdata *dd = vinfo->dd;
543 
544 	mutex_lock(&hfi1_mutex);
545 	if (--dd->vnic_num_vports == 0) {
546 		hfi1_deinit_vnic_rsm(dd);
547 		hfi1_vnic_txreq_deinit(dd);
548 	}
549 	mutex_unlock(&hfi1_mutex);
550 	hfi1_netdev_rx_destroy(dd);
551 }
552 
553 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
554 {
555 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
556 	bool reopen = false;
557 
558 	/*
559 	 * If vesw_id is being changed, and if the vnic port is up,
560 	 * reset the vnic port to ensure new vesw_id gets picked up
561 	 */
562 	if (id != vinfo->vesw_id) {
563 		mutex_lock(&vinfo->lock);
564 		if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
565 			hfi1_vnic_down(vinfo);
566 			reopen = true;
567 		}
568 
569 		vinfo->vesw_id = id;
570 		if (reopen)
571 			hfi1_vnic_up(vinfo);
572 
573 		mutex_unlock(&vinfo->lock);
574 	}
575 }
576 
577 /* netdev ops */
578 static const struct net_device_ops hfi1_netdev_ops = {
579 	.ndo_open = hfi1_netdev_open,
580 	.ndo_stop = hfi1_netdev_close,
581 	.ndo_start_xmit = hfi1_netdev_start_xmit,
582 	.ndo_select_queue = hfi1_vnic_select_queue,
583 	.ndo_get_stats64 = hfi1_vnic_get_stats64,
584 };
585 
586 static void hfi1_vnic_free_rn(struct net_device *netdev)
587 {
588 	struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
589 
590 	hfi1_vnic_deinit(vinfo);
591 	mutex_destroy(&vinfo->lock);
592 	free_netdev(netdev);
593 }
594 
595 struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
596 				      u32 port_num,
597 				      enum rdma_netdev_t type,
598 				      const char *name,
599 				      unsigned char name_assign_type,
600 				      void (*setup)(struct net_device *))
601 {
602 	struct hfi1_devdata *dd = dd_from_ibdev(device);
603 	struct hfi1_vnic_vport_info *vinfo;
604 	struct net_device *netdev;
605 	struct rdma_netdev *rn;
606 	int i, size, rc;
607 
608 	if (!dd->num_netdev_contexts)
609 		return ERR_PTR(-ENOMEM);
610 
611 	if (!port_num || (port_num > dd->num_pports))
612 		return ERR_PTR(-EINVAL);
613 
614 	if (type != RDMA_NETDEV_OPA_VNIC)
615 		return ERR_PTR(-EOPNOTSUPP);
616 
617 	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
618 	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
619 				  chip_sdma_engines(dd),
620 				  dd->num_netdev_contexts);
621 	if (!netdev)
622 		return ERR_PTR(-ENOMEM);
623 
624 	rn = netdev_priv(netdev);
625 	vinfo = opa_vnic_dev_priv(netdev);
626 	vinfo->dd = dd;
627 	vinfo->num_tx_q = chip_sdma_engines(dd);
628 	vinfo->num_rx_q = dd->num_netdev_contexts;
629 	vinfo->netdev = netdev;
630 	rn->free_rdma_netdev = hfi1_vnic_free_rn;
631 	rn->set_id = hfi1_vnic_set_vesw_id;
632 
633 	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
634 	netdev->hw_features = netdev->features;
635 	netdev->vlan_features = netdev->features;
636 	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
637 	netdev->netdev_ops = &hfi1_netdev_ops;
638 	mutex_init(&vinfo->lock);
639 
640 	for (i = 0; i < vinfo->num_rx_q; i++) {
641 		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
642 
643 		rxq->idx = i;
644 		rxq->vinfo = vinfo;
645 		rxq->netdev = netdev;
646 	}
647 
648 	rc = hfi1_vnic_init(vinfo);
649 	if (rc)
650 		goto init_fail;
651 
652 	return netdev;
653 init_fail:
654 	mutex_destroy(&vinfo->lock);
655 	free_netdev(netdev);
656 	return ERR_PTR(rc);
657 }
658