xref: /openbmc/linux/drivers/net/ethernet/sun/sunvnet.c (revision a2b78e9b)
1 /* sunvnet.c: Sun LDOM Virtual Network Driver.
2  *
3  * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4  */
5 
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/delay.h>
13 #include <linux/init.h>
14 #include <linux/netdevice.h>
15 #include <linux/ethtool.h>
16 #include <linux/etherdevice.h>
17 #include <linux/mutex.h>
18 #include <linux/if_vlan.h>
19 
20 #if IS_ENABLED(CONFIG_IPV6)
21 #include <linux/icmpv6.h>
22 #endif
23 
24 #include <net/icmp.h>
25 #include <net/route.h>
26 
27 #include <asm/vio.h>
28 #include <asm/ldc.h>
29 
30 #include "sunvnet.h"
31 
32 #define DRV_MODULE_NAME		"sunvnet"
33 #define DRV_MODULE_VERSION	"1.0"
34 #define DRV_MODULE_RELDATE	"June 25, 2007"
35 
36 static char version[] =
37 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
38 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
39 MODULE_DESCRIPTION("Sun LDOM virtual network driver");
40 MODULE_LICENSE("GPL");
41 MODULE_VERSION(DRV_MODULE_VERSION);
42 
43 /* Heuristic for the number of times to exponentially back off and
44  * retry sending an LDC trigger when EAGAIN is encountered
45  */
46 #define	VNET_MAX_RETRIES	10
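/* With the doubling udelay() used below in vnet_send_ack() and
 * __vnet_tx_trigger() (1, 2, 4, ... microseconds, capped at 128),
 * the ten retries above add up to roughly 0.6 ms of busy-waiting in
 * the worst case before the send attempt is abandoned.
 */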
47 
48 static int __vnet_tx_trigger(struct vnet_port *port, u32 start);
49 
50 /* Ordered from largest major to lowest */
51 static struct vio_version vnet_versions[] = {
52 	{ .major = 1, .minor = 6 },
53 	{ .major = 1, .minor = 0 },
54 };
55 
56 static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
57 {
58 	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
59 }
60 
61 static int vnet_handle_unknown(struct vnet_port *port, void *arg)
62 {
63 	struct vio_msg_tag *pkt = arg;
64 
65 	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
66 	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
67 	pr_err("Resetting connection\n");
68 
69 	ldc_disconnect(port->vio.lp);
70 
71 	return -ECONNRESET;
72 }
73 
74 static int vnet_send_attr(struct vio_driver_state *vio)
75 {
76 	struct vnet_port *port = to_vnet_port(vio);
77 	struct net_device *dev = port->vp->dev;
78 	struct vio_net_attr_info pkt;
79 	int framelen = ETH_FRAME_LEN;
80 	int i;
81 
82 	memset(&pkt, 0, sizeof(pkt));
83 	pkt.tag.type = VIO_TYPE_CTRL;
84 	pkt.tag.stype = VIO_SUBTYPE_INFO;
85 	pkt.tag.stype_env = VIO_ATTR_INFO;
86 	pkt.tag.sid = vio_send_sid(vio);
87 	if (vio_version_before(vio, 1, 2))
88 		pkt.xfer_mode = VIO_DRING_MODE;
89 	else
90 		pkt.xfer_mode = VIO_NEW_DRING_MODE;
91 	pkt.addr_type = VNET_ADDR_ETHERMAC;
92 	pkt.ack_freq = 0;
93 	for (i = 0; i < 6; i++)
94 		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
95 	if (vio_version_after(vio, 1, 3)) {
96 		if (port->rmtu) {
97 			port->rmtu = min(VNET_MAXPACKET, port->rmtu);
98 			pkt.mtu = port->rmtu;
99 		} else {
100 			port->rmtu = VNET_MAXPACKET;
101 			pkt.mtu = port->rmtu;
102 		}
103 		if (vio_version_after_eq(vio, 1, 6))
104 			pkt.options = VIO_TX_DRING;
105 	} else if (vio_version_before(vio, 1, 3)) {
106 		pkt.mtu = framelen;
107 	} else { /* v1.3 */
108 		pkt.mtu = framelen + VLAN_HLEN;
109 	}
110 
111 	pkt.plnk_updt = PHYSLINK_UPDATE_NONE;
112 	pkt.cflags = 0;
113 
114 	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
115 	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
116 	       "cflags[0x%04x] lso_max[%u]\n",
117 	       pkt.xfer_mode, pkt.addr_type,
118 	       (unsigned long long)pkt.addr,
119 	       pkt.ack_freq, pkt.plnk_updt, pkt.options,
120 	       (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen);
121 
122 
123 	return vio_ldc_send(vio, &pkt, sizeof(pkt));
124 }
125 
126 static int handle_attr_info(struct vio_driver_state *vio,
127 			    struct vio_net_attr_info *pkt)
128 {
129 	struct vnet_port *port = to_vnet_port(vio);
130 	u64	localmtu;
131 	u8	xfer_mode;
132 
133 	viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
134 	       "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
135 	       " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
136 	       pkt->xfer_mode, pkt->addr_type,
137 	       (unsigned long long)pkt->addr,
138 	       pkt->ack_freq, pkt->plnk_updt, pkt->options,
139 	       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
140 	       pkt->ipv4_lso_maxlen);
141 
142 	pkt->tag.sid = vio_send_sid(vio);
143 
144 	xfer_mode = pkt->xfer_mode;
145 	/* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
146 	if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE)
147 		xfer_mode = VIO_NEW_DRING_MODE;
148 
149 	/* MTU negotiation:
150 	 *	< v1.3 - ETH_FRAME_LEN exactly
151 	 *	> v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
152 	 *			pkt->mtu for ACK
153 	 *	= v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
154 	 */
155 	if (vio_version_before(vio, 1, 3)) {
156 		localmtu = ETH_FRAME_LEN;
157 	} else if (vio_version_after(vio, 1, 3)) {
158 		localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET;
159 		localmtu = min(pkt->mtu, localmtu);
160 		pkt->mtu = localmtu;
161 	} else { /* v1.3 */
162 		localmtu = ETH_FRAME_LEN + VLAN_HLEN;
163 	}
164 	port->rmtu = localmtu;
165 
166 	/* for versions >= 1.6, ACK with the xfer mode and options we support */
167 	if (vio_version_after_eq(vio, 1, 6)) {
168 		pkt->xfer_mode = VIO_NEW_DRING_MODE;
169 		pkt->options = VIO_TX_DRING;
170 	}
171 
172 	if (!(xfer_mode & VIO_NEW_DRING_MODE) ||
173 	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
174 	    pkt->mtu != localmtu) {
175 		viodbg(HS, "SEND NET ATTR NACK\n");
176 
177 		pkt->tag.stype = VIO_SUBTYPE_NACK;
178 
179 		(void) vio_ldc_send(vio, pkt, sizeof(*pkt));
180 
181 		return -ECONNRESET;
182 	} else {
183 		viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
184 		       "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
185 		       "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
186 		       pkt->xfer_mode, pkt->addr_type,
187 		       (unsigned long long)pkt->addr,
188 		       pkt->ack_freq, pkt->plnk_updt, pkt->options,
189 		       (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags,
190 		       pkt->ipv4_lso_maxlen);
191 
192 		pkt->tag.stype = VIO_SUBTYPE_ACK;
193 
194 		return vio_ldc_send(vio, pkt, sizeof(*pkt));
195 	}
196 
197 }
198 
199 static int handle_attr_ack(struct vio_driver_state *vio,
200 			   struct vio_net_attr_info *pkt)
201 {
202 	viodbg(HS, "GOT NET ATTR ACK\n");
203 
204 	return 0;
205 }
206 
207 static int handle_attr_nack(struct vio_driver_state *vio,
208 			    struct vio_net_attr_info *pkt)
209 {
210 	viodbg(HS, "GOT NET ATTR NACK\n");
211 
212 	return -ECONNRESET;
213 }
214 
215 static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
216 {
217 	struct vio_net_attr_info *pkt = arg;
218 
219 	switch (pkt->tag.stype) {
220 	case VIO_SUBTYPE_INFO:
221 		return handle_attr_info(vio, pkt);
222 
223 	case VIO_SUBTYPE_ACK:
224 		return handle_attr_ack(vio, pkt);
225 
226 	case VIO_SUBTYPE_NACK:
227 		return handle_attr_nack(vio, pkt);
228 
229 	default:
230 		return -ECONNRESET;
231 	}
232 }
233 
234 static void vnet_handshake_complete(struct vio_driver_state *vio)
235 {
236 	struct vio_dring_state *dr;
237 
238 	dr = &vio->drings[VIO_DRIVER_RX_RING];
239 	dr->snd_nxt = dr->rcv_nxt = 1;
240 
241 	dr = &vio->drings[VIO_DRIVER_TX_RING];
242 	dr->snd_nxt = dr->rcv_nxt = 1;
243 }
244 
245 /* The hypervisor interface that implements copying to/from imported
246  * memory from another domain requires that copies are done to 8-byte
247  * aligned buffers, and that the lengths of such copies are also 8-byte
248  * multiples.
249  *
250  * So we align skb->data to an 8-byte multiple and pad out the data
251  * area so we can round the copy length up to the next multiple of
252  * 8 for the copy.
253  *
254  * The transmitter puts the actual start of the packet 6 bytes into
255  * the buffer it sends over, so that the IP headers after the ethernet
256  * header are aligned properly.  These 6 bytes are not in the descriptor
257  * length, they are simply implied.  This offset is represented using
258  * the VNET_PACKET_SKIP macro.
259  */
260 static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
261 					   unsigned int len)
262 {
263 	struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8);
264 	unsigned long addr, off;
265 
266 	if (unlikely(!skb))
267 		return NULL;
268 
269 	addr = (unsigned long) skb->data;
270 	off = ((addr + 7UL) & ~7UL) - addr;
271 	if (off)
272 		skb_reserve(skb, off);
273 
274 	return skb;
275 }
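/* For example, with the 6 byte VNET_PACKET_SKIP described above, a
 * 1500 byte frame arrives in an skb allocated with 1500 + 6 + 8 + 8
 * bytes of room; skb->data is rounded up to the next 8 byte boundary,
 * vnet_rx_one() below copies (1500 + 6 + 7) & ~7 = 1512 bytes in, then
 * pulls off the 6 byte skip and trims the skb back to the 1500 real
 * bytes before handing it to netif_rx().
 */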
276 
277 static int vnet_rx_one(struct vnet_port *port, unsigned int len,
278 		       struct ldc_trans_cookie *cookies, int ncookies)
279 {
280 	struct net_device *dev = port->vp->dev;
281 	unsigned int copy_len;
282 	struct sk_buff *skb;
283 	int err;
284 
285 	err = -EMSGSIZE;
286 	if (unlikely(len < ETH_ZLEN || len > port->rmtu)) {
287 		dev->stats.rx_length_errors++;
288 		goto out_dropped;
289 	}
290 
291 	skb = alloc_and_align_skb(dev, len);
292 	err = -ENOMEM;
293 	if (unlikely(!skb)) {
294 		dev->stats.rx_missed_errors++;
295 		goto out_dropped;
296 	}
297 
298 	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
299 	skb_put(skb, copy_len);
300 	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
301 		       skb->data, copy_len, 0,
302 		       cookies, ncookies);
303 	if (unlikely(err < 0)) {
304 		dev->stats.rx_frame_errors++;
305 		goto out_free_skb;
306 	}
307 
308 	skb_pull(skb, VNET_PACKET_SKIP);
309 	skb_trim(skb, len);
310 	skb->protocol = eth_type_trans(skb, dev);
311 
312 	dev->stats.rx_packets++;
313 	dev->stats.rx_bytes += len;
314 
315 	netif_rx(skb);
316 
317 	return 0;
318 
319 out_free_skb:
320 	kfree_skb(skb);
321 
322 out_dropped:
323 	dev->stats.rx_dropped++;
324 	return err;
325 }
326 
327 static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
328 			 u32 start, u32 end, u8 vio_dring_state)
329 {
330 	struct vio_dring_data hdr = {
331 		.tag = {
332 			.type		= VIO_TYPE_DATA,
333 			.stype		= VIO_SUBTYPE_ACK,
334 			.stype_env	= VIO_DRING_DATA,
335 			.sid		= vio_send_sid(&port->vio),
336 		},
337 		.dring_ident		= dr->ident,
338 		.start_idx		= start,
339 		.end_idx		= end,
340 		.state			= vio_dring_state,
341 	};
342 	int err, delay;
343 	int retries = 0;
344 
345 	hdr.seq = dr->snd_nxt;
346 	delay = 1;
347 	do {
348 		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
349 		if (err > 0) {
350 			dr->snd_nxt++;
351 			break;
352 		}
353 		udelay(delay);
354 		if ((delay <<= 1) > 128)
355 			delay = 128;
356 		if (retries++ > VNET_MAX_RETRIES) {
357 			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
358 				port->raddr[0], port->raddr[1],
359 				port->raddr[2], port->raddr[3],
360 				port->raddr[4], port->raddr[5]);
361 			break;
362 		}
363 	} while (err == -EAGAIN);
364 
365 	if (err <= 0 && vio_dring_state == VIO_DRING_STOPPED) {
366 		port->stop_rx_idx = end;
367 		port->stop_rx = true;
368 	} else {
369 		port->stop_rx_idx = 0;
370 		port->stop_rx = false;
371 	}
372 
373 	return err;
374 }
375 
376 static u32 next_idx(u32 idx, struct vio_dring_state *dr)
377 {
378 	if (++idx == dr->num_entries)
379 		idx = 0;
380 	return idx;
381 }
382 
383 static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
384 {
385 	if (idx == 0)
386 		idx = dr->num_entries - 1;
387 	else
388 		idx--;
389 
390 	return idx;
391 }
392 
393 static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
394 					struct vio_dring_state *dr,
395 					u32 index)
396 {
397 	struct vio_net_desc *desc = port->vio.desc_buf;
398 	int err;
399 
400 	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
401 				  (index * dr->entry_size),
402 				  dr->cookies, dr->ncookies);
403 	if (err < 0)
404 		return ERR_PTR(err);
405 
406 	return desc;
407 }
408 
409 static int put_rx_desc(struct vnet_port *port,
410 		       struct vio_dring_state *dr,
411 		       struct vio_net_desc *desc,
412 		       u32 index)
413 {
414 	int err;
415 
416 	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
417 				  (index * dr->entry_size),
418 				  dr->cookies, dr->ncookies);
419 	if (err < 0)
420 		return err;
421 
422 	return 0;
423 }
424 
425 static int vnet_walk_rx_one(struct vnet_port *port,
426 			    struct vio_dring_state *dr,
427 			    u32 index, int *needs_ack)
428 {
429 	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
430 	struct vio_driver_state *vio = &port->vio;
431 	int err;
432 
433 	if (IS_ERR(desc))
434 		return PTR_ERR(desc);
435 
436 	if (desc->hdr.state != VIO_DESC_READY)
437 		return 1;
438 
439 	rmb();
440 
441 	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
442 	       desc->hdr.state, desc->hdr.ack,
443 	       desc->size, desc->ncookies,
444 	       desc->cookies[0].cookie_addr,
445 	       desc->cookies[0].cookie_size);
446 
447 	err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
448 	if (err == -ECONNRESET)
449 		return err;
450 	desc->hdr.state = VIO_DESC_DONE;
451 	err = put_rx_desc(port, dr, desc, index);
452 	if (err < 0)
453 		return err;
454 	*needs_ack = desc->hdr.ack;
455 	return 0;
456 }
457 
458 static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
459 			u32 start, u32 end)
460 {
461 	struct vio_driver_state *vio = &port->vio;
462 	int ack_start = -1, ack_end = -1;
463 
464 	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
465 
466 	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
467 
468 	while (start != end) {
469 		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
470 		if (err == -ECONNRESET)
471 			return err;
472 		if (err != 0)
473 			break;
474 		if (ack_start == -1)
475 			ack_start = start;
476 		ack_end = start;
477 		start = next_idx(start, dr);
478 		if (ack && start != end) {
479 			err = vnet_send_ack(port, dr, ack_start, ack_end,
480 					    VIO_DRING_ACTIVE);
481 			if (err == -ECONNRESET)
482 				return err;
483 			ack_start = -1;
484 		}
485 	}
486 	if (unlikely(ack_start == -1))
487 		ack_start = ack_end = prev_idx(start, dr);
488 	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
489 }
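/* Roughly: a peer DRING_DATA message with start_idx 3 and an end_idx
 * of (u32)-1 asks us to walk descriptors 3, 4, ... until one is found
 * that is not VIO_DESC_READY, sending VIO_DRING_ACTIVE acks along the
 * way whenever a descriptor requests one, and a final VIO_DRING_STOPPED
 * ack once we stop, so the peer knows a fresh "start" trigger is
 * needed for its next batch of descriptors.
 */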
490 
491 static int vnet_rx(struct vnet_port *port, void *msgbuf)
492 {
493 	struct vio_dring_data *pkt = msgbuf;
494 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
495 	struct vio_driver_state *vio = &port->vio;
496 
497 	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
498 	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
499 
500 	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
501 		return 0;
502 	if (unlikely(pkt->seq != dr->rcv_nxt)) {
503 		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
504 		       pkt->seq, dr->rcv_nxt);
505 		return 0;
506 	}
507 
508 	dr->rcv_nxt++;
509 
510 	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */
511 
512 	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
513 }
514 
515 static int idx_is_pending(struct vio_dring_state *dr, u32 end)
516 {
517 	u32 idx = dr->cons;
518 	int found = 0;
519 
520 	while (idx != dr->prod) {
521 		if (idx == end) {
522 			found = 1;
523 			break;
524 		}
525 		idx = next_idx(idx, dr);
526 	}
527 	return found;
528 }
529 
530 static int vnet_ack(struct vnet_port *port, void *msgbuf)
531 {
532 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
533 	struct vio_dring_data *pkt = msgbuf;
534 	struct net_device *dev;
535 	struct vnet *vp;
536 	u32 end;
537 	struct vio_net_desc *desc;
538 	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
539 		return 0;
540 
541 	end = pkt->end_idx;
542 	if (unlikely(!idx_is_pending(dr, end)))
543 		return 0;
544 
545 	/* sync for race conditions with vnet_start_xmit() and tell xmit it
546 	 * is time to send a trigger.
547 	 */
548 	dr->cons = next_idx(end, dr);
549 	desc = vio_dring_entry(dr, dr->cons);
550 	if (desc->hdr.state == VIO_DESC_READY && port->start_cons) {
551 		/* vnet_start_xmit() just populated this dring but missed
552 		 * sending the "start" LDC message to the consumer.
553 		 * Send a "start" trigger on its behalf.
554 		 */
555 		if (__vnet_tx_trigger(port, dr->cons) > 0)
556 			port->start_cons = false;
557 		else
558 			port->start_cons = true;
559 	} else {
560 		port->start_cons = true;
561 	}
562 
563 
564 	vp = port->vp;
565 	dev = vp->dev;
566 	if (unlikely(netif_queue_stopped(dev) &&
567 		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
568 		return 1;
569 
570 	return 0;
571 }
572 
573 static int vnet_nack(struct vnet_port *port, void *msgbuf)
574 {
575 	/* XXX just reset or similar XXX */
576 	return 0;
577 }
578 
579 static int handle_mcast(struct vnet_port *port, void *msgbuf)
580 {
581 	struct vio_net_mcast_info *pkt = msgbuf;
582 
583 	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
584 		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
585 		       port->vp->dev->name,
586 		       pkt->tag.type,
587 		       pkt->tag.stype,
588 		       pkt->tag.stype_env,
589 		       pkt->tag.sid);
590 
591 	return 0;
592 }
593 
594 static void maybe_tx_wakeup(unsigned long param)
595 {
596 	struct vnet *vp = (struct vnet *)param;
597 	struct net_device *dev = vp->dev;
598 
599 	netif_tx_lock(dev);
600 	if (likely(netif_queue_stopped(dev))) {
601 		struct vnet_port *port;
602 		int wake = 1;
603 
604 		list_for_each_entry(port, &vp->port_list, list) {
605 			struct vio_dring_state *dr;
606 
607 			dr = &port->vio.drings[VIO_DRIVER_TX_RING];
608 			if (vnet_tx_dring_avail(dr) <
609 			    VNET_TX_WAKEUP_THRESH(dr)) {
610 				wake = 0;
611 				break;
612 			}
613 		}
614 		if (wake)
615 			netif_wake_queue(dev);
616 	}
617 	netif_tx_unlock(dev);
618 }
619 
620 static void vnet_event(void *arg, int event)
621 {
622 	struct vnet_port *port = arg;
623 	struct vio_driver_state *vio = &port->vio;
624 	unsigned long flags;
625 	int tx_wakeup, err;
626 
627 	spin_lock_irqsave(&vio->lock, flags);
628 
629 	if (unlikely(event == LDC_EVENT_RESET ||
630 		     event == LDC_EVENT_UP)) {
631 		vio_link_state_change(vio, event);
632 		spin_unlock_irqrestore(&vio->lock, flags);
633 
634 		if (event == LDC_EVENT_RESET) {
635 			port->rmtu = 0;
636 			vio_port_up(vio);
637 		}
638 		return;
639 	}
640 
641 	if (unlikely(event != LDC_EVENT_DATA_READY)) {
642 		pr_warn("Unexpected LDC event %d\n", event);
643 		spin_unlock_irqrestore(&vio->lock, flags);
644 		return;
645 	}
646 
647 	tx_wakeup = err = 0;
648 	while (1) {
649 		union {
650 			struct vio_msg_tag tag;
651 			u64 raw[8];
652 		} msgbuf;
653 
654 		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
655 		if (unlikely(err < 0)) {
656 			if (err == -ECONNRESET)
657 				vio_conn_reset(vio);
658 			break;
659 		}
660 		if (err == 0)
661 			break;
662 		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
663 		       msgbuf.tag.type,
664 		       msgbuf.tag.stype,
665 		       msgbuf.tag.stype_env,
666 		       msgbuf.tag.sid);
667 		err = vio_validate_sid(vio, &msgbuf.tag);
668 		if (err < 0)
669 			break;
670 
671 		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
672 			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
673 				err = vnet_rx(port, &msgbuf);
674 			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
675 				err = vnet_ack(port, &msgbuf);
676 				if (err > 0)
677 					tx_wakeup |= err;
678 			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
679 				err = vnet_nack(port, &msgbuf);
680 			}
681 		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
682 			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
683 				err = handle_mcast(port, &msgbuf);
684 			else
685 				err = vio_control_pkt_engine(vio, &msgbuf);
686 			if (err)
687 				break;
688 		} else {
689 			err = vnet_handle_unknown(port, &msgbuf);
690 		}
691 		if (err == -ECONNRESET)
692 			break;
693 	}
694 	spin_unlock(&vio->lock);
695 	/* Kick off a tasklet to wake the queue.  We cannot call
696 	 * maybe_tx_wakeup directly here because we could deadlock on
697 	 * netif_tx_lock() with dev_watchdog()
698 	 */
699 	if (unlikely(tx_wakeup && err != -ECONNRESET))
700 		tasklet_schedule(&port->vp->vnet_tx_wakeup);
701 
702 	local_irq_restore(flags);
703 }
704 
705 static int __vnet_tx_trigger(struct vnet_port *port, u32 start)
706 {
707 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
708 	struct vio_dring_data hdr = {
709 		.tag = {
710 			.type		= VIO_TYPE_DATA,
711 			.stype		= VIO_SUBTYPE_INFO,
712 			.stype_env	= VIO_DRING_DATA,
713 			.sid		= vio_send_sid(&port->vio),
714 		},
715 		.dring_ident		= dr->ident,
716 		.start_idx		= start,
717 		.end_idx		= (u32) -1,
718 	};
719 	int err, delay;
720 	int retries = 0;
721 
722 	if (port->stop_rx) {
723 		err = vnet_send_ack(port,
724 				    &port->vio.drings[VIO_DRIVER_RX_RING],
725 				    port->stop_rx_idx, -1,
726 				    VIO_DRING_STOPPED);
727 		if (err <= 0)
728 			return err;
729 	}
730 
731 	hdr.seq = dr->snd_nxt;
732 	delay = 1;
733 	do {
734 		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
735 		if (err > 0) {
736 			dr->snd_nxt++;
737 			break;
738 		}
739 		udelay(delay);
740 		if ((delay <<= 1) > 128)
741 			delay = 128;
742 		if (retries++ > VNET_MAX_RETRIES)
743 			break;
744 	} while (err == -EAGAIN);
745 
746 	return err;
747 }
748 
749 static inline bool port_is_up(struct vnet_port *vnet)
750 {
751 	struct vio_driver_state *vio = &vnet->vio;
752 
753 	return !!(vio->hs_state & VIO_HS_COMPLETE);
754 }
755 
756 struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
757 {
758 	unsigned int hash = vnet_hashfn(skb->data);
759 	struct hlist_head *hp = &vp->port_hash[hash];
760 	struct vnet_port *port;
761 
762 	hlist_for_each_entry(port, hp, hash) {
763 		if (!port_is_up(port))
764 			continue;
765 		if (ether_addr_equal(port->raddr, skb->data))
766 			return port;
767 	}
768 	list_for_each_entry(port, &vp->port_list, list) {
769 		if (!port->switch_port)
770 			continue;
771 		if (!port_is_up(port))
772 			continue;
773 		return port;
774 	}
775 	return NULL;
776 }
777 
778 struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
779 {
780 	struct vnet_port *ret;
781 	unsigned long flags;
782 
783 	spin_lock_irqsave(&vp->lock, flags);
784 	ret = __tx_port_find(vp, skb);
785 	spin_unlock_irqrestore(&vp->lock, flags);
786 
787 	return ret;
788 }
789 
790 static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
791 					  unsigned *pending)
792 {
793 	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
794 	struct sk_buff *skb = NULL;
795 	int i, txi;
796 
797 	*pending = 0;
798 
799 	txi = dr->prod-1;
800 	if (txi < 0)
801 		txi = VNET_TX_RING_SIZE-1;
802 
803 	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
804 		struct vio_net_desc *d;
805 
806 		d = vio_dring_entry(dr, txi);
807 
808 		if (d->hdr.state == VIO_DESC_DONE) {
809 			if (port->tx_bufs[txi].skb) {
810 				BUG_ON(port->tx_bufs[txi].skb->next);
811 
812 				port->tx_bufs[txi].skb->next = skb;
813 				skb = port->tx_bufs[txi].skb;
814 				port->tx_bufs[txi].skb = NULL;
815 
816 				ldc_unmap(port->vio.lp,
817 					  port->tx_bufs[txi].cookies,
818 					  port->tx_bufs[txi].ncookies);
819 			}
820 			d->hdr.state = VIO_DESC_FREE;
821 		} else if (d->hdr.state == VIO_DESC_READY) {
822 			(*pending)++;
823 		} else if (d->hdr.state == VIO_DESC_FREE) {
824 			break;
825 		}
826 		--txi;
827 		if (txi < 0)
828 			txi = VNET_TX_RING_SIZE-1;
829 	}
830 	return skb;
831 }
832 
833 static inline void vnet_free_skbs(struct sk_buff *skb)
834 {
835 	struct sk_buff *next;
836 
837 	while (skb) {
838 		next = skb->next;
839 		skb->next = NULL;
840 		dev_kfree_skb(skb);
841 		skb = next;
842 	}
843 }
844 
845 static void vnet_clean_timer_expire(unsigned long port0)
846 {
847 	struct vnet_port *port = (struct vnet_port *)port0;
848 	struct sk_buff *freeskbs;
849 	unsigned pending;
850 	unsigned long flags;
851 
852 	spin_lock_irqsave(&port->vio.lock, flags);
853 	freeskbs = vnet_clean_tx_ring(port, &pending);
854 	spin_unlock_irqrestore(&port->vio.lock, flags);
855 
856 	vnet_free_skbs(freeskbs);
857 
858 	if (pending)
859 		(void)mod_timer(&port->clean_timer,
860 				jiffies + VNET_CLEAN_TIMEOUT);
861 	else
862 		del_timer(&port->clean_timer);
863 }
864 
865 static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, void **pstart,
866 					     int *plen)
867 {
868 	struct sk_buff *nskb;
869 	int len, pad;
870 
871 	len = skb->len;
872 	pad = 0;
873 	if (len < ETH_ZLEN) {
874 		pad += ETH_ZLEN - skb->len;
875 		len += pad;
876 	}
877 	len += VNET_PACKET_SKIP;
878 	pad += 8 - (len & 7);
879 	len += 8 - (len & 7);
880 
881 	if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
882 	    skb_tailroom(skb) < pad ||
883 	    skb_headroom(skb) < VNET_PACKET_SKIP) {
884 		nskb = alloc_and_align_skb(skb->dev, skb->len);
		if (unlikely(!nskb)) {
			dev_kfree_skb(skb);
			return NULL;
		}
885 		skb_reserve(nskb, VNET_PACKET_SKIP);
886 		if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
887 			dev_kfree_skb(nskb);
888 			dev_kfree_skb(skb);
889 			return NULL;
890 		}
891 		(void)skb_put(nskb, skb->len);
892 		dev_kfree_skb(skb);
893 		skb = nskb;
894 	}
895 
896 	*pstart = skb->data - VNET_PACKET_SKIP;
897 	*plen = len;
898 	return skb;
899 }
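/* For illustration: a minimal 60 byte (ETH_ZLEN) frame whose data is
 * not already 6 bytes past an 8 byte boundary gets copied into a fresh
 * aligned skb, and *plen comes out as 6 (VNET_PACKET_SKIP) + 60 (data)
 * + 6 (tail pad) = 72 bytes, an 8 byte multiple starting at
 * *pstart = skb->data - VNET_PACKET_SKIP as the LDC copy requires.
 */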
900 
901 static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
902 {
903 	struct vnet *vp = netdev_priv(dev);
904 	struct vnet_port *port = tx_port_find(vp, skb);
905 	struct vio_dring_state *dr;
906 	struct vio_net_desc *d;
907 	unsigned long flags;
908 	unsigned int len;
909 	struct sk_buff *freeskbs = NULL;
910 	int i, err, txi;
911 	void *start = NULL;
912 	int nlen = 0;
913 	unsigned pending = 0;
914 
915 	if (unlikely(!port))
916 		goto out_dropped;
917 
918 	skb = vnet_skb_shape(skb, &start, &nlen);
919 
920 	if (unlikely(!skb))
921 		goto out_dropped;
922 
923 	if (skb->len > port->rmtu) {
924 		unsigned long localmtu = port->rmtu - ETH_HLEN;
925 
926 		if (vio_version_after_eq(&port->vio, 1, 3))
927 			localmtu -= VLAN_HLEN;
928 
929 		if (skb->protocol == htons(ETH_P_IP)) {
930 			struct flowi4 fl4;
931 			struct rtable *rt = NULL;
932 
933 			memset(&fl4, 0, sizeof(fl4));
934 			fl4.flowi4_oif = dev->ifindex;
935 			fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
936 			fl4.daddr = ip_hdr(skb)->daddr;
937 			fl4.saddr = ip_hdr(skb)->saddr;
938 
939 			rt = ip_route_output_key(dev_net(dev), &fl4);
940 			if (!IS_ERR(rt)) {
941 				skb_dst_set(skb, &rt->dst);
942 				icmp_send(skb, ICMP_DEST_UNREACH,
943 					  ICMP_FRAG_NEEDED,
944 					  htonl(localmtu));
945 			}
946 		}
947 #if IS_ENABLED(CONFIG_IPV6)
948 		else if (skb->protocol == htons(ETH_P_IPV6))
949 			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, localmtu);
950 #endif
951 		goto out_dropped;
952 	}
953 
954 	spin_lock_irqsave(&port->vio.lock, flags);
955 
956 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
957 	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
958 		if (!netif_queue_stopped(dev)) {
959 			netif_stop_queue(dev);
960 
961 			/* This is a hard error, log it. */
962 			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
963 			dev->stats.tx_errors++;
964 		}
965 		spin_unlock_irqrestore(&port->vio.lock, flags);
966 		return NETDEV_TX_BUSY;
967 	}
968 
969 	d = vio_dring_cur(dr);
970 
971 	txi = dr->prod;
972 
973 	freeskbs = vnet_clean_tx_ring(port, &pending);
974 
975 	BUG_ON(port->tx_bufs[txi].skb);
976 
977 	len = skb->len;
978 	if (len < ETH_ZLEN)
979 		len = ETH_ZLEN;
980 
981 	port->tx_bufs[txi].skb = skb;
982 	skb = NULL;
983 
984 	err = ldc_map_single(port->vio.lp, start, nlen,
985 			     port->tx_bufs[txi].cookies, VNET_MAXCOOKIES,
986 			     (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
987 	if (err < 0) {
988 		netdev_info(dev, "tx buffer map error %d\n", err);
989 		goto out_dropped_unlock;
990 	}
991 	port->tx_bufs[txi].ncookies = err;
992 
993 	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
994 	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
995 	 * the protocol itself does not require it as long as the peer
996 	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
997 	 *
998 	 * An ACK for every packet in the ring is expensive as the
999 	 * sending of LDC messages is slow and affects performance.
1000 	 */
1001 	d->hdr.ack = VIO_ACK_DISABLE;
1002 	d->size = len;
1003 	d->ncookies = port->tx_bufs[txi].ncookies;
1004 	for (i = 0; i < d->ncookies; i++)
1005 		d->cookies[i] = port->tx_bufs[txi].cookies[i];
1006 
1007 	/* This has to be a non-SMP write barrier because we are writing
1008 	 * to memory which is shared with the peer LDOM.
1009 	 */
1010 	wmb();
1011 
1012 	d->hdr.state = VIO_DESC_READY;
1013 
1014 	/* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1015 	 * to notify the consumer that some descriptors are READY.
1016 	 * After that "start" trigger, no additional triggers are needed until
1017 	 * a DRING_STOPPED is received from the consumer. The dr->cons field
1018 	 * (set up by vnet_ack()) has the value of the next dring index
1019 	 * that has not yet been ack-ed. We send a "start" trigger here
1020 	 * if, and only if, start_cons is true (reset it afterward). Conversely,
1021 	 * vnet_ack() should check if the dring corresponding to cons
1022 	 * is marked READY, but start_cons was false.
1023 	 * If so, vnet_ack() should send out the missed "start" trigger.
1024 	 *
1025 	 * Note that the wmb() above makes sure the cookies et al. are
1026 	 * not globally visible before the VIO_DESC_READY, and that the
1027 	 * stores are ordered correctly by the compiler. The consumer will
1028 	 * not proceed until the VIO_DESC_READY is visible assuring that
1029 	 * the consumer does not observe anything related to descriptors
1030 	 * out of order. The HV trap from the LDC start trigger is the
1031 	 * producer to consumer announcement that work is available to the
1032 	 * consumer
1033 	 */
1034 	if (!port->start_cons)
1035 		goto ldc_start_done; /* previous trigger suffices */
1036 
1037 	err = __vnet_tx_trigger(port, dr->cons);
1038 	if (unlikely(err < 0)) {
1039 		netdev_info(dev, "TX trigger error %d\n", err);
1040 		d->hdr.state = VIO_DESC_FREE;
1041 		dev->stats.tx_carrier_errors++;
1042 		goto out_dropped_unlock;
1043 	}
1044 
1045 ldc_start_done:
1046 	port->start_cons = false;
1047 
1048 	dev->stats.tx_packets++;
1049 	dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
1050 
1051 	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
1052 	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
1053 		netif_stop_queue(dev);
1054 		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
1055 			netif_wake_queue(dev);
1056 	}
1057 
1058 	spin_unlock_irqrestore(&port->vio.lock, flags);
1059 
1060 	vnet_free_skbs(freeskbs);
1061 
1062 	(void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
1063 
1064 	return NETDEV_TX_OK;
1065 
1066 out_dropped_unlock:
1067 	spin_unlock_irqrestore(&port->vio.lock, flags);
1068 
1069 out_dropped:
1070 	if (skb)
1071 		dev_kfree_skb(skb);
1072 	vnet_free_skbs(freeskbs);
1073 	if (pending)
1074 		(void)mod_timer(&port->clean_timer,
1075 				jiffies + VNET_CLEAN_TIMEOUT);
1076 	else
1077 		del_timer(&port->clean_timer);
1078 	dev->stats.tx_dropped++;
1079 	return NETDEV_TX_OK;
1080 }
1081 
1082 static void vnet_tx_timeout(struct net_device *dev)
1083 {
1084 	/* XXX Implement me XXX */
1085 }
1086 
1087 static int vnet_open(struct net_device *dev)
1088 {
1089 	netif_carrier_on(dev);
1090 	netif_start_queue(dev);
1091 
1092 	return 0;
1093 }
1094 
1095 static int vnet_close(struct net_device *dev)
1096 {
1097 	netif_stop_queue(dev);
1098 	netif_carrier_off(dev);
1099 
1100 	return 0;
1101 }
1102 
1103 static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
1104 {
1105 	struct vnet_mcast_entry *m;
1106 
1107 	for (m = vp->mcast_list; m; m = m->next) {
1108 		if (ether_addr_equal(m->addr, addr))
1109 			return m;
1110 	}
1111 	return NULL;
1112 }
1113 
1114 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
1115 {
1116 	struct netdev_hw_addr *ha;
1117 
1118 	netdev_for_each_mc_addr(ha, dev) {
1119 		struct vnet_mcast_entry *m;
1120 
1121 		m = __vnet_mc_find(vp, ha->addr);
1122 		if (m) {
1123 			m->hit = 1;
1124 			continue;
1125 		}
1126 
1127 		if (!m) {
1128 			m = kzalloc(sizeof(*m), GFP_ATOMIC);
1129 			if (!m)
1130 				continue;
1131 			memcpy(m->addr, ha->addr, ETH_ALEN);
1132 			m->hit = 1;
1133 
1134 			m->next = vp->mcast_list;
1135 			vp->mcast_list = m;
1136 		}
1137 	}
1138 }
1139 
1140 static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
1141 {
1142 	struct vio_net_mcast_info info;
1143 	struct vnet_mcast_entry *m, **pp;
1144 	int n_addrs;
1145 
1146 	memset(&info, 0, sizeof(info));
1147 
1148 	info.tag.type = VIO_TYPE_CTRL;
1149 	info.tag.stype = VIO_SUBTYPE_INFO;
1150 	info.tag.stype_env = VNET_MCAST_INFO;
1151 	info.tag.sid = vio_send_sid(&port->vio);
1152 	info.set = 1;
1153 
1154 	n_addrs = 0;
1155 	for (m = vp->mcast_list; m; m = m->next) {
1156 		if (m->sent)
1157 			continue;
1158 		m->sent = 1;
1159 		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1160 		       m->addr, ETH_ALEN);
1161 		if (++n_addrs == VNET_NUM_MCAST) {
1162 			info.count = n_addrs;
1163 
1164 			(void) vio_ldc_send(&port->vio, &info,
1165 					    sizeof(info));
1166 			n_addrs = 0;
1167 		}
1168 	}
1169 	if (n_addrs) {
1170 		info.count = n_addrs;
1171 		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
1172 	}
1173 
1174 	info.set = 0;
1175 
1176 	n_addrs = 0;
1177 	pp = &vp->mcast_list;
1178 	while ((m = *pp) != NULL) {
1179 		if (m->hit) {
1180 			m->hit = 0;
1181 			pp = &m->next;
1182 			continue;
1183 		}
1184 
1185 		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
1186 		       m->addr, ETH_ALEN);
1187 		if (++n_addrs == VNET_NUM_MCAST) {
1188 			info.count = n_addrs;
1189 			(void) vio_ldc_send(&port->vio, &info,
1190 					    sizeof(info));
1191 			n_addrs = 0;
1192 		}
1193 
1194 		*pp = m->next;
1195 		kfree(m);
1196 	}
1197 	if (n_addrs) {
1198 		info.count = n_addrs;
1199 		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
1200 	}
1201 }
1202 
1203 static void vnet_set_rx_mode(struct net_device *dev)
1204 {
1205 	struct vnet *vp = netdev_priv(dev);
1206 	struct vnet_port *port;
1207 	unsigned long flags;
1208 
1209 	spin_lock_irqsave(&vp->lock, flags);
1210 	if (!list_empty(&vp->port_list)) {
1211 		port = list_entry(vp->port_list.next, struct vnet_port, list);
1212 
1213 		if (port->switch_port) {
1214 			__update_mc_list(vp, dev);
1215 			__send_mc_list(vp, port);
1216 		}
1217 	}
1218 	spin_unlock_irqrestore(&vp->lock, flags);
1219 }
1220 
1221 static int vnet_change_mtu(struct net_device *dev, int new_mtu)
1222 {
1223 	if (new_mtu < 68 || new_mtu > 65535)
1224 		return -EINVAL;
1225 
1226 	dev->mtu = new_mtu;
1227 	return 0;
1228 }
1229 
1230 static int vnet_set_mac_addr(struct net_device *dev, void *p)
1231 {
1232 	return -EINVAL;
1233 }
1234 
1235 static void vnet_get_drvinfo(struct net_device *dev,
1236 			     struct ethtool_drvinfo *info)
1237 {
1238 	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
1239 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
1240 }
1241 
1242 static u32 vnet_get_msglevel(struct net_device *dev)
1243 {
1244 	struct vnet *vp = netdev_priv(dev);
1245 	return vp->msg_enable;
1246 }
1247 
1248 static void vnet_set_msglevel(struct net_device *dev, u32 value)
1249 {
1250 	struct vnet *vp = netdev_priv(dev);
1251 	vp->msg_enable = value;
1252 }
1253 
1254 static const struct ethtool_ops vnet_ethtool_ops = {
1255 	.get_drvinfo		= vnet_get_drvinfo,
1256 	.get_msglevel		= vnet_get_msglevel,
1257 	.set_msglevel		= vnet_set_msglevel,
1258 	.get_link		= ethtool_op_get_link,
1259 };
1260 
1261 static void vnet_port_free_tx_bufs(struct vnet_port *port)
1262 {
1263 	struct vio_dring_state *dr;
1264 	int i;
1265 
1266 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1267 	if (dr->base) {
1268 		ldc_free_exp_dring(port->vio.lp, dr->base,
1269 				   (dr->entry_size * dr->num_entries),
1270 				   dr->cookies, dr->ncookies);
1271 		dr->base = NULL;
1272 		dr->entry_size = 0;
1273 		dr->num_entries = 0;
1274 		dr->pending = 0;
1275 		dr->ncookies = 0;
1276 	}
1277 
1278 	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1279 		struct vio_net_desc *d;
1280 		void *skb = port->tx_bufs[i].skb;
1281 
1282 		if (!skb)
1283 			continue;
1284 
1285 		d = vio_dring_entry(dr, i);
1286 		if (d->hdr.state == VIO_DESC_READY)
1287 			pr_warn("active transmit buffers freed\n");
1288 
1289 		ldc_unmap(port->vio.lp,
1290 			  port->tx_bufs[i].cookies,
1291 			  port->tx_bufs[i].ncookies);
1292 		dev_kfree_skb(skb);
1293 		port->tx_bufs[i].skb = NULL;
1294 		d->hdr.state = VIO_DESC_FREE;
1295 	}
1296 }
1297 
1298 static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
1299 {
1300 	struct vio_dring_state *dr;
1301 	unsigned long len;
1302 	int i, err, ncookies;
1303 	void *dring;
1304 
1305 	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1306 
1307 	len = (VNET_TX_RING_SIZE *
1308 	       (sizeof(struct vio_net_desc) +
1309 		(sizeof(struct ldc_trans_cookie) * 2)));
1310 
1311 	ncookies = VIO_MAX_RING_COOKIES;
1312 	dring = ldc_alloc_exp_dring(port->vio.lp, len,
1313 				    dr->cookies, &ncookies,
1314 				    (LDC_MAP_SHADOW |
1315 				     LDC_MAP_DIRECT |
1316 				     LDC_MAP_RW));
1317 	if (IS_ERR(dring)) {
1318 		err = PTR_ERR(dring);
1319 		goto err_out;
1320 	}
1321 
1322 	dr->base = dring;
1323 	dr->entry_size = (sizeof(struct vio_net_desc) +
1324 			  (sizeof(struct ldc_trans_cookie) * 2));
1325 	dr->num_entries = VNET_TX_RING_SIZE;
1326 	dr->prod = dr->cons = 0;
1327 	port->start_cons  = true; /* need an initial trigger */
1328 	dr->pending = VNET_TX_RING_SIZE;
1329 	dr->ncookies = ncookies;
1330 
1331 	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1332 		struct vio_net_desc *d;
1333 
1334 		d = vio_dring_entry(dr, i);
1335 		d->hdr.state = VIO_DESC_FREE;
1336 	}
1337 	return 0;
1338 
1339 err_out:
1340 	vnet_port_free_tx_bufs(port);
1341 
1342 	return err;
1343 }
1344 
1345 static LIST_HEAD(vnet_list);
1346 static DEFINE_MUTEX(vnet_list_mutex);
1347 
1348 static const struct net_device_ops vnet_ops = {
1349 	.ndo_open		= vnet_open,
1350 	.ndo_stop		= vnet_close,
1351 	.ndo_set_rx_mode	= vnet_set_rx_mode,
1352 	.ndo_set_mac_address	= vnet_set_mac_addr,
1353 	.ndo_validate_addr	= eth_validate_addr,
1354 	.ndo_tx_timeout		= vnet_tx_timeout,
1355 	.ndo_change_mtu		= vnet_change_mtu,
1356 	.ndo_start_xmit		= vnet_start_xmit,
1357 };
1358 
1359 static struct vnet *vnet_new(const u64 *local_mac)
1360 {
1361 	struct net_device *dev;
1362 	struct vnet *vp;
1363 	int err, i;
1364 
1365 	dev = alloc_etherdev(sizeof(*vp));
1366 	if (!dev)
1367 		return ERR_PTR(-ENOMEM);
1368 	dev->needed_headroom = VNET_PACKET_SKIP + 8;
1369 	dev->needed_tailroom = 8;
1370 
1371 	for (i = 0; i < ETH_ALEN; i++)
1372 		dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff;
1373 
1374 	vp = netdev_priv(dev);
1375 
1376 	spin_lock_init(&vp->lock);
1377 	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
1378 	vp->dev = dev;
1379 
1380 	INIT_LIST_HEAD(&vp->port_list);
1381 	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
1382 		INIT_HLIST_HEAD(&vp->port_hash[i]);
1383 	INIT_LIST_HEAD(&vp->list);
1384 	vp->local_mac = *local_mac;
1385 
1386 	dev->netdev_ops = &vnet_ops;
1387 	dev->ethtool_ops = &vnet_ethtool_ops;
1388 	dev->watchdog_timeo = VNET_TX_TIMEOUT;
1389 
1390 	err = register_netdev(dev);
1391 	if (err) {
1392 		pr_err("Cannot register net device, aborting\n");
1393 		goto err_out_free_dev;
1394 	}
1395 
1396 	netdev_info(dev, "Sun LDOM vnet %pM\n", dev->dev_addr);
1397 
1398 	list_add(&vp->list, &vnet_list);
1399 
1400 	return vp;
1401 
1402 err_out_free_dev:
1403 	free_netdev(dev);
1404 
1405 	return ERR_PTR(err);
1406 }
1407 
1408 static struct vnet *vnet_find_or_create(const u64 *local_mac)
1409 {
1410 	struct vnet *iter, *vp;
1411 
1412 	mutex_lock(&vnet_list_mutex);
1413 	vp = NULL;
1414 	list_for_each_entry(iter, &vnet_list, list) {
1415 		if (iter->local_mac == *local_mac) {
1416 			vp = iter;
1417 			break;
1418 		}
1419 	}
1420 	if (!vp)
1421 		vp = vnet_new(local_mac);
1422 	mutex_unlock(&vnet_list_mutex);
1423 
1424 	return vp;
1425 }
1426 
1427 static void vnet_cleanup(void)
1428 {
1429 	struct vnet *vp;
1430 	struct net_device *dev;
1431 
1432 	mutex_lock(&vnet_list_mutex);
1433 	while (!list_empty(&vnet_list)) {
1434 		vp = list_first_entry(&vnet_list, struct vnet, list);
1435 		list_del(&vp->list);
1436 		dev = vp->dev;
1437 		tasklet_kill(&vp->vnet_tx_wakeup);
1438 		/* vio_unregister_driver() should have cleaned up port_list */
1439 		BUG_ON(!list_empty(&vp->port_list));
1440 		unregister_netdev(dev);
1441 		free_netdev(dev);
1442 	}
1443 	mutex_unlock(&vnet_list_mutex);
1444 }
1445 
1446 static const char *local_mac_prop = "local-mac-address";
1447 
1448 static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
1449 						u64 port_node)
1450 {
1451 	const u64 *local_mac = NULL;
1452 	u64 a;
1453 
1454 	mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
1455 		u64 target = mdesc_arc_target(hp, a);
1456 		const char *name;
1457 
1458 		name = mdesc_get_property(hp, target, "name", NULL);
1459 		if (!name || strcmp(name, "network"))
1460 			continue;
1461 
1462 		local_mac = mdesc_get_property(hp, target,
1463 					       local_mac_prop, NULL);
1464 		if (local_mac)
1465 			break;
1466 	}
1467 	if (!local_mac)
1468 		return ERR_PTR(-ENODEV);
1469 
1470 	return vnet_find_or_create(local_mac);
1471 }
1472 
1473 static struct ldc_channel_config vnet_ldc_cfg = {
1474 	.event		= vnet_event,
1475 	.mtu		= 64,
1476 	.mode		= LDC_MODE_UNRELIABLE,
1477 };
1478 
1479 static struct vio_driver_ops vnet_vio_ops = {
1480 	.send_attr		= vnet_send_attr,
1481 	.handle_attr		= vnet_handle_attr,
1482 	.handshake_complete	= vnet_handshake_complete,
1483 };
1484 
1485 static void print_version(void)
1486 {
1487 	printk_once(KERN_INFO "%s", version);
1488 }
1489 
1490 const char *remote_macaddr_prop = "remote-mac-address";
1491 
1492 static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1493 {
1494 	struct mdesc_handle *hp;
1495 	struct vnet_port *port;
1496 	unsigned long flags;
1497 	struct vnet *vp;
1498 	const u64 *rmac;
1499 	int len, i, err, switch_port;
1500 
1501 	print_version();
1502 
1503 	hp = mdesc_grab();
1504 
1505 	vp = vnet_find_parent(hp, vdev->mp);
1506 	if (IS_ERR(vp)) {
1507 		pr_err("Cannot find port parent vnet\n");
1508 		err = PTR_ERR(vp);
1509 		goto err_out_put_mdesc;
1510 	}
1511 
1512 	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
1513 	err = -ENODEV;
1514 	if (!rmac) {
1515 		pr_err("Port lacks %s property\n", remote_macaddr_prop);
1516 		goto err_out_put_mdesc;
1517 	}
1518 
1519 	port = kzalloc(sizeof(*port), GFP_KERNEL);
1520 	err = -ENOMEM;
1521 	if (!port)
1522 		goto err_out_put_mdesc;
1523 
1524 	for (i = 0; i < ETH_ALEN; i++)
1525 		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;
1526 
1527 	port->vp = vp;
1528 
1529 	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
1530 			      vnet_versions, ARRAY_SIZE(vnet_versions),
1531 			      &vnet_vio_ops, vp->dev->name);
1532 	if (err)
1533 		goto err_out_free_port;
1534 
1535 	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
1536 	if (err)
1537 		goto err_out_free_port;
1538 
1539 	err = vnet_port_alloc_tx_bufs(port);
1540 	if (err)
1541 		goto err_out_free_ldc;
1542 
1543 	INIT_HLIST_NODE(&port->hash);
1544 	INIT_LIST_HEAD(&port->list);
1545 
1546 	switch_port = 0;
1547 	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
1548 		switch_port = 1;
1549 	port->switch_port = switch_port;
1550 
1551 	spin_lock_irqsave(&vp->lock, flags);
1552 	if (switch_port)
1553 		list_add(&port->list, &vp->port_list);
1554 	else
1555 		list_add_tail(&port->list, &vp->port_list);
1556 	hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
1557 	spin_unlock_irqrestore(&vp->lock, flags);
1558 
1559 	dev_set_drvdata(&vdev->dev, port);
1560 
1561 	pr_info("%s: PORT ( remote-mac %pM%s )\n",
1562 		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");
1563 
1564 	setup_timer(&port->clean_timer, vnet_clean_timer_expire,
1565 		    (unsigned long)port);
1566 
1567 	vio_port_up(&port->vio);
1568 
1569 	mdesc_release(hp);
1570 
1571 	return 0;
1572 
1573 err_out_free_ldc:
1574 	vio_ldc_free(&port->vio);
1575 
1576 err_out_free_port:
1577 	kfree(port);
1578 
1579 err_out_put_mdesc:
1580 	mdesc_release(hp);
1581 	return err;
1582 }
1583 
1584 static int vnet_port_remove(struct vio_dev *vdev)
1585 {
1586 	struct vnet_port *port = dev_get_drvdata(&vdev->dev);
1587 
1588 	if (port) {
1589 		struct vnet *vp = port->vp;
1590 		unsigned long flags;
1591 
1592 		del_timer_sync(&port->vio.timer);
1593 		del_timer_sync(&port->clean_timer);
1594 
1595 		spin_lock_irqsave(&vp->lock, flags);
1596 		list_del(&port->list);
1597 		hlist_del(&port->hash);
1598 		spin_unlock_irqrestore(&vp->lock, flags);
1599 
1600 		vnet_port_free_tx_bufs(port);
1601 		vio_ldc_free(&port->vio);
1602 
1603 		dev_set_drvdata(&vdev->dev, NULL);
1604 
1605 		kfree(port);
1606 
1607 	}
1608 	return 0;
1609 }
1610 
1611 static const struct vio_device_id vnet_port_match[] = {
1612 	{
1613 		.type = "vnet-port",
1614 	},
1615 	{},
1616 };
1617 MODULE_DEVICE_TABLE(vio, vnet_port_match);
1618 
1619 static struct vio_driver vnet_port_driver = {
1620 	.id_table	= vnet_port_match,
1621 	.probe		= vnet_port_probe,
1622 	.remove		= vnet_port_remove,
1623 	.name		= "vnet_port",
1624 };
1625 
1626 static int __init vnet_init(void)
1627 {
1628 	return vio_register_driver(&vnet_port_driver);
1629 }
1630 
1631 static void __exit vnet_exit(void)
1632 {
1633 	vio_unregister_driver(&vnet_port_driver);
1634 	vnet_cleanup();
1635 }
1636 
1637 module_init(vnet_init);
1638 module_exit(vnet_exit);
1639