/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet.h"

#define DRV_MODULE_NAME		"sunvnet"
#define DRV_MODULE_VERSION	"1.0"
#define DRV_MODULE_RELDATE	"June 25, 2007"

static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);

/* Heuristic for the number of times to exponentially back off and
 * retry sending an LDC trigger when EAGAIN is encountered.
 */
#define	VNET_MAX_RETRIES	10

/* Ordered from highest major version to lowest. */
static struct vio_version vnet_versions[] = {
	{ .major = 1, .minor = 0 },
};

static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

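/* Any message we cannot classify forces a connection reset: log the
 * raw tag and disconnect the LDC channel so the handshake restarts.
 */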
static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

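/* Send our ATTR INFO during the handshake: dring transfer mode, the
 * ethernet MAC packed into the low 48 bits of a u64 (most significant
 * byte first), and an MTU of ETH_FRAME_LEN (this protocol version has
 * no jumbo frame support).
 */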
static int vnet_send_attr(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = port->vp->dev;
	struct vio_net_attr_info pkt;
	int i;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
	for (i = 0; i < 6; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	pkt.mtu = ETH_FRAME_LEN;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] mtu[%llu]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long) pkt.addr,
	       pkt.ack_freq,
	       (unsigned long long) pkt.mtu);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}

static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] mtu[%llu]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long) pkt->addr,
	       pkt->ack_freq,
	       (unsigned long long) pkt->mtu);

	pkt->tag.sid = vio_send_sid(vio);

	if (pkt->xfer_mode != VIO_DRING_MODE ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != ETH_FRAME_LEN) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void) vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	} else {
		viodbg(HS, "SEND NET ATTR ACK\n");

		pkt->tag.stype = VIO_SUBTYPE_ACK;

		return vio_ldc_send(vio, pkt, sizeof(*pkt));
	}
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}

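/* Once the handshake completes, both dring sequence spaces restart
 * at 1 for the new session.
 */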
static void vnet_handshake_complete(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;
}

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8);
	unsigned long addr, off;

	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long) skb->data;
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}

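/* Receive one frame described by a single descriptor: allocate an
 * aligned skb, ldc_copy() the data in, strip the VNET_PACKET_SKIP pad
 * and hand the frame to the stack.  The copy length is rounded up to
 * the hypervisor's 8-byte granularity; e.g. a 60-byte (ETH_ZLEN) frame
 * copies (60 + VNET_PACKET_SKIP + 7) & ~7 == 72 bytes.
 */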
static int vnet_rx_one(struct vnet_port *port, unsigned int len,
		       struct ldc_trans_cookie *cookies, int ncookies)
{
	struct net_device *dev = port->vp->dev;
	unsigned int copy_len;
	struct sk_buff *skb;
	int err;

	err = -EMSGSIZE;
	if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       cookies, ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;

	netif_rx(skb);

	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}

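/* ACK a range of RX descriptors back to the transmitter.  vio_ldc_send()
 * can return -EAGAIN when the channel is congested, so retry with an
 * exponential backoff (1, 2, 4, ... microseconds, capped at 128),
 * giving up with -ECONNRESET after VNET_MAX_RETRIES attempts.
 */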
static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_ACK,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= end,
		.state			= vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
				port->raddr[0], port->raddr[1],
				port->raddr[2], port->raddr[3],
				port->raddr[4], port->raddr[5]);
			err = -ECONNRESET;
		}
	} while (err == -EAGAIN);

	return err;
}

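/* Dring indices are free-running modulo num_entries; these helpers
 * implement the wrap-around in both directions.
 */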
static u32 next_idx(u32 idx, struct vio_dring_state *dr)
{
	if (++idx == dr->num_entries)
		idx = 0;
	return idx;
}

static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
{
	if (idx == 0)
		idx = dr->num_entries - 1;
	else
		idx--;

	return idx;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

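/* Process one RX descriptor: fetch it from the peer's exported ring,
 * receive the frame, then mark the descriptor DONE and write it back.
 * The rmb() orders the READY check before reads of the descriptor
 * payload.  Returns 1 when the descriptor is not READY yet, 0 on
 * success (with *needs_ack reporting whether the peer requested an
 * ACK for this entry), or a negative error.
 */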
static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	if (IS_ERR(desc))
		return PTR_ERR(desc);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;

	rmb();

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
	if (err == -ECONNRESET)
		return err;
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}

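/* Walk the descriptor range named in a DRING_DATA message.  An end_idx
 * of (u32)-1 means no known end, so we walk the whole ring; otherwise
 * the end is made exclusive.  ACKs are batched: an ACTIVE ack goes out
 * mid-walk only when a descriptor explicitly requests one, and the walk
 * always finishes with a STOPPED ack so the transmitter can make
 * forward progress.
 */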
static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;

	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = next_idx(start, dr);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
	}
	if (unlikely(ack_start == -1))
		ack_start = ack_end = prev_idx(start, dr);
	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
}

static int vnet_rx(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
}

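/* Return nonzero if @end lies within the not-yet-acked region of the
 * TX ring, i.e. between dr->cons and dr->prod.
 */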
static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = next_idx(idx, dr);
	}
	return found;
}

static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	struct vnet *vp;
	u32 end;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	if (unlikely(!idx_is_pending(dr, end)))
		return 0;

	dr->cons = next_idx(end, dr);

	vp = port->vp;
	dev = vp->dev;
	if (unlikely(netif_queue_stopped(dev) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       port->vp->dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

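/* Tasklet body scheduled from vnet_event(): re-enable the TX queue once
 * every port's TX dring again has at least VNET_TX_WAKEUP_THRESH free
 * entries.  This cannot be done directly from the LDC event handler
 * because taking netif_tx_lock() there could deadlock against
 * dev_watchdog().
 */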
static void maybe_tx_wakeup(unsigned long param)
{
	struct vnet *vp = (struct vnet *)param;
	struct net_device *dev = vp->dev;

	netif_tx_lock(dev);
	if (likely(netif_queue_stopped(dev))) {
		struct vnet_port *port;
		int wake = 1;

		list_for_each_entry(port, &vp->port_list, list) {
			struct vio_dring_state *dr;

			dr = &port->vio.drings[VIO_DRIVER_TX_RING];
			if (vnet_tx_dring_avail(dr) <
			    VNET_TX_WAKEUP_THRESH(dr)) {
				wake = 0;
				break;
			}
		}
		if (wake)
			netif_wake_queue(dev);
	}
	netif_tx_unlock(dev);
}

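/* LDC event callback.  RESET/UP events feed the VIO link state machine;
 * DATA_READY drains the channel, dispatching data packets to the
 * rx/ack/nack handlers and control packets to the VIO control engine.
 * TX wakeups are deferred to a tasklet (see maybe_tx_wakeup() above).
 */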
static void vnet_event(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;
	unsigned long flags;
	int tx_wakeup, err;

	spin_lock_irqsave(&vio->lock, flags);

	if (unlikely(event == LDC_EVENT_RESET ||
		     event == LDC_EVENT_UP)) {
		vio_link_state_change(vio, event);
		spin_unlock_irqrestore(&vio->lock, flags);

		if (event == LDC_EVENT_RESET)
			vio_port_up(vio);
		return;
	}

	if (unlikely(event != LDC_EVENT_DATA_READY)) {
		pr_warn("Unexpected LDC event %d\n", event);
		spin_unlock_irqrestore(&vio->lock, flags);
		return;
	}

	tx_wakeup = err = 0;
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
		if (unlikely(err < 0)) {
			if (err == -ECONNRESET)
				vio_conn_reset(vio);
			break;
		}
		if (err == 0)
			break;
		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
		       msgbuf.tag.type,
		       msgbuf.tag.stype,
		       msgbuf.tag.stype_env,
		       msgbuf.tag.sid);
		err = vio_validate_sid(vio, &msgbuf.tag);
		if (err < 0)
			break;

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				err = vnet_rx(port, &msgbuf);
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
	spin_unlock(&vio->lock);
	/* Kick off a tasklet to wake the queue.  We cannot call
	 * maybe_tx_wakeup directly here because we could deadlock on
	 * netif_tx_lock() with dev_watchdog()
	 */
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		tasklet_schedule(&port->vp->vnet_tx_wakeup);

	local_irq_restore(flags);
}

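/* Kick the peer to tell it new TX descriptors are available, using the
 * same capped exponential backoff as vnet_send_ack().  Called with the
 * vio lock held.
 */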
static int __vnet_tx_trigger(struct vnet_port *port)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_INFO,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= dr->prod,
		.end_idx		= (u32) -1,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);

	return err;
}

static inline bool port_is_up(struct vnet_port *vnet)
{
	struct vio_driver_state *vio = &vnet->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}

struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	unsigned int hash = vnet_hashfn(skb->data);
	struct hlist_head *hp = &vp->port_hash[hash];
	struct vnet_port *port;

	hlist_for_each_entry(port, hp, hash) {
		if (!port_is_up(port))
			continue;
		if (ether_addr_equal(port->raddr, skb->data))
			return port;
	}
	list_for_each_entry(port, &vp->port_list, list) {
		if (!port->switch_port)
			continue;
		if (!port_is_up(port))
			continue;
		return port;
	}
	return NULL;
}

struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	struct vnet_port *ret;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	ret = __tx_port_find(vp, skb);
	spin_unlock_irqrestore(&vp->lock, flags);

	return ret;
}

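/* Transmit path: pick the port by destination MAC (falling back to the
 * switch port), copy the skb into a pre-mapped 8-byte aligned TX buffer
 * (padding short frames to ETH_ZLEN), fill in the descriptor and
 * trigger the peer.  The skb is freed immediately; we do not wait for
 * the peer's ACK.
 */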
static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = tx_port_find(vp, skb);
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned long flags;
	unsigned int len;
	void *tx_buf;
	int i, err;

	if (unlikely(!port))
		goto out_dropped;

	spin_lock_irqsave(&port->vio.lock, flags);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
		if (!netif_queue_stopped(dev)) {
			netif_stop_queue(dev);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		spin_unlock_irqrestore(&port->vio.lock, flags);
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	tx_buf = port->tx_bufs[dr->prod].buf;
	skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len);

	len = skb->len;
	if (len < ETH_ZLEN) {
		len = ETH_ZLEN;
		memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
	}

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[dr->prod].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[dr->prod].cookies[i];

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	wmb();

	d->hdr.state = VIO_DESC_READY;

	err = __vnet_tx_trigger(port);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		dev->stats.tx_carrier_errors++;
		goto out_dropped_unlock;
	}

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
		netif_stop_queue(dev);
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_wake_queue(dev);
	}

	spin_unlock_irqrestore(&port->vio.lock, flags);

	dev_kfree_skb(skb);

	return NETDEV_TX_OK;

out_dropped_unlock:
	spin_unlock_irqrestore(&port->vio.lock, flags);

out_dropped:
	dev_kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void vnet_tx_timeout(struct net_device *dev)
{
	/* XXX Implement me XXX */
}

static int vnet_open(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_start_queue(dev);

	return 0;
}

static int vnet_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	netif_carrier_off(dev);

	return 0;
}

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

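/* Multicast filter maintenance.  __update_mc_list() mirrors the
 * device's mc list into vp->mcast_list, marking entries still in use;
 * __send_mc_list() then pushes additions (set = 1) and removals
 * (set = 0) to the switch port in VNET_NUM_MCAST-sized batches.
 */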
static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		m = kzalloc(sizeof(*m), GFP_ATOMIC);
		if (!m)
			continue;
		memcpy(m->addr, ha->addr, ETH_ALEN);
		m->hit = 1;

		m->next = vp->mcast_list;
		vp->mcast_list = m;
	}
}

static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}

	info.set = 0;

	n_addrs = 0;
	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

static void vnet_set_rx_mode(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);

		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
		}
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}

static int vnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu != ETH_DATA_LEN)
		return -EINVAL;

	dev->mtu = new_mtu;
	return 0;
}

static int vnet_set_mac_addr(struct net_device *dev, void *p)
{
	return -EINVAL;
}

static void vnet_get_drvinfo(struct net_device *dev,
			     struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
}

static u32 vnet_get_msglevel(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	return vp->msg_enable;
}

static void vnet_set_msglevel(struct net_device *dev, u32 value)
{
	struct vnet *vp = netdev_priv(dev);
	vp->msg_enable = value;
}

static const struct ethtool_ops vnet_ethtool_ops = {
	.get_drvinfo		= vnet_get_drvinfo,
	.get_msglevel		= vnet_get_msglevel,
	.set_msglevel		= vnet_set_msglevel,
	.get_link		= ethtool_op_get_link,
};

static void vnet_port_free_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	if (dr->base) {
		ldc_free_exp_dring(port->vio.lp, dr->base,
				   (dr->entry_size * dr->num_entries),
				   dr->cookies, dr->ncookies);
		dr->base = NULL;
		dr->entry_size = 0;
		dr->num_entries = 0;
		dr->pending = 0;
		dr->ncookies = 0;
	}

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		void *buf = port->tx_bufs[i].buf;

		if (!buf)
			continue;

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);

		kfree(buf);
		port->tx_bufs[i].buf = NULL;
	}
}

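/* Allocate and LDC-map one fixed TX buffer per ring entry up front,
 * then export the descriptor ring itself to the peer.  Buffers are
 * mapped whole (ETH_FRAME_LEN rounded up to an 8-byte multiple) so the
 * transmit hot path never has to map or unmap memory.
 */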
static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len;
	int i, err, ncookies;
	void *dring;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
		int map_len = (ETH_FRAME_LEN + 7) & ~7;

		err = -ENOMEM;
		if (!buf)
			goto err_out;

		err = -EFAULT;
		if ((unsigned long)buf & (8UL - 1)) {
			pr_err("TX buffer misaligned\n");
			kfree(buf);
			goto err_out;
		}

		err = ldc_map_single(port->vio.lp, buf, map_len,
				     port->tx_bufs[i].cookies, 2,
				     (LDC_MAP_SHADOW |
				      LDC_MAP_DIRECT |
				      LDC_MAP_RW));
		if (err < 0) {
			kfree(buf);
			goto err_out;
		}
		port->tx_bufs[i].buf = buf;
		port->tx_bufs[i].ncookies = err;
	}

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

	len = (VNET_TX_RING_SIZE *
	       (sizeof(struct vio_net_desc) +
		(sizeof(struct ldc_trans_cookie) * 2)));

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = (sizeof(struct vio_net_desc) +
			  (sizeof(struct ldc_trans_cookie) * 2));
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = dr->cons = 0;
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	return 0;

err_out:
	vnet_port_free_tx_bufs(port);

	return err;
}

static LIST_HEAD(vnet_list);
static DEFINE_MUTEX(vnet_list_mutex);

static const struct net_device_ops vnet_ops = {
	.ndo_open		= vnet_open,
	.ndo_stop		= vnet_close,
	.ndo_set_rx_mode	= vnet_set_rx_mode,
	.ndo_set_mac_address	= vnet_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= vnet_tx_timeout,
	.ndo_change_mtu		= vnet_change_mtu,
	.ndo_start_xmit		= vnet_start_xmit,
};

static struct vnet *vnet_new(const u64 *local_mac)
{
	struct net_device *dev;
	struct vnet *vp;
	int err, i;

	dev = alloc_etherdev(sizeof(*vp));
	if (!dev)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < ETH_ALEN; i++)
		dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff;

	vp = netdev_priv(dev);

	spin_lock_init(&vp->lock);
	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
	vp->dev = dev;

	INIT_LIST_HEAD(&vp->port_list);
	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&vp->port_hash[i]);
	INIT_LIST_HEAD(&vp->list);
	vp->local_mac = *local_mac;

	dev->netdev_ops = &vnet_ops;
	dev->ethtool_ops = &vnet_ethtool_ops;
	dev->watchdog_timeo = VNET_TX_TIMEOUT;

	err = register_netdev(dev);
	if (err) {
		pr_err("Cannot register net device, aborting\n");
		goto err_out_free_dev;
	}

	netdev_info(dev, "Sun LDOM vnet %pM\n", dev->dev_addr);

	list_add(&vp->list, &vnet_list);

	return vp;

err_out_free_dev:
	free_netdev(dev);

	return ERR_PTR(err);
}

static struct vnet *vnet_find_or_create(const u64 *local_mac)
{
	struct vnet *iter, *vp;

	mutex_lock(&vnet_list_mutex);
	vp = NULL;
	list_for_each_entry(iter, &vnet_list, list) {
		if (iter->local_mac == *local_mac) {
			vp = iter;
			break;
		}
	}
	if (!vp)
		vp = vnet_new(local_mac);
	mutex_unlock(&vnet_list_mutex);

	return vp;
}

static void vnet_cleanup(void)
{
	struct vnet *vp;
	struct net_device *dev;

	mutex_lock(&vnet_list_mutex);
	while (!list_empty(&vnet_list)) {
		vp = list_first_entry(&vnet_list, struct vnet, list);
		list_del(&vp->list);
		dev = vp->dev;
		tasklet_kill(&vp->vnet_tx_wakeup);
		/* vio_unregister_driver() should have cleaned up port_list */
		BUG_ON(!list_empty(&vp->port_list));
		unregister_netdev(dev);
		free_netdev(dev);
	}
	mutex_unlock(&vnet_list_mutex);
}

static const char *local_mac_prop = "local-mac-address";

static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
						u64 port_node)
{
	const u64 *local_mac = NULL;
	u64 a;

	mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
		u64 target = mdesc_arc_target(hp, a);
		const char *name;

		name = mdesc_get_property(hp, target, "name", NULL);
		if (!name || strcmp(name, "network"))
			continue;

		local_mac = mdesc_get_property(hp, target,
					       local_mac_prop, NULL);
		if (local_mac)
			break;
	}
	if (!local_mac)
		return ERR_PTR(-ENODEV);

	return vnet_find_or_create(local_mac);
}

static struct ldc_channel_config vnet_ldc_cfg = {
	.event		= vnet_event,
	.mtu		= 64,
	.mode		= LDC_MODE_UNRELIABLE,
};

static struct vio_driver_ops vnet_vio_ops = {
	.send_attr		= vnet_send_attr,
	.handle_attr		= vnet_handle_attr,
	.handshake_complete	= vnet_handshake_complete,
};

static void print_version(void)
{
	printk_once(KERN_INFO "%s", version);
}

static const char *remote_macaddr_prop = "remote-mac-address";

static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct mdesc_handle *hp;
	struct vnet_port *port;
	unsigned long flags;
	struct vnet *vp;
	const u64 *rmac;
	int len, i, err, switch_port;

	print_version();

	hp = mdesc_grab();

	vp = vnet_find_parent(hp, vdev->mp);
	if (IS_ERR(vp)) {
		pr_err("Cannot find port parent vnet\n");
		err = PTR_ERR(vp);
		goto err_out_put_mdesc;
	}

	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
	err = -ENODEV;
	if (!rmac) {
		pr_err("Port lacks %s property\n", remote_macaddr_prop);
		goto err_out_put_mdesc;
	}

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	err = -ENOMEM;
	if (!port)
		goto err_out_put_mdesc;

	for (i = 0; i < ETH_ALEN; i++)
		port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;

	port->vp = vp;

	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
			      vnet_versions, ARRAY_SIZE(vnet_versions),
			      &vnet_vio_ops, vp->dev->name);
	if (err)
		goto err_out_free_port;

	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
	if (err)
		goto err_out_free_port;

	err = vnet_port_alloc_tx_bufs(port);
	if (err)
		goto err_out_free_ldc;

	INIT_HLIST_NODE(&port->hash);
	INIT_LIST_HEAD(&port->list);

	switch_port = 0;
	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
		switch_port = 1;
	port->switch_port = switch_port;

	spin_lock_irqsave(&vp->lock, flags);
	if (switch_port)
		list_add(&port->list, &vp->port_list);
	else
		list_add_tail(&port->list, &vp->port_list);
	hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
	spin_unlock_irqrestore(&vp->lock, flags);

	dev_set_drvdata(&vdev->dev, port);

	pr_info("%s: PORT ( remote-mac %pM%s )\n",
		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");

	vio_port_up(&port->vio);

	mdesc_release(hp);

	return 0;

err_out_free_ldc:
	vio_ldc_free(&port->vio);

err_out_free_port:
	kfree(port);

err_out_put_mdesc:
	mdesc_release(hp);
	return err;
}

static int vnet_port_remove(struct vio_dev *vdev)
{
	struct vnet_port *port = dev_get_drvdata(&vdev->dev);

	if (port) {
		struct vnet *vp = port->vp;
		unsigned long flags;

		del_timer_sync(&port->vio.timer);

		spin_lock_irqsave(&vp->lock, flags);
		list_del(&port->list);
		hlist_del(&port->hash);
		spin_unlock_irqrestore(&vp->lock, flags);

		vnet_port_free_tx_bufs(port);
		vio_ldc_free(&port->vio);

		dev_set_drvdata(&vdev->dev, NULL);

		kfree(port);
	}
	return 0;
}

static const struct vio_device_id vnet_port_match[] = {
	{
		.type = "vnet-port",
	},
	{},
};
MODULE_DEVICE_TABLE(vio, vnet_port_match);

static struct vio_driver vnet_port_driver = {
	.id_table	= vnet_port_match,
	.probe		= vnet_port_probe,
	.remove		= vnet_port_remove,
	.name		= "vnet_port",
};

static int __init vnet_init(void)
{
	return vio_register_driver(&vnet_port_driver);
}

static void __exit vnet_exit(void)
{
	vio_unregister_driver(&vnet_port_driver);
	vnet_cleanup();
}

module_init(vnet_init);
module_exit(vnet_exit);