/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>

#include <asm/vio.h>
#include <asm/ldc.h>

#include "sunvnet.h"

#define DRV_MODULE_NAME		"sunvnet"
#define DRV_MODULE_VERSION	"1.0"
#define DRV_MODULE_RELDATE	"June 25, 2007"

static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
MODULE_DESCRIPTION("Sun LDOM virtual network driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);

/* Heuristic for the number of times to exponentially back off and
 * retry sending an LDC trigger when -EAGAIN is encountered.
 */
#define	VNET_MAX_RETRIES	10
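
/* vnet_send_ack() and __vnet_tx_trigger() below share this retry
 * pattern: double a udelay() between attempts (1, 2, 4, ...
 * microseconds, capped at 128) and give up after VNET_MAX_RETRIES
 * sends have failed with -EAGAIN.
 */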

/* Ordered from largest major version to smallest */
static struct vio_version vnet_versions[] = {
	{ .major = 1, .minor = 0 },
};

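/* Free slots among the VNET_TX_RING_SIZE TX descriptors; the ring
 * arithmetic itself lives in vio_dring_avail() in asm/vio.h.
 */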
static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
{
	return vio_dring_avail(dr, VNET_TX_RING_SIZE);
}

static int vnet_handle_unknown(struct vnet_port *port, void *arg)
{
	struct vio_msg_tag *pkt = arg;

	pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
	pr_err("Resetting connection\n");

	ldc_disconnect(port->vio.lp);

	return -ECONNRESET;
}

static int vnet_send_attr(struct vio_driver_state *vio)
{
	struct vnet_port *port = to_vnet_port(vio);
	struct net_device *dev = port->vp->dev;
	struct vio_net_attr_info pkt;
	int i;

	memset(&pkt, 0, sizeof(pkt));
	pkt.tag.type = VIO_TYPE_CTRL;
	pkt.tag.stype = VIO_SUBTYPE_INFO;
	pkt.tag.stype_env = VIO_ATTR_INFO;
	pkt.tag.sid = vio_send_sid(vio);
	pkt.xfer_mode = VIO_DRING_MODE;
	pkt.addr_type = VNET_ADDR_ETHERMAC;
	pkt.ack_freq = 0;
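	/* Pack the 6-byte MAC into the low 48 bits of pkt.addr, most
	 * significant byte first: 02:04:06:08:0a:0c becomes
	 * 0x20406080a0c.
	 */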
	for (i = 0; i < ETH_ALEN; i++)
		pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
	pkt.mtu = ETH_FRAME_LEN;

	viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] mtu[%llu]\n",
	       pkt.xfer_mode, pkt.addr_type,
	       (unsigned long long) pkt.addr,
	       pkt.ack_freq,
	       (unsigned long long) pkt.mtu);

	return vio_ldc_send(vio, &pkt, sizeof(pkt));
}

static int handle_attr_info(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] "
	       "ackfreq[%u] mtu[%llu]\n",
	       pkt->xfer_mode, pkt->addr_type,
	       (unsigned long long) pkt->addr,
	       pkt->ack_freq,
	       (unsigned long long) pkt->mtu);

	pkt->tag.sid = vio_send_sid(vio);

	if (pkt->xfer_mode != VIO_DRING_MODE ||
	    pkt->addr_type != VNET_ADDR_ETHERMAC ||
	    pkt->mtu != ETH_FRAME_LEN) {
		viodbg(HS, "SEND NET ATTR NACK\n");

		pkt->tag.stype = VIO_SUBTYPE_NACK;

		(void) vio_ldc_send(vio, pkt, sizeof(*pkt));

		return -ECONNRESET;
	} else {
		viodbg(HS, "SEND NET ATTR ACK\n");

		pkt->tag.stype = VIO_SUBTYPE_ACK;

		return vio_ldc_send(vio, pkt, sizeof(*pkt));
	}
}

static int handle_attr_ack(struct vio_driver_state *vio,
			   struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR ACK\n");

	return 0;
}

static int handle_attr_nack(struct vio_driver_state *vio,
			    struct vio_net_attr_info *pkt)
{
	viodbg(HS, "GOT NET ATTR NACK\n");

	return -ECONNRESET;
}

static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
{
	struct vio_net_attr_info *pkt = arg;

	switch (pkt->tag.stype) {
	case VIO_SUBTYPE_INFO:
		return handle_attr_info(vio, pkt);

	case VIO_SUBTYPE_ACK:
		return handle_attr_ack(vio, pkt);

	case VIO_SUBTYPE_NACK:
		return handle_attr_nack(vio, pkt);

	default:
		return -ECONNRESET;
	}
}

static void vnet_handshake_complete(struct vio_driver_state *vio)
{
	struct vio_dring_state *dr;

	dr = &vio->drings[VIO_DRIVER_RX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;

	dr = &vio->drings[VIO_DRIVER_TX_RING];
	dr->snd_nxt = dr->rcv_nxt = 1;
}

/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the Ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length; they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
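/* The 8 + 8 of slack in the allocation below covers the two
 * requirements above: up to 7 bytes to realign skb->data, and up to 7
 * more so the copy length can be rounded up to a multiple of 8.
 */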
static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
					   unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + VNET_PACKET_SKIP + 8 + 8);
	unsigned long addr, off;

	if (unlikely(!skb))
		return NULL;

	addr = (unsigned long) skb->data;
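	/* Distance from skb->data up to the next 8-byte boundary,
	 * e.g. an address ending in ...5 gives off == 3.
	 */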
	off = ((addr + 7UL) & ~7UL) - addr;
	if (off)
		skb_reserve(skb, off);

	return skb;
}

static int vnet_rx_one(struct vnet_port *port, unsigned int len,
		       struct ldc_trans_cookie *cookies, int ncookies)
{
	struct net_device *dev = port->vp->dev;
	unsigned int copy_len;
	struct sk_buff *skb;
	int err;

	err = -EMSGSIZE;
	if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) {
		dev->stats.rx_length_errors++;
		goto out_dropped;
	}

	skb = alloc_and_align_skb(dev, len);
	err = -ENOMEM;
	if (unlikely(!skb)) {
		dev->stats.rx_missed_errors++;
		goto out_dropped;
	}

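	/* Round the copy up to a multiple of 8 as the hypervisor
	 * requires: a 60-byte frame plus the 6-byte VNET_PACKET_SKIP
	 * gives (60 + 6 + 7) & ~7 == 72.
	 */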
	copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
	skb_put(skb, copy_len);
	err = ldc_copy(port->vio.lp, LDC_COPY_IN,
		       skb->data, copy_len, 0,
		       cookies, ncookies);
	if (unlikely(err < 0)) {
		dev->stats.rx_frame_errors++;
		goto out_free_skb;
	}

	skb_pull(skb, VNET_PACKET_SKIP);
	skb_trim(skb, len);
	skb->protocol = eth_type_trans(skb, dev);

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;

	netif_rx(skb);

	return 0;

out_free_skb:
	kfree_skb(skb);

out_dropped:
	dev->stats.rx_dropped++;
	return err;
}

static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
			 u32 start, u32 end, u8 vio_dring_state)
{
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_ACK,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= start,
		.end_idx		= end,
		.state			= vio_dring_state,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES) {
			pr_info("ECONNRESET %pM\n", port->raddr);
			err = -ECONNRESET;
		}
	} while (err == -EAGAIN);

	return err;
}

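/* The descriptor ring is circular: these helpers step an index one
 * slot forward or backward, wrapping at dr->num_entries.
 */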
static u32 next_idx(u32 idx, struct vio_dring_state *dr)
{
	if (++idx == dr->num_entries)
		idx = 0;
	return idx;
}

static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
{
	if (idx == 0)
		idx = dr->num_entries - 1;
	else
		idx--;

	return idx;
}

static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
					struct vio_dring_state *dr,
					u32 index)
{
	struct vio_net_desc *desc = port->vio.desc_buf;
	int err;

	err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return ERR_PTR(err);

	return desc;
}

static int put_rx_desc(struct vnet_port *port,
		       struct vio_dring_state *dr,
		       struct vio_net_desc *desc,
		       u32 index)
{
	int err;

	err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
				  (index * dr->entry_size),
				  dr->cookies, dr->ncookies);
	if (err < 0)
		return err;

	return 0;
}

static int vnet_walk_rx_one(struct vnet_port *port,
			    struct vio_dring_state *dr,
			    u32 index, int *needs_ack)
{
	struct vio_net_desc *desc = get_rx_desc(port, dr, index);
	struct vio_driver_state *vio = &port->vio;
	int err;

	if (IS_ERR(desc))
		return PTR_ERR(desc);

	viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
	       desc->hdr.state, desc->hdr.ack,
	       desc->size, desc->ncookies,
	       desc->cookies[0].cookie_addr,
	       desc->cookies[0].cookie_size);

	if (desc->hdr.state != VIO_DESC_READY)
		return 1;
	err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
	if (err == -ECONNRESET)
		return err;
	desc->hdr.state = VIO_DESC_DONE;
	err = put_rx_desc(port, dr, desc, index);
	if (err < 0)
		return err;
	*needs_ack = desc->hdr.ack;
	return 0;
}

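/* Walk the peer's descriptor ring from start to end, receiving every
 * READY descriptor.  ACKs are batched: a VIO_DRING_ACTIVE ack goes out
 * mid-walk only when the transmitter requested one via a descriptor's
 * ack bit, and a final VIO_DRING_STOPPED ack always closes out the
 * range we consumed.
 */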
static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
			u32 start, u32 end)
{
	struct vio_driver_state *vio = &port->vio;
	int ack_start = -1, ack_end = -1;

	end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);

	viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);

	while (start != end) {
		int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);

		if (err == -ECONNRESET)
			return err;
		if (err != 0)
			break;
		if (ack_start == -1)
			ack_start = start;
		ack_end = start;
		start = next_idx(start, dr);
		if (ack && start != end) {
			err = vnet_send_ack(port, dr, ack_start, ack_end,
					    VIO_DRING_ACTIVE);
			if (err == -ECONNRESET)
				return err;
			ack_start = -1;
		}
	}
	if (unlikely(ack_start == -1))
		ack_start = ack_end = prev_idx(start, dr);
	return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
}

static int vnet_rx(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_data *pkt = msgbuf;
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
	struct vio_driver_state *vio = &port->vio;

	viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
	       pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;
	if (unlikely(pkt->seq != dr->rcv_nxt)) {
		pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
		       pkt->seq, dr->rcv_nxt);
		return 0;
	}

	dr->rcv_nxt++;

	/* XXX Validate pkt->start_idx and pkt->end_idx XXX */

	return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
}

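/* True if the given index lies in the window [cons, prod) of
 * descriptors we have handed to the peer but not yet reclaimed.
 */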
static int idx_is_pending(struct vio_dring_state *dr, u32 end)
{
	u32 idx = dr->cons;
	int found = 0;

	while (idx != dr->prod) {
		if (idx == end) {
			found = 1;
			break;
		}
		idx = next_idx(idx, dr);
	}
	return found;
}

static int vnet_ack(struct vnet_port *port, void *msgbuf)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data *pkt = msgbuf;
	struct net_device *dev;
	struct vnet *vp;
	u32 end;

	if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
		return 0;

	end = pkt->end_idx;
	if (unlikely(!idx_is_pending(dr, end)))
		return 0;

	dr->cons = next_idx(end, dr);

	vp = port->vp;
	dev = vp->dev;
	if (unlikely(netif_queue_stopped(dev) &&
		     vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
		return 1;

	return 0;
}

static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}

static int handle_mcast(struct vnet_port *port, void *msgbuf)
{
	struct vio_net_mcast_info *pkt = msgbuf;

	if (pkt->tag.stype != VIO_SUBTYPE_ACK)
		pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
		       port->vp->dev->name,
		       pkt->tag.type,
		       pkt->tag.stype,
		       pkt->tag.stype_env,
		       pkt->tag.sid);

	return 0;
}

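/* Tasklet callback, scheduled from vnet_event(): re-enable the TX
 * queue only once every port's TX ring has room above the wakeup
 * threshold.
 */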
static void maybe_tx_wakeup(unsigned long param)
{
	struct vnet *vp = (struct vnet *)param;
	struct net_device *dev = vp->dev;

	netif_tx_lock(dev);
	if (likely(netif_queue_stopped(dev))) {
		struct vnet_port *port;
		int wake = 1;

		list_for_each_entry(port, &vp->port_list, list) {
			struct vio_dring_state *dr;

			dr = &port->vio.drings[VIO_DRIVER_TX_RING];
			if (vnet_tx_dring_avail(dr) <
			    VNET_TX_WAKEUP_THRESH(dr)) {
				wake = 0;
				break;
			}
		}
		if (wake)
			netif_wake_queue(dev);
	}
	netif_tx_unlock(dev);
}

static void vnet_event(void *arg, int event)
{
	struct vnet_port *port = arg;
	struct vio_driver_state *vio = &port->vio;
	unsigned long flags;
	int tx_wakeup, err;

	spin_lock_irqsave(&vio->lock, flags);

	if (unlikely(event == LDC_EVENT_RESET ||
		     event == LDC_EVENT_UP)) {
		vio_link_state_change(vio, event);
		spin_unlock_irqrestore(&vio->lock, flags);

		if (event == LDC_EVENT_RESET)
			vio_port_up(vio);
		return;
	}

	if (unlikely(event != LDC_EVENT_DATA_READY)) {
		pr_warn("Unexpected LDC event %d\n", event);
		spin_unlock_irqrestore(&vio->lock, flags);
		return;
	}

	tx_wakeup = err = 0;
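	/* Drain the LDC channel: data packets, acks/nacks and control
	 * messages are all dispatched from this loop until ldc_read()
	 * returns zero (no more messages) or an error.
	 */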
	while (1) {
		union {
			struct vio_msg_tag tag;
			u64 raw[8];
		} msgbuf;

		err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
		if (unlikely(err < 0)) {
			if (err == -ECONNRESET)
				vio_conn_reset(vio);
			break;
		}
		if (err == 0)
			break;
		viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
		       msgbuf.tag.type,
		       msgbuf.tag.stype,
		       msgbuf.tag.stype_env,
		       msgbuf.tag.sid);
		err = vio_validate_sid(vio, &msgbuf.tag);
		if (err < 0)
			break;

		if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
			if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
				err = vnet_rx(port, &msgbuf);
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
				err = vnet_ack(port, &msgbuf);
				if (err > 0)
					tx_wakeup |= err;
			} else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
				err = vnet_nack(port, &msgbuf);
			}
		} else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
			if (msgbuf.tag.stype_env == VNET_MCAST_INFO)
				err = handle_mcast(port, &msgbuf);
			else
				err = vio_control_pkt_engine(vio, &msgbuf);
			if (err)
				break;
		} else {
			err = vnet_handle_unknown(port, &msgbuf);
		}
		if (err == -ECONNRESET)
			break;
	}
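	/* Drop only the lock here; interrupts stay disabled until the
	 * local_irq_restore() below, which keeps the tasklet scheduled
	 * just after from running on this CPU before we are done.
	 */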
	spin_unlock(&vio->lock);
	/* Kick off a tasklet to wake the queue.  We cannot call
	 * maybe_tx_wakeup directly here because we could deadlock on
	 * netif_tx_lock() with dev_watchdog().
	 */
	if (unlikely(tx_wakeup && err != -ECONNRESET))
		tasklet_schedule(&port->vp->vnet_tx_wakeup);

	local_irq_restore(flags);
}

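/* Tell the peer that descriptors starting at dr->prod are ready.  An
 * end_idx of (u32) -1 means "no explicit end": the receiver walks the
 * ring from start_idx until it finds a descriptor that is not READY
 * (see the end == (u32) -1 handling in vnet_walk_rx()).
 */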
static int __vnet_tx_trigger(struct vnet_port *port)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct vio_dring_data hdr = {
		.tag = {
			.type		= VIO_TYPE_DATA,
			.stype		= VIO_SUBTYPE_INFO,
			.stype_env	= VIO_DRING_DATA,
			.sid		= vio_send_sid(&port->vio),
		},
		.dring_ident		= dr->ident,
		.start_idx		= dr->prod,
		.end_idx		= (u32) -1,
	};
	int err, delay;
	int retries = 0;

	hdr.seq = dr->snd_nxt;
	delay = 1;
	do {
		err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
		if (err > 0) {
			dr->snd_nxt++;
			break;
		}
		udelay(delay);
		if ((delay <<= 1) > 128)
			delay = 128;
		if (retries++ > VNET_MAX_RETRIES)
			break;
	} while (err == -EAGAIN);

	return err;
}

static inline bool port_is_up(struct vnet_port *port)
{
	struct vio_driver_state *vio = &port->vio;

	return !!(vio->hs_state & VIO_HS_COMPLETE);
}

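/* Map an outgoing skb to a port: first try an exact match on the
 * destination MAC (skb->data points at the Ethernet header) via the
 * port hash, then fall back to the first switch port that is up.
 */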
struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	unsigned int hash = vnet_hashfn(skb->data);
	struct hlist_head *hp = &vp->port_hash[hash];
	struct vnet_port *port;

	hlist_for_each_entry(port, hp, hash) {
		if (!port_is_up(port))
			continue;
		if (ether_addr_equal(port->raddr, skb->data))
			return port;
	}
	list_for_each_entry(port, &vp->port_list, list) {
		if (!port->switch_port)
			continue;
		if (!port_is_up(port))
			continue;
		return port;
	}
	return NULL;
}

struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
{
	struct vnet_port *ret;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	ret = __tx_port_find(vp, skb);
	spin_unlock_irqrestore(&vp->lock, flags);

	return ret;
}

static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port = tx_port_find(vp, skb);
	struct vio_dring_state *dr;
	struct vio_net_desc *d;
	unsigned long flags;
	unsigned int len;
	void *tx_buf;
	int i, err;

	if (unlikely(!port))
		goto out_dropped;

	spin_lock_irqsave(&port->vio.lock, flags);

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
		if (!netif_queue_stopped(dev)) {
			netif_stop_queue(dev);

			/* This is a hard error, log it. */
			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
			dev->stats.tx_errors++;
		}
		spin_unlock_irqrestore(&port->vio.lock, flags);
		return NETDEV_TX_BUSY;
	}

	d = vio_dring_cur(dr);

	tx_buf = port->tx_bufs[dr->prod].buf;
	skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len);

	len = skb->len;
	if (len < ETH_ZLEN) {
		len = ETH_ZLEN;
		memset(tx_buf + VNET_PACKET_SKIP + skb->len, 0, len - skb->len);
	}

	/* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
	 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
	 * the protocol itself does not require it as long as the peer
	 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
	 *
	 * An ACK for every packet in the ring is expensive as the
	 * sending of LDC messages is slow and affects performance.
	 */
	d->hdr.ack = VIO_ACK_DISABLE;
	d->size = len;
	d->ncookies = port->tx_bufs[dr->prod].ncookies;
	for (i = 0; i < d->ncookies; i++)
		d->cookies[i] = port->tx_bufs[dr->prod].cookies[i];

	/* This has to be a non-SMP write barrier because we are writing
	 * to memory which is shared with the peer LDOM.
	 */
	wmb();

	d->hdr.state = VIO_DESC_READY;

	err = __vnet_tx_trigger(port);
	if (unlikely(err < 0)) {
		netdev_info(dev, "TX trigger error %d\n", err);
		d->hdr.state = VIO_DESC_FREE;
		dev->stats.tx_carrier_errors++;
		goto out_dropped_unlock;
	}

	dev->stats.tx_packets++;
	dev->stats.tx_bytes += skb->len;

	dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
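	/* Stop the queue when the ring is nearly full.  The immediate
	 * re-check catches an ACK that raced in before
	 * netif_stop_queue(): its wakeup test in vnet_ack() saw the
	 * queue still running and so scheduled no wakeup.
	 */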
	if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
		netif_stop_queue(dev);
		if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
			netif_wake_queue(dev);
	}

	spin_unlock_irqrestore(&port->vio.lock, flags);

	dev_kfree_skb(skb);

	return NETDEV_TX_OK;

out_dropped_unlock:
	spin_unlock_irqrestore(&port->vio.lock, flags);

out_dropped:
	dev_kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}

static void vnet_tx_timeout(struct net_device *dev)
{
	/* XXX Implement me XXX */
}

static int vnet_open(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_start_queue(dev);

	return 0;
}

static int vnet_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	netif_carrier_off(dev);

	return 0;
}

static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
{
	struct vnet_mcast_entry *m;

	for (m = vp->mcast_list; m; m = m->next) {
		if (ether_addr_equal(m->addr, addr))
			return m;
	}
	return NULL;
}

static void __update_mc_list(struct vnet *vp, struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	netdev_for_each_mc_addr(ha, dev) {
		struct vnet_mcast_entry *m;

		m = __vnet_mc_find(vp, ha->addr);
		if (m) {
			m->hit = 1;
			continue;
		}

		m = kzalloc(sizeof(*m), GFP_ATOMIC);
		if (!m)
			continue;
		memcpy(m->addr, ha->addr, ETH_ALEN);
		m->hit = 1;

		m->next = vp->mcast_list;
		vp->mcast_list = m;
	}
}

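/* Push the multicast filter to the switch port in two passes: first
 * advertise addresses not yet sent (set = 1), then retract entries
 * that __update_mc_list() left unmarked (set = 0), batching up to
 * VNET_NUM_MCAST addresses per VIO message.
 */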
static void __send_mc_list(struct vnet *vp, struct vnet_port *port)
{
	struct vio_net_mcast_info info;
	struct vnet_mcast_entry *m, **pp;
	int n_addrs;

	memset(&info, 0, sizeof(info));

	info.tag.type = VIO_TYPE_CTRL;
	info.tag.stype = VIO_SUBTYPE_INFO;
	info.tag.stype_env = VNET_MCAST_INFO;
	info.tag.sid = vio_send_sid(&port->vio);
	info.set = 1;

	n_addrs = 0;
	for (m = vp->mcast_list; m; m = m->next) {
		if (m->sent)
			continue;
		m->sent = 1;
		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;

			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}

	info.set = 0;

	n_addrs = 0;
	pp = &vp->mcast_list;
	while ((m = *pp) != NULL) {
		if (m->hit) {
			m->hit = 0;
			pp = &m->next;
			continue;
		}

		memcpy(&info.mcast_addr[n_addrs * ETH_ALEN],
		       m->addr, ETH_ALEN);
		if (++n_addrs == VNET_NUM_MCAST) {
			info.count = n_addrs;
			(void) vio_ldc_send(&port->vio, &info,
					    sizeof(info));
			n_addrs = 0;
		}

		*pp = m->next;
		kfree(m);
	}
	if (n_addrs) {
		info.count = n_addrs;
		(void) vio_ldc_send(&port->vio, &info, sizeof(info));
	}
}

static void vnet_set_rx_mode(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);

		if (port->switch_port) {
			__update_mc_list(vp, dev);
			__send_mc_list(vp, port);
		}
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}

static int vnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu != ETH_DATA_LEN)
		return -EINVAL;

	dev->mtu = new_mtu;
	return 0;
}

static int vnet_set_mac_addr(struct net_device *dev, void *p)
{
	return -EINVAL;
}

static void vnet_get_drvinfo(struct net_device *dev,
			     struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
}

static u32 vnet_get_msglevel(struct net_device *dev)
{
	struct vnet *vp = netdev_priv(dev);

	return vp->msg_enable;
}

static void vnet_set_msglevel(struct net_device *dev, u32 value)
{
	struct vnet *vp = netdev_priv(dev);

	vp->msg_enable = value;
}

static const struct ethtool_ops vnet_ethtool_ops = {
	.get_drvinfo		= vnet_get_drvinfo,
	.get_msglevel		= vnet_get_msglevel,
	.set_msglevel		= vnet_set_msglevel,
	.get_link		= ethtool_op_get_link,
};

static void vnet_port_free_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	int i;

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	if (dr->base) {
		ldc_free_exp_dring(port->vio.lp, dr->base,
				   (dr->entry_size * dr->num_entries),
				   dr->cookies, dr->ncookies);
		dr->base = NULL;
		dr->entry_size = 0;
		dr->num_entries = 0;
		dr->pending = 0;
		dr->ncookies = 0;
	}

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		void *buf = port->tx_bufs[i].buf;

		if (!buf)
			continue;

		ldc_unmap(port->vio.lp,
			  port->tx_bufs[i].cookies,
			  port->tx_bufs[i].ncookies);

		kfree(buf);
		port->tx_bufs[i].buf = NULL;
	}
}

static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
{
	struct vio_dring_state *dr;
	unsigned long len;
	int i, err, ncookies;
	void *dring;

	for (i = 0; i < VNET_TX_RING_SIZE; i++) {
		void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
		int map_len = (ETH_FRAME_LEN + 7) & ~7;

		err = -ENOMEM;
		if (!buf)
			goto err_out;

		err = -EFAULT;
		if ((unsigned long)buf & (8UL - 1)) {
			pr_err("TX buffer misaligned\n");
			kfree(buf);
			goto err_out;
		}

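		/* On success, ldc_map_single() returns the number of
		 * cookies consumed; stash it per buffer for descriptor
		 * setup in vnet_start_xmit().
		 */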
		err = ldc_map_single(port->vio.lp, buf, map_len,
				     port->tx_bufs[i].cookies, 2,
				     (LDC_MAP_SHADOW |
				      LDC_MAP_DIRECT |
				      LDC_MAP_RW));
		if (err < 0) {
			kfree(buf);
			goto err_out;
		}
		port->tx_bufs[i].buf = buf;
		port->tx_bufs[i].ncookies = err;
	}

	dr = &port->vio.drings[VIO_DRIVER_TX_RING];

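	/* Each ring entry is a vio_net_desc followed by room for two
	 * LDC cookies; dr->entry_size below must match this layout.
	 */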
	len = (VNET_TX_RING_SIZE *
	       (sizeof(struct vio_net_desc) +
		(sizeof(struct ldc_trans_cookie) * 2)));

	ncookies = VIO_MAX_RING_COOKIES;
	dring = ldc_alloc_exp_dring(port->vio.lp, len,
				    dr->cookies, &ncookies,
				    (LDC_MAP_SHADOW |
				     LDC_MAP_DIRECT |
				     LDC_MAP_RW));
	if (IS_ERR(dring)) {
		err = PTR_ERR(dring);
		goto err_out;
	}

	dr->base = dring;
	dr->entry_size = (sizeof(struct vio_net_desc) +
			  (sizeof(struct ldc_trans_cookie) * 2));
	dr->num_entries = VNET_TX_RING_SIZE;
	dr->prod = dr->cons = 0;
	dr->pending = VNET_TX_RING_SIZE;
	dr->ncookies = ncookies;

	return 0;

err_out:
	vnet_port_free_tx_bufs(port);

	return err;
}

static LIST_HEAD(vnet_list);
static DEFINE_MUTEX(vnet_list_mutex);

static const struct net_device_ops vnet_ops = {
	.ndo_open		= vnet_open,
	.ndo_stop		= vnet_close,
	.ndo_set_rx_mode	= vnet_set_rx_mode,
	.ndo_set_mac_address	= vnet_set_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_tx_timeout		= vnet_tx_timeout,
	.ndo_change_mtu		= vnet_change_mtu,
	.ndo_start_xmit		= vnet_start_xmit,
};

static struct vnet *vnet_new(const u64 *local_mac)
{
	struct net_device *dev;
	struct vnet *vp;
	int err, i;

	dev = alloc_etherdev(sizeof(*vp));
	if (!dev)
		return ERR_PTR(-ENOMEM);

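	/* The MD supplies the MAC in the low 48 bits of a u64, most
	 * significant byte first; unpack it into dev_addr (the inverse
	 * of the packing in vnet_send_attr()).
	 */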
	for (i = 0; i < ETH_ALEN; i++)
		dev->dev_addr[i] = (*local_mac >> ((5 - i) * 8)) & 0xff;

	vp = netdev_priv(dev);

	spin_lock_init(&vp->lock);
	tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
	vp->dev = dev;

	INIT_LIST_HEAD(&vp->port_list);
	for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&vp->port_hash[i]);
	INIT_LIST_HEAD(&vp->list);
	vp->local_mac = *local_mac;

	dev->netdev_ops = &vnet_ops;
	dev->ethtool_ops = &vnet_ethtool_ops;
	dev->watchdog_timeo = VNET_TX_TIMEOUT;

	err = register_netdev(dev);
	if (err) {
		pr_err("Cannot register net device, aborting\n");
		goto err_out_free_dev;
	}

	netdev_info(dev, "Sun LDOM vnet %pM\n", dev->dev_addr);

	list_add(&vp->list, &vnet_list);

	return vp;

err_out_free_dev:
	free_netdev(dev);

	return ERR_PTR(err);
}

static struct vnet *vnet_find_or_create(const u64 *local_mac)
{
	struct vnet *iter, *vp;

	mutex_lock(&vnet_list_mutex);
	vp = NULL;
	list_for_each_entry(iter, &vnet_list, list) {
		if (iter->local_mac == *local_mac) {
			vp = iter;
			break;
		}
	}
	if (!vp)
		vp = vnet_new(local_mac);
	mutex_unlock(&vnet_list_mutex);

	return vp;
}

static void vnet_cleanup(void)
{
	struct vnet *vp;
	struct net_device *dev;

	mutex_lock(&vnet_list_mutex);
	while (!list_empty(&vnet_list)) {
		vp = list_first_entry(&vnet_list, struct vnet, list);
		list_del(&vp->list);
		dev = vp->dev;
		tasklet_kill(&vp->vnet_tx_wakeup);
		/* vio_unregister_driver() should have cleaned up port_list */
		BUG_ON(!list_empty(&vp->port_list));
		unregister_netdev(dev);
		free_netdev(dev);
	}
	mutex_unlock(&vnet_list_mutex);
}

static const char *local_mac_prop = "local-mac-address";

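/* A vnet-port's machine-description node has a back arc to its parent
 * "network" node, whose local-mac-address property identifies the
 * vnet device the port belongs to.
 */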
static struct vnet *vnet_find_parent(struct mdesc_handle *hp,
				     u64 port_node)
{
	const u64 *local_mac = NULL;
	u64 a;

	mdesc_for_each_arc(a, hp, port_node, MDESC_ARC_TYPE_BACK) {
		u64 target = mdesc_arc_target(hp, a);
		const char *name;

		name = mdesc_get_property(hp, target, "name", NULL);
		if (!name || strcmp(name, "network"))
			continue;

		local_mac = mdesc_get_property(hp, target,
					       local_mac_prop, NULL);
		if (local_mac)
			break;
	}
	if (!local_mac)
		return ERR_PTR(-ENODEV);

	return vnet_find_or_create(local_mac);
}

static struct ldc_channel_config vnet_ldc_cfg = {
	.event		= vnet_event,
	.mtu		= 64,
	.mode		= LDC_MODE_UNRELIABLE,
};

static struct vio_driver_ops vnet_vio_ops = {
	.send_attr		= vnet_send_attr,
	.handle_attr		= vnet_handle_attr,
	.handshake_complete	= vnet_handshake_complete,
};

static void print_version(void)
{
	printk_once(KERN_INFO "%s", version);
}

static const char *remote_macaddr_prop = "remote-mac-address";

static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct mdesc_handle *hp;
	struct vnet_port *port;
	unsigned long flags;
	struct vnet *vp;
	const u64 *rmac;
	int len, i, err, switch_port;

	print_version();

	hp = mdesc_grab();
	if (!hp)
		return -ENODEV;

	vp = vnet_find_parent(hp, vdev->mp);
	if (IS_ERR(vp)) {
		pr_err("Cannot find port parent vnet\n");
		err = PTR_ERR(vp);
		goto err_out_put_mdesc;
	}

	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
	err = -ENODEV;
	if (!rmac) {
		pr_err("Port lacks %s property\n", remote_macaddr_prop);
		goto err_out_put_mdesc;
	}

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	err = -ENOMEM;
	if (!port)
		goto err_out_put_mdesc;

	for (i = 0; i < ETH_ALEN; i++)
		port->raddr[i] = (*rmac >> ((5 - i) * 8)) & 0xff;

	port->vp = vp;

	err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
			      vnet_versions, ARRAY_SIZE(vnet_versions),
			      &vnet_vio_ops, vp->dev->name);
	if (err)
		goto err_out_free_port;

	err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
	if (err)
		goto err_out_free_port;

	err = vnet_port_alloc_tx_bufs(port);
	if (err)
		goto err_out_free_ldc;

	INIT_HLIST_NODE(&port->hash);
	INIT_LIST_HEAD(&port->list);

	switch_port = 0;
	if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
		switch_port = 1;
	port->switch_port = switch_port;

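	/* Switch ports go at the head of port_list so the fallback
	 * scan in __tx_port_find() reaches them first.
	 */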
	spin_lock_irqsave(&vp->lock, flags);
	if (switch_port)
		list_add(&port->list, &vp->port_list);
	else
		list_add_tail(&port->list, &vp->port_list);
	hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
	spin_unlock_irqrestore(&vp->lock, flags);

	dev_set_drvdata(&vdev->dev, port);

	pr_info("%s: PORT ( remote-mac %pM%s )\n",
		vp->dev->name, port->raddr, switch_port ? " switch-port" : "");

	vio_port_up(&port->vio);

	mdesc_release(hp);

	return 0;

err_out_free_ldc:
	vio_ldc_free(&port->vio);

err_out_free_port:
	kfree(port);

err_out_put_mdesc:
	mdesc_release(hp);
	return err;
}

static int vnet_port_remove(struct vio_dev *vdev)
{
	struct vnet_port *port = dev_get_drvdata(&vdev->dev);

	if (port) {
		struct vnet *vp = port->vp;
		unsigned long flags;

		del_timer_sync(&port->vio.timer);

		spin_lock_irqsave(&vp->lock, flags);
		list_del(&port->list);
		hlist_del(&port->hash);
		spin_unlock_irqrestore(&vp->lock, flags);

		vnet_port_free_tx_bufs(port);
		vio_ldc_free(&port->vio);

		dev_set_drvdata(&vdev->dev, NULL);

		kfree(port);
	}
	return 0;
}

static const struct vio_device_id vnet_port_match[] = {
	{
		.type = "vnet-port",
	},
	{},
};
MODULE_DEVICE_TABLE(vio, vnet_port_match);

static struct vio_driver vnet_port_driver = {
	.id_table	= vnet_port_match,
	.probe		= vnet_port_probe,
	.remove		= vnet_port_remove,
	.name		= "vnet_port",
};

static int __init vnet_init(void)
{
	return vio_register_driver(&vnet_port_driver);
}

static void __exit vnet_exit(void)
{
	vio_unregister_driver(&vnet_port_driver);
	vnet_cleanup();
}

module_init(vnet_init);
module_exit(vnet_exit);