/* ldc.c: Logical Domain Channel link-layer protocol driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/iommu-common.h>

#include <asm/hypervisor.h>
#include <asm/iommu.h>
#include <asm/page.h>
#include <asm/ldc.h>
#include <asm/mdesc.h>

#define DRV_MODULE_NAME		"ldc"
#define PFX DRV_MODULE_NAME	": "
#define DRV_MODULE_VERSION	"1.1"
#define DRV_MODULE_RELDATE	"July 22, 2008"

#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT	60ULL


static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";

/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 */
struct ldc_packet {
	u8			type;
#define LDC_CTRL		0x01
#define LDC_DATA		0x02
#define LDC_ERR			0x10

	u8			stype;
#define LDC_INFO		0x01
#define LDC_ACK			0x02
#define LDC_NACK		0x04

	u8			ctrl;
#define LDC_VERS		0x01 /* Link Version		*/
#define LDC_RTS			0x02 /* Request To Send		*/
#define LDC_RTR			0x03 /* Ready To Receive	*/
#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
#define LDC_CTRL_MSK		0x0f

	u8			env;
#define LDC_LEN			0x3f
#define LDC_FRAG_MASK		0xc0
#define LDC_START		0x40
#define LDC_STOP		0x80

	u32			seqid;

	union {
		u8		u_data[LDC_PACKET_SIZE - 8];
		struct {
			u32	pad;
			u32	ackid;
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
		} r;
	} u;
};
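
/* With LDC_PACKET_SIZE fixed at 64 bytes, the per-packet payload is
 * mode dependent: RAW packets carry the full 64 bytes, UNRELIABLE
 * packets carry 56 bytes (64 minus the 8-byte header), and STREAM
 * packets carry 48 bytes (a further 8 bytes go to the pad/ackid words
 * in u.r).  These are the same values assigned to lp->mss in
 * ldc_alloc() below.
 */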

struct ldc_version {
	u16 major;
	u16 minor;
};

/* Ordered from largest major version to smallest.  */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};

#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
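
/* On sparc64, PAGE_SIZE is 8K, so the default queues hold
 * 8192 / 64 = 128 packet entries each, and the default MTU is
 * 4 * 64 = 256 bytes per message before fragmentation.
 */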

struct ldc_channel;

struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};

static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

int ldom_domaining_enabled;

struct ldc_iommu {
	/* Protects ldc_unmap.  */
	spinlock_t			lock;
	struct ldc_mtable_entry		*page_table;
	struct iommu_map_table		iommu_map_table;
};

struct ldc_channel {
	/* Protects all operations that depend upon channel state.  */
	spinlock_t			lock;

	unsigned long			id;

	u8				*mssbuf;
	u32				mssbuf_len;
	u32				mssbuf_off;

	struct ldc_packet		*tx_base;
	unsigned long			tx_head;
	unsigned long			tx_tail;
	unsigned long			tx_num_entries;
	unsigned long			tx_ra;

	unsigned long			tx_acked;

	struct ldc_packet		*rx_base;
	unsigned long			rx_head;
	unsigned long			rx_tail;
	unsigned long			rx_num_entries;
	unsigned long			rx_ra;

	u32				rcv_nxt;
	u32				snd_nxt;

	unsigned long			chan_state;

	struct ldc_channel_config	cfg;
	void				*event_arg;

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu		iommu;

	struct ldc_version		ver;

	u8				hs_state;
#define LDC_HS_CLOSED			0x00
#define LDC_HS_OPEN			0x01
#define LDC_HS_GOTVERS			0x02
#define LDC_HS_SENTRTR			0x03
#define LDC_HS_GOTRTR			0x04
#define LDC_HS_COMPLETE			0x10

	u8				flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	u8				mss;
	u8				state;

#define LDC_IRQ_NAME_MAX		32
	char				rx_irq_name[LDC_IRQ_NAME_MAX];
	char				tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head		mh_list;

	struct hlist_node		list;
};

#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)

static const char *state_to_str(u8 state)
{
	switch (state) {
	case LDC_STATE_INVALID:
		return "INVALID";
	case LDC_STATE_INIT:
		return "INIT";
	case LDC_STATE_BOUND:
		return "BOUND";
	case LDC_STATE_READY:
		return "READY";
	case LDC_STATE_CONNECTED:
		return "CONNECTED";
	default:
		return "<UNKNOWN>";
	}
}

static unsigned long __advance(unsigned long off, unsigned long num_entries)
{
	off += LDC_PACKET_SIZE;
	if (off == (num_entries * LDC_PACKET_SIZE))
		off = 0;

	return off;
}
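
/* Queue head/tail values are byte offsets into the queue, always a
 * multiple of LDC_PACKET_SIZE.  With the default 128-entry queue the
 * offset advances 64 bytes at a time and wraps from 8128 back to 0.
 */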

static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
{
	return __advance(off, lp->rx_num_entries);
}

static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
{
	return __advance(off, lp->tx_num_entries);
}

static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
						  unsigned long *new_tail)
{
	struct ldc_packet *p;
	unsigned long t;

	t = tx_advance(lp, lp->tx_tail);
	if (t == lp->tx_head)
		return NULL;

	*new_tail = t;

	p = lp->tx_base;
	return p + (lp->tx_tail / LDC_PACKET_SIZE);
}
/* When we are in reliable or stream mode, we have to track the next
 * packet we haven't gotten an ACK for in the TX queue using tx_acked.
 * We have to be careful not to stomp over the queue past that point.
 * During the handshake, we don't have TX data packets pending in the
 * queue and that's why handshake_get_tx_packet() need not be mindful
 * of lp->tx_acked.
 */
static unsigned long head_for_data(struct ldc_channel *lp)
{
	if (lp->cfg.mode == LDC_MODE_STREAM)
		return lp->tx_acked;
	return lp->tx_head;
}

static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
{
	unsigned long limit, tail, new_tail, diff;
	unsigned int mss;

	limit = head_for_data(lp);
	tail = lp->tx_tail;
	new_tail = tx_advance(lp, tail);
	if (new_tail == limit)
		return 0;

	if (limit > new_tail)
		diff = limit - new_tail;
	else
		diff = (limit +
			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
	diff /= LDC_PACKET_SIZE;
	mss = lp->mss;

	if (diff * mss < size)
		return 0;

	return 1;
}
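
/* Worked example: with the default 128-entry queue, tail at offset
 * 0x1000 and limit (head) at offset 0x0400, the free region wraps,
 * so diff is (0x0400 + 0x2000 - 0x1040) / 64 = 79 packets.  In stream
 * mode (mss = 48) that accommodates writes up to 79 * 48 = 3792 bytes.
 */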

static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
					     unsigned long *new_tail)
{
	struct ldc_packet *p;
	unsigned long h, t;

	h = head_for_data(lp);
	t = tx_advance(lp, lp->tx_tail);
	if (t == h)
		return NULL;

	*new_tail = t;

	p = lp->tx_base;
	return p + (lp->tx_tail / LDC_PACKET_SIZE);
}

static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
{
	unsigned long orig_tail = lp->tx_tail;
	int limit = 1000;

	lp->tx_tail = tail;
	while (limit-- > 0) {
		unsigned long err;

		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
		if (!err)
			return 0;

		if (err != HV_EWOULDBLOCK) {
			lp->tx_tail = orig_tail;
			return -EINVAL;
		}
		udelay(1);
	}

	lp->tx_tail = orig_tail;
	return -EBUSY;
}
/* This just updates the head value in the hypervisor using
 * a polling loop with a timeout.  The caller takes care of
 * updating software state representing the head change, if any.
 */
static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
{
	int limit = 1000;

	while (limit-- > 0) {
		unsigned long err;

		err = sun4v_ldc_rx_set_qhead(lp->id, head);
		if (!err)
			return 0;

		if (err != HV_EWOULDBLOCK)
			return -EINVAL;

		udelay(1);
	}

	return -EBUSY;
}

static int send_tx_packet(struct ldc_channel *lp,
			  struct ldc_packet *p,
			  unsigned long new_tail)
{
	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));

	return set_tx_tail(lp, new_tail);
}

static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
						 u8 stype, u8 ctrl,
						 void *data, int dlen,
						 unsigned long *new_tail)
{
	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);

	if (p) {
		memset(p, 0, sizeof(*p));
		p->type = LDC_CTRL;
		p->stype = stype;
		p->ctrl = ctrl;
		if (data)
			memcpy(p->u.u_data, data, dlen);
	}
	return p;
}

static int start_handshake(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	struct ldc_version *ver;
	unsigned long new_tail;

	ver = &ver_arr[0];

	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
	       ver->major, ver->minor);

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
				   ver, sizeof(*ver), &new_tail);
	if (p) {
		int err = send_tx_packet(lp, p, new_tail);
		if (!err)
			lp->flags &= ~LDC_FLAG_RESET;
		return err;
	}
	return -EBUSY;
}

static int send_version_nack(struct ldc_channel *lp,
			     u16 major, u16 minor)
{
	struct ldc_packet *p;
	struct ldc_version ver;
	unsigned long new_tail;

	ver.major = major;
	ver.minor = minor;

	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
				   &ver, sizeof(ver), &new_tail);
	if (p) {
		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
		       ver.major, ver.minor);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_version_ack(struct ldc_channel *lp,
			    struct ldc_version *vp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
				   vp, sizeof(*vp), &new_tail);
	if (p) {
		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
		       vp->major, vp->minor);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rts(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = lp->cfg.mode;
		p->seqid = 0;
		lp->rcv_nxt = 0;

		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
		       p->env, p->seqid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rtr(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = lp->cfg.mode;
		p->seqid = 0;

		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
		       p->env, p->seqid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}

static int send_rdx(struct ldc_channel *lp)
{
	struct ldc_packet *p;
	unsigned long new_tail;

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
				   &new_tail);
	if (p) {
		p->env = 0;
		p->seqid = ++lp->snd_nxt;
		p->u.r.ackid = lp->rcv_nxt;

		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
		       p->env, p->seqid, p->u.r.ackid);

		return send_tx_packet(lp, p, new_tail);
	}
	return -EBUSY;
}
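
/* Putting the pieces above together, a successful handshake between
 * peers A (initiator) and B looks like:
 *
 *	A -> B	CTRL/INFO/VERS	(sent from start_handshake)
 *	B -> A	CTRL/ACK/VERS	(sent from process_ver_info)
 *	A -> B	CTRL/INFO/RTS	(sent from process_ver_ack)
 *	B -> A	CTRL/INFO/RTR	(sent from process_rts)
 *	A -> B	CTRL/INFO/RDX	(sent from process_rtr)
 *
 * After RTR is processed on A and RDX on B, both sides set hs_state
 * to LDC_HS_COMPLETE and move to LDC_STATE_CONNECTED.
 */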

static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
{
	struct ldc_packet *p;
	unsigned long new_tail;
	int err;

	p = data_get_tx_packet(lp, &new_tail);
	if (!p)
		return -EBUSY;
	memset(p, 0, sizeof(*p));
	p->type = data_pkt->type;
	p->stype = LDC_NACK;
	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
	p->seqid = lp->snd_nxt + 1;
	p->u.r.ackid = lp->rcv_nxt;

	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
	       p->type, p->ctrl, p->seqid, p->u.r.ackid);

	err = send_tx_packet(lp, p, new_tail);
	if (!err)
		lp->snd_nxt++;

	return err;
}

static int ldc_abort(struct ldc_channel *lp)
{
	unsigned long hv_err;

	ldcdbg(STATE, "ABORT\n");

	/* We report but do not act upon the hypervisor errors because
	 * there really isn't much we can do if they fail at this point.
	 */
	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);

	hv_err = sun4v_ldc_tx_get_state(lp->id,
					&lp->tx_head,
					&lp->tx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);

	/* Refetch the RX queue state as well, because we could be invoked
	 * here in the queue processing context.
	 */
	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		printk(KERN_ERR PFX "ldc_abort: "
		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
		       lp->id, hv_err);

	return -ECONNRESET;
}

static struct ldc_version *find_by_major(u16 major)
{
	struct ldc_version *ret = NULL;
	int i;

	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
		struct ldc_version *v = &ver_arr[i];
		if (v->major <= major) {
			ret = v;
			break;
		}
	}
	return ret;
}

static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
{
	struct ldc_version *vap;
	int err;

	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
	       vp->major, vp->minor);

	if (lp->hs_state == LDC_HS_GOTVERS) {
		lp->hs_state = LDC_HS_OPEN;
		memset(&lp->ver, 0, sizeof(lp->ver));
	}

	vap = find_by_major(vp->major);
	if (!vap) {
		err = send_version_nack(lp, 0, 0);
	} else if (vap->major != vp->major) {
		err = send_version_nack(lp, vap->major, vap->minor);
	} else {
		struct ldc_version ver = *vp;
		if (ver.minor > vap->minor)
			ver.minor = vap->minor;
		err = send_version_ack(lp, &ver);
		if (!err) {
			lp->ver = ver;
			lp->hs_state = LDC_HS_GOTVERS;
		}
	}
	if (err)
		return ldc_abort(lp);

	return 0;
}

static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
{
	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
	       vp->major, vp->minor);

	if (lp->hs_state == LDC_HS_GOTVERS) {
		if (lp->ver.major != vp->major ||
		    lp->ver.minor != vp->minor)
			return ldc_abort(lp);
	} else {
		lp->ver = *vp;
		lp->hs_state = LDC_HS_GOTVERS;
	}
	if (send_rts(lp))
		return ldc_abort(lp);
	return 0;
}

static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
{
	struct ldc_version *vap;
	struct ldc_packet *p;
	unsigned long new_tail;

	if (vp->major == 0 && vp->minor == 0)
		return ldc_abort(lp);

	vap = find_by_major(vp->major);
	if (!vap)
		return ldc_abort(lp);

	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
					   vap, sizeof(*vap),
					   &new_tail);
	if (!p)
		return ldc_abort(lp);

	return send_tx_packet(lp, p, new_tail);
}

static int process_version(struct ldc_channel *lp,
			   struct ldc_packet *p)
{
	struct ldc_version *vp;

	vp = (struct ldc_version *) p->u.u_data;

	switch (p->stype) {
	case LDC_INFO:
		return process_ver_info(lp, vp);

	case LDC_ACK:
		return process_ver_ack(lp, vp);

	case LDC_NACK:
		return process_ver_nack(lp, vp);

	default:
		return ldc_abort(lp);
	}
}

static int process_rts(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
	       p->stype, p->seqid, p->env);

	if (p->stype     != LDC_INFO	   ||
	    lp->hs_state != LDC_HS_GOTVERS ||
	    p->env       != lp->cfg.mode)
		return ldc_abort(lp);

	lp->snd_nxt = p->seqid;
	lp->rcv_nxt = p->seqid;
	lp->hs_state = LDC_HS_SENTRTR;
	if (send_rtr(lp))
		return ldc_abort(lp);

	return 0;
}

static int process_rtr(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
	       p->stype, p->seqid, p->env);

	if (p->stype     != LDC_INFO ||
	    p->env       != lp->cfg.mode)
		return ldc_abort(lp);

	lp->snd_nxt = p->seqid;
	lp->hs_state = LDC_HS_COMPLETE;
	ldc_set_state(lp, LDC_STATE_CONNECTED);
	send_rdx(lp);

	return LDC_EVENT_UP;
}

static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
{
	return lp->rcv_nxt + 1 == seqid;
}

static int process_rdx(struct ldc_channel *lp,
		       struct ldc_packet *p)
{
	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
	       p->stype, p->seqid, p->env, p->u.r.ackid);

	if (p->stype != LDC_INFO ||
	    !(rx_seq_ok(lp, p->seqid)))
		return ldc_abort(lp);

	lp->rcv_nxt = p->seqid;

	lp->hs_state = LDC_HS_COMPLETE;
	ldc_set_state(lp, LDC_STATE_CONNECTED);

	return LDC_EVENT_UP;
}

static int process_control_frame(struct ldc_channel *lp,
				 struct ldc_packet *p)
{
	switch (p->ctrl) {
	case LDC_VERS:
		return process_version(lp, p);

	case LDC_RTS:
		return process_rts(lp, p);

	case LDC_RTR:
		return process_rtr(lp, p);

	case LDC_RDX:
		return process_rdx(lp, p);

	default:
		return ldc_abort(lp);
	}
}

static int process_error_frame(struct ldc_channel *lp,
			       struct ldc_packet *p)
{
	return ldc_abort(lp);
}

static int process_data_ack(struct ldc_channel *lp,
			    struct ldc_packet *ack)
{
	unsigned long head = lp->tx_acked;
	u32 ackid = ack->u.r.ackid;

	while (1) {
		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);

		head = tx_advance(lp, head);

		if (p->seqid == ackid) {
			lp->tx_acked = head;
			return 0;
		}
		if (head == lp->tx_tail)
			return ldc_abort(lp);
	}

	return 0;
}

static void send_events(struct ldc_channel *lp, unsigned int event_mask)
{
	if (event_mask & LDC_EVENT_RESET)
		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
	if (event_mask & LDC_EVENT_UP)
		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
	if (event_mask & LDC_EVENT_DATA_READY)
		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
}

static irqreturn_t ldc_rx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long orig_state, flags;
	unsigned int event_mask;

	spin_lock_irqsave(&lp->lock, flags);

	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_rx_get_state(lp->id,
			       &lp->rx_head,
			       &lp->rx_tail,
			       &lp->chan_state);

	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

	event_mask = 0;

	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		event_mask |= LDC_EVENT_UP;

		orig_state = lp->chan_state;
	}

	/* If we are in reset state, flush the RX queue and ignore
	 * everything.
	 */
	if (lp->flags & LDC_FLAG_RESET) {
		(void) ldc_rx_reset(lp);
		goto out;
	}

	/* Once we finish the handshake, we let the ldc_read()
	 * paths do all of the control frame and state management.
	 * Just trigger the callback.
	 */
	if (lp->hs_state == LDC_HS_COMPLETE) {
handshake_complete:
		if (lp->chan_state != orig_state) {
			unsigned int event = LDC_EVENT_RESET;

			if (lp->chan_state == LDC_CHANNEL_UP)
				event = LDC_EVENT_UP;

			event_mask |= event;
		}
		if (lp->rx_head != lp->rx_tail)
			event_mask |= LDC_EVENT_DATA_READY;

		goto out;
	}

	if (lp->chan_state != orig_state)
		goto out;

	while (lp->rx_head != lp->rx_tail) {
		struct ldc_packet *p;
		unsigned long new;
		int err;

		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

		switch (p->type) {
		case LDC_CTRL:
			err = process_control_frame(lp, p);
			if (err > 0)
				event_mask |= err;
			break;

		case LDC_DATA:
			event_mask |= LDC_EVENT_DATA_READY;
			err = 0;
			break;

		case LDC_ERR:
			err = process_error_frame(lp, p);
			break;

		default:
			err = ldc_abort(lp);
			break;
		}

		if (err < 0)
			break;

		new = lp->rx_head;
		new += LDC_PACKET_SIZE;
		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
			new = 0;
		lp->rx_head = new;

		err = __set_rx_head(lp, new);
		if (err < 0) {
			(void) ldc_abort(lp);
			break;
		}
		if (lp->hs_state == LDC_HS_COMPLETE)
			goto handshake_complete;
	}

out:
	spin_unlock_irqrestore(&lp->lock, flags);

	send_events(lp, event_mask);

	return IRQ_HANDLED;
}

static irqreturn_t ldc_tx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long flags, orig_state;
	unsigned int event_mask = 0;

	spin_lock_irqsave(&lp->lock, flags);

	orig_state = lp->chan_state;

	/* We should probably check for hypervisor errors here and
	 * reset the LDC channel if we get one.
	 */
	sun4v_ldc_tx_get_state(lp->id,
			       &lp->tx_head,
			       &lp->tx_tail,
			       &lp->chan_state);

	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);

	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		event_mask |= LDC_EVENT_UP;
	}

	spin_unlock_irqrestore(&lp->lock, flags);

	send_events(lp, event_mask);

	return IRQ_HANDLED;
}

/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
 * XXX that addition and removal from the ldc_channel_list has
 * XXX atomicity, otherwise the __ldc_channel_exists() check is
 * XXX totally pointless as another thread can slip into ldc_alloc()
 * XXX and add a channel with the same ID.  There also needs to be
 * XXX a spinlock for ldc_channel_list.
 */
static HLIST_HEAD(ldc_channel_list);
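
/* A minimal sketch of the locking the XXX comment above asks for (not
 * wired up here): one mutex taken around both the existence check and
 * the list insertion in ldc_alloc(), and around the list removal in
 * ldc_free(), e.g.
 *
 *	static DEFINE_MUTEX(ldc_channel_mutex);
 *
 *	mutex_lock(&ldc_channel_mutex);
 *	if (!__ldc_channel_exists(id))
 *		hlist_add_head(&lp->list, &ldc_channel_list);
 *	mutex_unlock(&ldc_channel_mutex);
 */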

static int __ldc_channel_exists(unsigned long id)
{
	struct ldc_channel *lp;

	hlist_for_each_entry(lp, &ldc_channel_list, list) {
		if (lp->id == id)
			return 1;
	}
	return 0;
}

static int alloc_queue(const char *name, unsigned long num_entries,
		       struct ldc_packet **base, unsigned long *ra)
{
	unsigned long size, order;
	void *q;

	size = num_entries * LDC_PACKET_SIZE;
	order = get_order(size);

	q = (void *) __get_free_pages(GFP_KERNEL, order);
	if (!q) {
		printk(KERN_ERR PFX "Alloc of %s queue failed with "
		       "size=%lu order=%lu\n", name, size, order);
		return -ENOMEM;
	}

	memset(q, 0, PAGE_SIZE << order);

	*base = q;
	*ra = __pa(q);

	return 0;
}

static void free_queue(unsigned long num_entries, struct ldc_packet *q)
{
	unsigned long size, order;

	if (!q)
		return;

	size = num_entries * LDC_PACKET_SIZE;
	order = get_order(size);

	free_pages((unsigned long)q, order);
}

static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
{
	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */

	cookie &= ~COOKIE_PGSZ_CODE;

	return (cookie >> (13ULL + (szcode * 3ULL)));
}
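
/* A cookie packs the page size code into its top four bits and the
 * map-table index above the in-page offset; the shift amount is the
 * page shift for that size code (13 for 8K, 16 for 64K, and so on,
 * three more bits per step).  For example, with 8K pages (szcode 0)
 * the cookie 0x4010 decodes to map-table index 2 (0x4010 >> 13) at
 * page offset 0x10.
 */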

static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
		      unsigned long entry, unsigned long npages)
{
	struct ldc_mtable_entry *base;
	unsigned long i, shift;

	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
	base = iommu->page_table + entry;
	for (i = 0; i < npages; i++) {
		if (base->cookie)
			sun4v_ldc_revoke(id, cookie + (i << shift),
					 base->cookie);
		base->mte = 0;
		base++;
	}
}

/* XXX Make this configurable... XXX */
#define LDC_IOTABLE_SIZE	(8 * 1024)

static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
{
	unsigned long sz, num_tsb_entries, tsbsize, order;
	struct ldc_iommu *ldc_iommu = &lp->iommu;
	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
	struct ldc_mtable_entry *table;
	unsigned long hv_err;
	int err;

	num_tsb_entries = LDC_IOTABLE_SIZE;
	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
	spin_lock_init(&ldc_iommu->lock);

	sz = num_tsb_entries / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->map) {
		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
		return -ENOMEM;
	}
	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
			    NULL, false /* no large pool */,
			    1 /* npools */,
			    true /* skip span boundary check */);

	order = get_order(tsbsize);

	table = (struct ldc_mtable_entry *)
		__get_free_pages(GFP_KERNEL, order);
	err = -ENOMEM;
	if (!table) {
		printk(KERN_ERR PFX "Alloc of MTE table failed, "
		       "size=%lu order=%lu\n", tsbsize, order);
		goto out_free_map;
	}

	memset(table, 0, PAGE_SIZE << order);

	ldc_iommu->page_table = table;

	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
					 num_tsb_entries);
	err = -EINVAL;
	if (hv_err)
		goto out_free_table;

	return 0;

out_free_table:
	free_pages((unsigned long) table, order);
	ldc_iommu->page_table = NULL;

out_free_map:
	kfree(iommu->map);
	iommu->map = NULL;

	return err;
}

static void ldc_iommu_release(struct ldc_channel *lp)
{
	struct ldc_iommu *ldc_iommu = &lp->iommu;
	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
	unsigned long num_tsb_entries, tsbsize, order;

	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);

	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
	order = get_order(tsbsize);

	free_pages((unsigned long) ldc_iommu->page_table, order);
	ldc_iommu->page_table = NULL;

	kfree(iommu->map);
	iommu->map = NULL;
}

struct ldc_channel *ldc_alloc(unsigned long id,
			      const struct ldc_channel_config *cfgp,
			      void *event_arg,
			      const char *name)
{
	struct ldc_channel *lp;
	const struct ldc_mode_ops *mops;
	unsigned long dummy1, dummy2, hv_err;
	u8 mss, *mssbuf;
	int err;

	err = -ENODEV;
	if (!ldom_domaining_enabled)
		goto out_err;

	err = -EINVAL;
	if (!cfgp)
		goto out_err;
	if (!name)
		goto out_err;

	switch (cfgp->mode) {
	case LDC_MODE_RAW:
		mops = &raw_ops;
		mss = LDC_PACKET_SIZE;
		break;

	case LDC_MODE_UNRELIABLE:
		mops = &nonraw_ops;
		mss = LDC_PACKET_SIZE - 8;
		break;

	case LDC_MODE_STREAM:
		mops = &stream_ops;
		mss = LDC_PACKET_SIZE - 8 - 8;
		break;

	default:
		goto out_err;
	}

	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
		goto out_err;

	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
	err = -ENODEV;
	if (hv_err == HV_ECHANNEL)
		goto out_err;

	err = -EEXIST;
	if (__ldc_channel_exists(id))
		goto out_err;

	mssbuf = NULL;

	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
	err = -ENOMEM;
	if (!lp)
		goto out_err;

	spin_lock_init(&lp->lock);

	lp->id = id;

	err = ldc_iommu_init(name, lp);
	if (err)
		goto out_free_ldc;

	lp->mops = mops;
	lp->mss = mss;

	lp->cfg = *cfgp;
	if (!lp->cfg.mtu)
		lp->cfg.mtu = LDC_DEFAULT_MTU;

	if (lp->cfg.mode == LDC_MODE_STREAM) {
		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
		if (!mssbuf) {
			err = -ENOMEM;
			goto out_free_iommu;
		}
		lp->mssbuf = mssbuf;
	}

	lp->event_arg = event_arg;

	/* XXX allow setting via ldc_channel_config to override defaults
	 * XXX or use some formula based upon mtu
	 */
	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;

	err = alloc_queue("TX", lp->tx_num_entries,
			  &lp->tx_base, &lp->tx_ra);
	if (err)
		goto out_free_mssbuf;

	err = alloc_queue("RX", lp->rx_num_entries,
			  &lp->rx_base, &lp->rx_ra);
	if (err)
		goto out_free_txq;

	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;

	lp->hs_state = LDC_HS_CLOSED;
	ldc_set_state(lp, LDC_STATE_INIT);

	INIT_HLIST_NODE(&lp->list);
	hlist_add_head(&lp->list, &ldc_channel_list);

	INIT_HLIST_HEAD(&lp->mh_list);

	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);

	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
			  lp->rx_irq_name, lp);
	if (err)
		goto out_free_rxq;

	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
			  lp->tx_irq_name, lp);
	if (err) {
		free_irq(lp->cfg.rx_irq, lp);
		goto out_free_rxq;
	}

	return lp;

out_free_rxq:
	/* The channel is already on ldc_channel_list and its RX queue
	 * is allocated by this point, so unwind both before falling
	 * through to the TX queue cleanup.
	 */
	hlist_del(&lp->list);
	free_queue(lp->rx_num_entries, lp->rx_base);

out_free_txq:
	free_queue(lp->tx_num_entries, lp->tx_base);

out_free_mssbuf:
	kfree(mssbuf);

out_free_iommu:
	ldc_iommu_release(lp);

out_free_ldc:
	kfree(lp);

out_err:
	return ERR_PTR(err);
}
EXPORT_SYMBOL(ldc_alloc);

void ldc_unbind(struct ldc_channel *lp)
{
	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
		free_irq(lp->cfg.rx_irq, lp);
		free_irq(lp->cfg.tx_irq, lp);
		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
	}

	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
		sun4v_ldc_tx_qconf(lp->id, 0, 0);
		sun4v_ldc_rx_qconf(lp->id, 0, 0);
		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
	}
	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
		free_queue(lp->tx_num_entries, lp->tx_base);
		free_queue(lp->rx_num_entries, lp->rx_base);
		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
	}

	ldc_set_state(lp, LDC_STATE_INIT);
}
EXPORT_SYMBOL(ldc_unbind);

void ldc_free(struct ldc_channel *lp)
{
	ldc_unbind(lp);
	hlist_del(&lp->list);
	kfree(lp->mssbuf);
	ldc_iommu_release(lp);

	kfree(lp);
}
EXPORT_SYMBOL(ldc_free);

/* Bind the channel.  This registers the LDC queues with
 * the hypervisor and puts the channel into a pseudo-listening
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 */
int ldc_bind(struct ldc_channel *lp)
{
	unsigned long hv_err, flags;
	int err = -EINVAL;

	if (lp->state != LDC_STATE_INIT)
		return -EINVAL;

	spin_lock_irqsave(&lp->lock, flags);

	enable_irq(lp->cfg.rx_irq);
	enable_irq(lp->cfg.tx_irq);

	lp->flags |= LDC_FLAG_REGISTERED_IRQS;

	err = -ENODEV;
	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_free_irqs;

	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		goto out_free_irqs;

	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_unmap_tx;

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		goto out_unmap_tx;

	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;

	hv_err = sun4v_ldc_tx_get_state(lp->id,
					&lp->tx_head,
					&lp->tx_tail,
					&lp->chan_state);
	err = -EBUSY;
	if (hv_err)
		goto out_unmap_rx;

	lp->tx_acked = lp->tx_head;

	lp->hs_state = LDC_HS_OPEN;
	ldc_set_state(lp, LDC_STATE_BOUND);

	spin_unlock_irqrestore(&lp->lock, flags);

	return 0;

out_unmap_rx:
	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
	sun4v_ldc_rx_qconf(lp->id, 0, 0);

out_unmap_tx:
	sun4v_ldc_tx_qconf(lp->id, 0, 0);

out_free_irqs:
	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
	free_irq(lp->cfg.tx_irq, lp);
	free_irq(lp->cfg.rx_irq, lp);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_bind);

int ldc_connect(struct ldc_channel *lp)
{
	unsigned long flags;
	int err;

	if (lp->cfg.mode == LDC_MODE_RAW)
		return -EINVAL;

	spin_lock_irqsave(&lp->lock, flags);

	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
	    lp->hs_state != LDC_HS_OPEN)
		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
	else
		err = start_handshake(lp);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_connect);
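
/* A minimal sketch of the client life cycle these entry points imply.
 * In a real driver the channel id, irqs, and event callback come from
 * the machine description; the names below are illustrative only:
 *
 *	struct ldc_channel_config cfg = {
 *		.event	= my_event,
 *		.mode	= LDC_MODE_STREAM,
 *		.mtu	= 4096,
 *		.rx_irq	= rx_irq,
 *		.tx_irq	= tx_irq,
 *	};
 *	struct ldc_channel *lp = ldc_alloc(chan_id, &cfg, my_arg, "MYDEV");
 *
 *	ldc_bind(lp);		register queues, pseudo-listen
 *	ldc_connect(lp);	kick off the handshake
 *	...my_event() fires with LDC_EVENT_UP, then on
 *	   LDC_EVENT_DATA_READY the driver calls ldc_read()...
 *	ldc_free(lp);		unbind and release everything
 */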

int ldc_disconnect(struct ldc_channel *lp)
{
	unsigned long hv_err, flags;
	int err;

	if (lp->cfg.mode == LDC_MODE_RAW)
		return -EINVAL;

	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
		return -EINVAL;

	spin_lock_irqsave(&lp->lock, flags);

	err = -ENODEV;
	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
	if (hv_err)
		goto out_err;

	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
	if (hv_err)
		goto out_err;

	ldc_set_state(lp, LDC_STATE_BOUND);
	lp->hs_state = LDC_HS_OPEN;
	lp->flags |= LDC_FLAG_RESET;

	spin_unlock_irqrestore(&lp->lock, flags);

	return 0;

out_err:
	sun4v_ldc_tx_qconf(lp->id, 0, 0);
	sun4v_ldc_rx_qconf(lp->id, 0, 0);
	free_irq(lp->cfg.tx_irq, lp);
	free_irq(lp->cfg.rx_irq, lp);
	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
		       LDC_FLAG_REGISTERED_QUEUES);
	ldc_set_state(lp, LDC_STATE_INIT);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_disconnect);

int ldc_state(struct ldc_channel *lp)
{
	return lp->state;
}
EXPORT_SYMBOL(ldc_state);

void ldc_set_state(struct ldc_channel *lp, u8 state)
{
	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
	       state_to_str(lp->state),
	       state_to_str(state));

	lp->state = state;
}

int ldc_mode(struct ldc_channel *lp)
{
	return lp->cfg.mode;
}

int ldc_rx_reset(struct ldc_channel *lp)
{
	return __set_rx_head(lp, lp->rx_tail);
}

void __ldc_print(struct ldc_channel *lp, const char *caller)
{
	pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
		"\trx_h=0x%lx rx_t=0x%lx rx_n=%ld\n"
		"\ttx_h=0x%lx tx_t=0x%lx tx_n=%ld\n"
		"\trcv_nxt=%u snd_nxt=%u\n",
		caller, lp->id, lp->flags, state_to_str(lp->state),
		lp->chan_state, lp->hs_state,
		lp->rx_head, lp->rx_tail, lp->rx_num_entries,
		lp->tx_head, lp->tx_tail, lp->tx_num_entries,
		lp->rcv_nxt, lp->snd_nxt);
}

static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
{
	struct ldc_packet *p;
	unsigned long new_tail;
	int err;

	if (size > LDC_PACKET_SIZE)
		return -EMSGSIZE;

	p = data_get_tx_packet(lp, &new_tail);
	if (!p)
		return -EAGAIN;

	memcpy(p, buf, size);

	err = send_tx_packet(lp, p, new_tail);
	if (!err)
		err = size;

	return err;
}

static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *p;
	unsigned long hv_err, new;
	int err;

	if (size < LDC_PACKET_SIZE)
		return -EINVAL;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return ldc_abort(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	if (lp->rx_head == lp->rx_tail)
		return 0;

	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
	memcpy(buf, p, LDC_PACKET_SIZE);

	new = rx_advance(lp, lp->rx_head);
	lp->rx_head = new;

	err = __set_rx_head(lp, new);
	if (err < 0)
		err = -ECONNRESET;
	else
		err = LDC_PACKET_SIZE;

	return err;
}

static const struct ldc_mode_ops raw_ops = {
	.write		=	write_raw,
	.read		=	read_raw,
};

static int write_nonraw(struct ldc_channel *lp, const void *buf,
			unsigned int size)
{
	unsigned long hv_err, tail;
	unsigned int copied;
	u32 seq;
	int err;

	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
					&lp->chan_state);
	if (unlikely(hv_err))
		return -EBUSY;

	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
		return ldc_abort(lp);

	if (!tx_has_space_for(lp, size))
		return -EAGAIN;

	seq = lp->snd_nxt;
	copied = 0;
	tail = lp->tx_tail;
	while (copied < size) {
		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
			    p->u.u_data :
			    p->u.r.r_data);
		int data_len;

		p->type = LDC_DATA;
		p->stype = LDC_INFO;
		p->ctrl = 0;

		data_len = size - copied;
		if (data_len > lp->mss)
			data_len = lp->mss;

		BUG_ON(data_len > LDC_LEN);

		p->env = (data_len |
			  (copied == 0 ? LDC_START : 0) |
			  (data_len == size - copied ? LDC_STOP : 0));

		p->seqid = ++seq;

		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid);

		memcpy(data, buf, data_len);
		buf += data_len;
		copied += data_len;

		tail = tx_advance(lp, tail);
	}

	err = set_tx_tail(lp, tail);
	if (!err) {
		lp->snd_nxt = seq;
		err = size;
	}

	return err;
}

static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
		      struct ldc_packet *first_frag)
{
	int err;

	if (first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	err = send_data_nack(lp, p);
	if (err)
		return err;

	err = ldc_rx_reset(lp);
	if (err < 0)
		return ldc_abort(lp);

	return 0;
}

static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
{
	if (p->stype & LDC_ACK) {
		int err = process_data_ack(lp, p);
		if (err)
			return err;
	}
	if (p->stype & LDC_NACK)
		return ldc_abort(lp);

	return 0;
}

static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
{
	unsigned long dummy;
	int limit = 1000;

	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
	       cur_head, lp->rx_head, lp->rx_tail);
	while (limit-- > 0) {
		unsigned long hv_err;

		hv_err = sun4v_ldc_rx_get_state(lp->id,
						&dummy,
						&lp->rx_tail,
						&lp->chan_state);
		if (hv_err)
			return ldc_abort(lp);

		if (lp->chan_state == LDC_CHANNEL_DOWN ||
		    lp->chan_state == LDC_CHANNEL_RESETTING)
			return -ECONNRESET;

		if (cur_head != lp->rx_tail) {
			ldcdbg(DATA, "DATA WAIT DONE "
			       "head[%lx] tail[%lx] chan_state[%lx]\n",
			       dummy, lp->rx_tail, lp->chan_state);
			return 0;
		}

		udelay(1);
	}
	return -EAGAIN;
}

static int rx_set_head(struct ldc_channel *lp, unsigned long head)
{
	int err = __set_rx_head(lp, head);

	if (err < 0)
		return ldc_abort(lp);

	lp->rx_head = head;
	return 0;
}

static void send_data_ack(struct ldc_channel *lp)
{
	unsigned long new_tail;
	struct ldc_packet *p;

	p = data_get_tx_packet(lp, &new_tail);
	if (likely(p)) {
		int err;

		memset(p, 0, sizeof(*p));
		p->type = LDC_DATA;
		p->stype = LDC_ACK;
		p->ctrl = 0;
		p->seqid = lp->snd_nxt + 1;
		p->u.r.ackid = lp->rcv_nxt;

		err = send_tx_packet(lp, p, new_tail);
		if (!err)
			lp->snd_nxt++;
	}
}

static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
{
	struct ldc_packet *first_frag;
	unsigned long hv_err, new;
	int err, copied;

	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);
	if (hv_err)
		return ldc_abort(lp);

	if (lp->chan_state == LDC_CHANNEL_DOWN ||
	    lp->chan_state == LDC_CHANNEL_RESETTING)
		return -ECONNRESET;

	if (lp->rx_head == lp->rx_tail)
		return 0;

	first_frag = NULL;
	copied = err = 0;
	new = lp->rx_head;
	while (1) {
		struct ldc_packet *p;
		int pkt_len;

		BUG_ON(new == lp->rx_tail);
		p = lp->rx_base + (new / LDC_PACKET_SIZE);

		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
		       "rcv_nxt[%08x]\n",
		       p->type,
		       p->stype,
		       p->ctrl,
		       p->env,
		       p->seqid,
		       p->u.r.ackid,
		       lp->rcv_nxt);

		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
			err = rx_bad_seq(lp, p, first_frag);
			copied = 0;
			break;
		}

		if (p->type & LDC_CTRL) {
			err = process_control_frame(lp, p);
			if (err < 0)
				break;
			err = 0;
		}

		lp->rcv_nxt = p->seqid;

		if (!(p->type & LDC_DATA)) {
			new = rx_advance(lp, new);
			goto no_data;
		}
		if (p->stype & (LDC_ACK | LDC_NACK)) {
			err = data_ack_nack(lp, p);
			if (err)
				break;
		}
		if (!(p->stype & LDC_INFO)) {
			new = rx_advance(lp, new);
			err = rx_set_head(lp, new);
			if (err)
				break;
			goto no_data;
		}

		pkt_len = p->env & LDC_LEN;

		/* Every initial packet starts with the START bit set.
		 *
		 * Singleton packets will have both START+STOP set.
		 *
		 * Fragments will have START set in the first frame, STOP
		 * set in the last frame, and neither bit set in middle
		 * frames of the packet.
		 *
		 * Therefore if we are at the beginning of a packet and
		 * we don't see START, or we are in the middle of a fragmented
		 * packet and do see START, we are unsynchronized and should
		 * flush the RX queue.
		 */
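
		/* Worked example: in stream mode (mss = 48) a 100-byte
		 * write_nonraw() is sent as three frames whose env
		 * fields are LDC_START|48, 48, and LDC_STOP|4, and the
		 * loop here reassembles them into one 100-byte message.
		 */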
		if ((first_frag == NULL && !(p->env & LDC_START)) ||
		    (first_frag != NULL &&  (p->env & LDC_START))) {
			if (!first_frag)
				new = rx_advance(lp, new);

			err = rx_set_head(lp, new);
			if (err)
				break;

			if (!first_frag)
				goto no_data;
		}
		if (!first_frag)
			first_frag = p;

		if (pkt_len > size - copied) {
			/* User didn't give us a big enough buffer,
			 * what to do?  This is a pretty serious error.
			 *
			 * Since we haven't updated the RX ring head to
			 * consume any of the packets, signal the error
			 * to the user and just leave the RX ring alone.
			 *
			 * This seems the best behavior because this allows
			 * a user of the LDC layer to start with a small
			 * RX buffer for ldc_read() calls and use -EMSGSIZE
			 * as a cue to enlarge its read buffer.
			 */
			err = -EMSGSIZE;
			break;
		}

		/* Ok, we are gonna eat this one.  */
		new = rx_advance(lp, new);

		memcpy(buf,
		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
			p->u.u_data : p->u.r.r_data), pkt_len);
		buf += pkt_len;
		copied += pkt_len;

		if (p->env & LDC_STOP)
			break;

no_data:
		if (new == lp->rx_tail) {
			err = rx_data_wait(lp, new);
			if (err)
				break;
		}
	}

	if (!err)
		err = rx_set_head(lp, new);

	if (err && first_frag)
		lp->rcv_nxt = first_frag->seqid - 1;

	if (!err) {
		err = copied;
		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
			send_data_ack(lp);
	}

	return err;
}

static const struct ldc_mode_ops nonraw_ops = {
	.write		=	write_nonraw,
	.read		=	read_nonraw,
};

static int write_stream(struct ldc_channel *lp, const void *buf,
			unsigned int size)
{
	if (size > lp->cfg.mtu)
		size = lp->cfg.mtu;
	return write_nonraw(lp, buf, size);
}

static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
{
	if (!lp->mssbuf_len) {
		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
		if (err < 0)
			return err;

		lp->mssbuf_len = err;
		lp->mssbuf_off = 0;
	}

	if (size > lp->mssbuf_len)
		size = lp->mssbuf_len;
	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);

	lp->mssbuf_off += size;
	lp->mssbuf_len -= size;

	return size;
}

static const struct ldc_mode_ops stream_ops = {
	.write		=	write_stream,
	.read		=	read_stream,
};

int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
{
	unsigned long flags;
	int err;

	if (!buf)
		return -EINVAL;

	if (!size)
		return 0;

	spin_lock_irqsave(&lp->lock, flags);

	if (lp->hs_state != LDC_HS_COMPLETE)
		err = -ENOTCONN;
	else
		err = lp->mops->write(lp, buf, size);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_write);

int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
{
	unsigned long flags;
	int err;

	if (!buf)
		return -EINVAL;

	if (!size)
		return 0;

	spin_lock_irqsave(&lp->lock, flags);

	if (lp->hs_state != LDC_HS_COMPLETE)
		err = -ENOTCONN;
	else
		err = lp->mops->read(lp, buf, size);

	spin_unlock_irqrestore(&lp->lock, flags);

	return err;
}
EXPORT_SYMBOL(ldc_read);

static u64 pagesize_code(void)
{
	switch (PAGE_SIZE) {
	default:
	case (8ULL * 1024ULL):
		return 0;
	case (64ULL * 1024ULL):
		return 1;
	case (512ULL * 1024ULL):
		return 2;
	case (4ULL * 1024ULL * 1024ULL):
		return 3;
	case (32ULL * 1024ULL * 1024ULL):
		return 4;
	case (256ULL * 1024ULL * 1024ULL):
		return 5;
	}
}

static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
{
	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
		(index << PAGE_SHIFT) |
		page_offset);
}
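
/* make_cookie() is the inverse of ldc_cookie_to_index() above.  For
 * example, on an 8K-page kernel (PAGE_SHIFT = 13, pgsz_code 0),
 * make_cookie(2, 0, 0x10) yields (2 << 13) | 0x10 = 0x4010.
 */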


static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
					     unsigned long npages)
{
	long entry;

	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
				      npages, NULL, (unsigned long)-1, 0);
	if (unlikely(entry == IOMMU_ERROR_CODE))
		return NULL;

	return iommu->page_table + entry;
}

static u64 perm_to_mte(unsigned int map_perm)
{
	u64 mte_base;

	mte_base = pagesize_code();

	if (map_perm & LDC_MAP_SHADOW) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_COPY_R;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_COPY_W;
	}
	if (map_perm & LDC_MAP_DIRECT) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_READ;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_WRITE;
		if (map_perm & LDC_MAP_X)
			mte_base |= LDC_MTE_EXEC;
	}
	if (map_perm & LDC_MAP_IO) {
		if (map_perm & LDC_MAP_R)
			mte_base |= LDC_MTE_IOMMU_R;
		if (map_perm & LDC_MAP_W)
			mte_base |= LDC_MTE_IOMMU_W;
	}

	return mte_base;
}
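
/* For example, a shadow mapping with read and write access,
 * map_perm = LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W, produces an MTE
 * of pagesize_code() | LDC_MTE_COPY_R | LDC_MTE_COPY_W; the physical
 * address is OR'd in later by fill_cookies().
 */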

static int pages_in_region(unsigned long base, long len)
{
	int count = 0;

	do {
		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;

		len -= (new - base);
		base = new;
		count++;
	} while (len > 0);

	return count;
}
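
/* Worked example with 8K pages: base = 0x1ffc0, len = 0x100 straddles
 * a page boundary, so the first iteration consumes only 0x40 bytes up
 * to 0x20000 and the second consumes the rest, giving count = 2.
 */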

struct cookie_state {
	struct ldc_mtable_entry		*page_table;
	struct ldc_trans_cookie		*cookies;
	u64				mte_base;
	u64				prev_cookie;
	u32				pte_idx;
	u32				nc;
};

static void fill_cookies(struct cookie_state *sp, unsigned long pa,
			 unsigned long off, unsigned long len)
{
	do {
		unsigned long tlen, new = pa + PAGE_SIZE;
		u64 this_cookie;

		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;

		tlen = PAGE_SIZE;
		if (off)
			tlen = PAGE_SIZE - off;
		if (tlen > len)
			tlen = len;

		this_cookie = make_cookie(sp->pte_idx,
					  pagesize_code(), off);

		off = 0;

		if (this_cookie == sp->prev_cookie) {
			sp->cookies[sp->nc - 1].cookie_size += tlen;
		} else {
			sp->cookies[sp->nc].cookie_addr = this_cookie;
			sp->cookies[sp->nc].cookie_size = tlen;
			sp->nc++;
		}
		sp->prev_cookie = this_cookie + tlen;

		sp->pte_idx++;

		len -= tlen;
		pa = new;
	} while (len > 0);
}
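
/* Because consecutive map-table entries yield cookies exactly
 * PAGE_SIZE apart, a region mapped into consecutive entries collapses
 * into a single cookie whose size keeps growing.  E.g. two full 8K
 * pages at pte_idx 2 and 3 produce this_cookie values 0x4000 and
 * 0x6000; the second equals prev_cookie (0x4000 + 0x2000), so the
 * caller sees one cookie { .cookie_addr = 0x4000, .cookie_size =
 * 0x4000 } instead of two.
 */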

static int sg_count_one(struct scatterlist *sg)
{
	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
	long len = sg->length;

	if ((sg->offset | len) & (8UL - 1))
		return -EFAULT;

	return pages_in_region(base + sg->offset, len);
}

static int sg_count_pages(struct scatterlist *sg, int num_sg)
{
	int count;
	int i;

	count = 0;
	for (i = 0; i < num_sg; i++) {
		int err = sg_count_one(sg + i);
		if (err < 0)
			return err;
		count += err;
	}

	return count;
}

int ldc_map_sg(struct ldc_channel *lp,
	       struct scatterlist *sg, int num_sg,
	       struct ldc_trans_cookie *cookies, int ncookies,
	       unsigned int map_perm)
{
	unsigned long i, npages;
	struct ldc_mtable_entry *base;
	struct cookie_state state;
	struct ldc_iommu *iommu;
	int err;
	struct scatterlist *s;

	if (map_perm & ~LDC_MAP_ALL)
		return -EINVAL;

	err = sg_count_pages(sg, num_sg);
	if (err < 0)
		return err;

	npages = err;
	if (err > ncookies)
		return -EMSGSIZE;

	iommu = &lp->iommu;

	base = alloc_npages(iommu, npages);

	if (!base)
		return -ENOMEM;

	state.page_table = iommu->page_table;
	state.cookies = cookies;
	state.mte_base = perm_to_mte(map_perm);
	state.prev_cookie = ~(u64)0;
	state.pte_idx = (base - iommu->page_table);
	state.nc = 0;

	for_each_sg(sg, s, num_sg, i) {
		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
			     s->offset, s->length);
	}

	return state.nc;
}
EXPORT_SYMBOL(ldc_map_sg);

int ldc_map_single(struct ldc_channel *lp,
		   void *buf, unsigned int len,
		   struct ldc_trans_cookie *cookies, int ncookies,
		   unsigned int map_perm)
{
	unsigned long npages, pa;
	struct ldc_mtable_entry *base;
	struct cookie_state state;
	struct ldc_iommu *iommu;

	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
		return -EINVAL;

	pa = __pa(buf);
	if ((pa | len) & (8UL - 1))
		return -EFAULT;

	npages = pages_in_region(pa, len);

	iommu = &lp->iommu;

	base = alloc_npages(iommu, npages);

	if (!base)
		return -ENOMEM;

	state.page_table = iommu->page_table;
	state.cookies = cookies;
	state.mte_base = perm_to_mte(map_perm);
	state.prev_cookie = ~(u64)0;
	state.pte_idx = (base - iommu->page_table);
	state.nc = 0;
	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
	BUG_ON(state.nc > ncookies);

	return state.nc;
}
EXPORT_SYMBOL(ldc_map_single);


static void free_npages(unsigned long id, struct ldc_iommu *iommu,
			u64 cookie, u64 size)
{
	unsigned long npages, entry;

	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;

	entry = ldc_cookie_to_index(cookie, iommu);
	ldc_demap(iommu, id, cookie, entry, npages);
	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
}

void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
	       int ncookies)
{
	struct ldc_iommu *iommu = &lp->iommu;
	int i;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	for (i = 0; i < ncookies; i++) {
		u64 addr = cookies[i].cookie_addr;
		u64 size = cookies[i].cookie_size;

		free_npages(lp->id, iommu, addr, size);
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}
EXPORT_SYMBOL(ldc_unmap);

int ldc_copy(struct ldc_channel *lp, int copy_dir,
	     void *buf, unsigned int len, unsigned long offset,
	     struct ldc_trans_cookie *cookies, int ncookies)
{
	unsigned int orig_len;
	unsigned long ra;
	int i;

	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
		       lp->id, copy_dir);
		return -EINVAL;
	}

	ra = __pa(buf);
	if ((ra | len | offset) & (8UL - 1)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
		       "ra[%lx] len[%x] offset[%lx]\n",
		       lp->id, ra, len, offset);
		return -EFAULT;
	}

	if (lp->hs_state != LDC_HS_COMPLETE ||
	    (lp->flags & LDC_FLAG_RESET)) {
		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
		return -ECONNRESET;
	}

	orig_len = len;
	for (i = 0; i < ncookies; i++) {
		unsigned long cookie_raddr = cookies[i].cookie_addr;
		unsigned long this_len = cookies[i].cookie_size;
		unsigned long actual_len;

		if (unlikely(offset)) {
			unsigned long this_off = offset;

			if (this_off > this_len)
				this_off = this_len;

			offset -= this_off;
			this_len -= this_off;
			if (!this_len)
				continue;
			cookie_raddr += this_off;
		}

		if (this_len > len)
			this_len = len;

		while (1) {
			unsigned long hv_err;

			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
						cookie_raddr, ra,
						this_len, &actual_len);
			if (unlikely(hv_err)) {
				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
				       "HV error %lu\n",
				       lp->id, hv_err);
				if (lp->hs_state != LDC_HS_COMPLETE ||
				    (lp->flags & LDC_FLAG_RESET))
					return -ECONNRESET;
				else
					return -EFAULT;
			}

			cookie_raddr += actual_len;
			ra += actual_len;
			len -= actual_len;
			if (actual_len == this_len)
				break;

			this_len -= actual_len;
		}

		if (!len)
			break;
	}

	/* It is caller policy what to do about short copies.
	 * For example, a networking driver can declare the
	 * packet a runt and drop it.
	 */

	return orig_len - len;
}
EXPORT_SYMBOL(ldc_copy);

void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
			  struct ldc_trans_cookie *cookies, int *ncookies,
			  unsigned int map_perm)
{
	void *buf;
	int err;

	if (len & (8UL - 1))
		return ERR_PTR(-EINVAL);

	buf = kzalloc(len, GFP_ATOMIC);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
	if (err < 0) {
		kfree(buf);
		return ERR_PTR(err);
	}
	*ncookies = err;

	return buf;
}
EXPORT_SYMBOL(ldc_alloc_exp_dring);

void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	ldc_unmap(lp, cookies, ncookies);
	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);

static int __init ldc_init(void)
{
	unsigned long major, minor;
	struct mdesc_handle *hp;
	const u64 *v;
	int err;
	u64 mp;

	hp = mdesc_grab();
	if (!hp)
		return -ENODEV;

	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
	err = -ENODEV;
	if (mp == MDESC_NODE_NULL)
		goto out;

	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
	if (!v)
		goto out;

	major = 1;
	minor = 0;
	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
		goto out;
	}

	printk(KERN_INFO "%s", version);

	if (!*v) {
		printk(KERN_INFO PFX "Domaining disabled.\n");
		goto out;
	}
	ldom_domaining_enabled = 1;
	err = 0;

out:
	mdesc_release(hp);
	return err;
}

core_initcall(ldc_init);