xref: /openbmc/linux/drivers/block/aoe/aoecmd.c (revision 565d76cb)
1 /* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
2 /*
3  * aoecmd.c
4  * Filesystem request handling methods
5  */
6 
7 #include <linux/ata.h>
8 #include <linux/slab.h>
9 #include <linux/hdreg.h>
10 #include <linux/blkdev.h>
11 #include <linux/skbuff.h>
12 #include <linux/netdevice.h>
13 #include <linux/genhd.h>
14 #include <linux/moduleparam.h>
15 #include <net/net_namespace.h>
16 #include <asm/unaligned.h>
17 #include "aoe.h"
18 
/*
 * Module parameter: once a frame has waited more than this many seconds,
 * rexmit_timer() hands the device to aoedev_downdev().
 */
static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

/*
 * Module parameter: upper bound applied in aoecmd_cfg_rsp() to the buffer
 * count advertised by a target before it is used as the frame count of a
 * newly added target.
 */
static int aoe_maxout = 16;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
27 
28 static struct sk_buff *
29 new_skb(ulong len)
30 {
31 	struct sk_buff *skb;
32 
33 	skb = alloc_skb(len, GFP_ATOMIC);
34 	if (skb) {
35 		skb_reset_mac_header(skb);
36 		skb_reset_network_header(skb);
37 		skb->protocol = __constant_htons(ETH_P_AOE);
38 	}
39 	return skb;
40 }
41 
42 static struct frame *
43 getframe(struct aoetgt *t, int tag)
44 {
45 	struct frame *f, *e;
46 
47 	f = t->frames;
48 	e = f + t->nframes;
49 	for (; f<e; f++)
50 		if (f->tag == tag)
51 			return f;
52 	return NULL;
53 }
54 
55 /*
56  * Leave the top bit clear so we have tagspace for userland.
57  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
58  * This driver reserves tag -1 to mean "unused frame."
59  */
60 static int
61 newtag(struct aoetgt *t)
62 {
63 	register ulong n;
64 
65 	n = jiffies & 0xffff;
66 	return n |= (++t->lasttag & 0x7fff) << 16;
67 }
68 
69 static int
70 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
71 {
72 	u32 host_tag = newtag(t);
73 
74 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
75 	memcpy(h->dst, t->addr, sizeof h->dst);
76 	h->type = __constant_cpu_to_be16(ETH_P_AOE);
77 	h->verfl = AOE_HVER;
78 	h->major = cpu_to_be16(d->aoemajor);
79 	h->minor = d->aoeminor;
80 	h->cmd = AOECMD_ATA;
81 	h->tag = cpu_to_be32(host_tag);
82 
83 	return host_tag;
84 }
85 
86 static inline void
87 put_lba(struct aoe_atahdr *ah, sector_t lba)
88 {
89 	ah->lba0 = lba;
90 	ah->lba1 = lba >>= 8;
91 	ah->lba2 = lba >>= 8;
92 	ah->lba3 = lba >>= 8;
93 	ah->lba4 = lba >>= 8;
94 	ah->lba5 = lba >>= 8;
95 }
96 
97 static void
98 ifrotate(struct aoetgt *t)
99 {
100 	t->ifp++;
101 	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
102 		t->ifp = t->ifs;
103 	if (t->ifp->nd == NULL) {
104 		printk(KERN_INFO "aoe: no interface to rotate to\n");
105 		BUG();
106 	}
107 }
108 
/*
 * Append skb to the tail of device d's skb pool.  The unlocked
 * __skb_queue_tail() variant is used, so the queue's own lock is not taken
 * here.
 */
static void
skb_pool_put(struct aoedev *d, struct sk_buff *skb)
{
	__skb_queue_tail(&d->skbpool, skb);
}
114 
115 static struct sk_buff *
116 skb_pool_get(struct aoedev *d)
117 {
118 	struct sk_buff *skb = skb_peek(&d->skbpool);
119 
120 	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
121 		__skb_unlink(skb, &d->skbpool);
122 		return skb;
123 	}
124 	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
125 	    (skb = new_skb(ETH_ZLEN)))
126 		return skb;
127 
128 	return NULL;
129 }
130 
/* freeframe is where we do our load balancing so it's a little hairy. */
/*
 * Find a frame that can carry the next command for device d.
 *
 * The search starts with the target slot after d->tgt and walks d->targets
 * round-robin.  A target is considered only when it has fewer than maxout
 * frames outstanding, is not the target pointed to by d->htgt, and its
 * current interface has a net_device.  Within such a target, a frame is
 * usable when its tag is FREETAG and its skb's data reference count is 1.
 *
 * On success the frame's skb is trimmed to zero length with no fragments,
 * d->tgt is set to the chosen target slot, the target's interface is
 * rotated, and the frame is returned.  Returns NULL when nothing usable is
 * found.
 */
static struct frame *
freeframe(struct aoedev *d)
{
	struct frame *f, *e, *rf;
	struct aoetgt **t;
	struct sk_buff *skb;

	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
		printk(KERN_ERR "aoe: NULL TARGETS!\n");
		return NULL;
	}
	/* begin with the slot after the last used target, wrapping around */
	t = d->tgt;
	t++;
	if (t >= &d->targets[NTARGETS] || !*t)
		t = d->targets;
	for (;;) {
		if ((*t)->nout < (*t)->maxout
		&& t != d->htgt
		&& (*t)->ifp->nd) {
			/* rf: first free frame whose skb is still referenced elsewhere */
			rf = NULL;
			f = (*t)->frames;
			e = f + (*t)->nframes;
			for (; f < e; f++) {
				if (f->tag != FREETAG)
					continue;
				skb = f->skb;
				/* allocate an skb for the frame on demand; skip it on failure */
				if (!skb
				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
					continue;
				if (atomic_read(&skb_shinfo(skb)->dataref)
					!= 1) {
					if (!rf)
						rf = f;
					continue;
				}
				/* also entered by goto below, with f = rf and a pool skb */
gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
				skb_trim(skb, 0);
				d->tgt = t;
				ifrotate(*t);
				return f;
			}
			/* Work can be done, but the network layer is
			   holding our precious packets.  Try to grab
			   one from the pool. */
			f = rf;
			if (f == NULL) {	/* more paranoia */
				printk(KERN_ERR
					"aoe: freeframe: %s.\n",
					"unexpected null rf");
				d->flags |= DEVFL_KICKME;
				return NULL;
			}
			skb = skb_pool_get(d);
			if (skb) {
				/* swap: the busy skb goes to the pool, the frame gets the pool skb */
				skb_pool_put(d, f->skb);
				f->skb = skb;
				goto gotone;
			}
			/* NOTE(review): dataref looks like a counter of "skb still busy" events - confirm */
			(*t)->dataref++;
			/* rexmit_timer() calls aoecmd_work() when DEVFL_KICKME is set */
			if ((*t)->nout == 0)
				d->flags |= DEVFL_KICKME;
		}
		if (t == d->tgt)	/* we've looped and found nada */
			break;
		t++;
		if (t >= &d->targets[NTARGETS] || !*t)
			t = d->targets;
	}
	return NULL;
}
202 
/*
 * Build and queue one ATA read or write frame for the buffer currently in
 * process on device d (d->inprocess).
 *
 * A frame is obtained from freeframe(); the command covers at most the
 * interface's maxbcnt bytes (DEFAULTBCNT when maxbcnt is 0) and never more
 * than what remains in the current bio_vec.  The buffer's progress fields
 * are advanced, and a clone of the frame's skb is appended to d->sendq.
 *
 * Returns 1 when a frame was set up, 0 when no frame was available.
 */
static int
aoecmd_ata_rw(struct aoedev *d)
{
	struct frame *f;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct bio_vec *bv;
	struct aoetgt *t;
	struct sk_buff *skb;
	ulong bcnt;
	char writebit, extbit;

	/* bits OR-ed into ATA_CMD_PIO_READ below; cleared when not applicable */
	writebit = 0x10;
	extbit = 0x4;

	f = freeframe(d);
	if (f == NULL)
		return 0;
	/* freeframe() left d->tgt pointing at the target that owns f */
	t = *d->tgt;
	buf = d->inprocess;
	bv = buf->bv;
	bcnt = t->ifp->maxbcnt;
	if (bcnt == 0)
		bcnt = DEFAULTBCNT;
	if (bcnt > buf->bv_resid)
		bcnt = buf->bv_resid;
	/* initialize the headers & frame */
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
	f->bcnt = bcnt;
	f->lba = buf->sector;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	put_lba(ah, buf->sector);
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (bio_data_dir(buf->bio) == WRITE) {
		/* attach the data page as a fragment rather than copying it */
		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
		ah->aflags |= AOEAFL_WRITE;
		skb->len += bcnt;
		skb->data_len = bcnt;
		t->wpkts++;
	} else {
		t->rpkts++;
		writebit = 0;
	}

	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bv_off += bcnt;
	buf->bv_resid -= bcnt;
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		/* whole buffer has been issued; nothing is in process any more */
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		/* current bio_vec exhausted; move on to the next one */
		buf->bv = ++bv;
		buf->bv_resid = bv->bv_len;
		WARN_ON(buf->bv_resid == 0);
		buf->bv_off = bv->bv_offset;
	}

	skb->dev = t->ifp->nd;
	/* the frame keeps its own skb; only a clone is queued for transmit */
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb)
		__skb_queue_tail(&d->sendq, skb);
	return 1;
}
288 
289 /* some callers cannot sleep, and they can call this function,
290  * transmitting the packets later, when interrupts are on
291  */
292 static void
293 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
294 {
295 	struct aoe_hdr *h;
296 	struct aoe_cfghdr *ch;
297 	struct sk_buff *skb;
298 	struct net_device *ifp;
299 
300 	rcu_read_lock();
301 	for_each_netdev_rcu(&init_net, ifp) {
302 		dev_hold(ifp);
303 		if (!is_aoe_netif(ifp))
304 			goto cont;
305 
306 		skb = new_skb(sizeof *h + sizeof *ch);
307 		if (skb == NULL) {
308 			printk(KERN_INFO "aoe: skb alloc failure\n");
309 			goto cont;
310 		}
311 		skb_put(skb, sizeof *h + sizeof *ch);
312 		skb->dev = ifp;
313 		__skb_queue_tail(queue, skb);
314 		h = (struct aoe_hdr *) skb_mac_header(skb);
315 		memset(h, 0, sizeof *h + sizeof *ch);
316 
317 		memset(h->dst, 0xff, sizeof h->dst);
318 		memcpy(h->src, ifp->dev_addr, sizeof h->src);
319 		h->type = __constant_cpu_to_be16(ETH_P_AOE);
320 		h->verfl = AOE_HVER;
321 		h->major = cpu_to_be16(aoemajor);
322 		h->minor = aoeminor;
323 		h->cmd = AOECMD_CFG;
324 
325 cont:
326 		dev_put(ifp);
327 	}
328 	rcu_read_unlock();
329 }
330 
331 static void
332 resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
333 {
334 	struct sk_buff *skb;
335 	struct aoe_hdr *h;
336 	struct aoe_atahdr *ah;
337 	char buf[128];
338 	u32 n;
339 
340 	ifrotate(t);
341 	n = newtag(t);
342 	skb = f->skb;
343 	h = (struct aoe_hdr *) skb_mac_header(skb);
344 	ah = (struct aoe_atahdr *) (h+1);
345 
346 	snprintf(buf, sizeof buf,
347 		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
348 		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
349 		h->src, h->dst, t->nout);
350 	aoechr_error(buf);
351 
352 	f->tag = n;
353 	h->tag = cpu_to_be32(n);
354 	memcpy(h->dst, t->addr, sizeof h->dst);
355 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
356 
357 	switch (ah->cmdstat) {
358 	default:
359 		break;
360 	case ATA_CMD_PIO_READ:
361 	case ATA_CMD_PIO_READ_EXT:
362 	case ATA_CMD_PIO_WRITE:
363 	case ATA_CMD_PIO_WRITE_EXT:
364 		put_lba(ah, f->lba);
365 
366 		n = f->bcnt;
367 		if (n > DEFAULTBCNT)
368 			n = DEFAULTBCNT;
369 		ah->scnt = n >> 9;
370 		if (ah->aflags & AOEAFL_WRITE) {
371 			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
372 				offset_in_page(f->bufaddr), n);
373 			skb->len = sizeof *h + sizeof *ah + n;
374 			skb->data_len = n;
375 		}
376 	}
377 	skb->dev = t->ifp->nd;
378 	skb = skb_clone(skb, GFP_ATOMIC);
379 	if (skb == NULL)
380 		return;
381 	__skb_queue_tail(&d->sendq, skb);
382 }
383 
384 static int
385 tsince(int tag)
386 {
387 	int n;
388 
389 	n = jiffies & 0xffff;
390 	n -= tag & 0xffff;
391 	if (n < 0)
392 		n += 1<<16;
393 	return n;
394 }
395 
396 static struct aoeif *
397 getif(struct aoetgt *t, struct net_device *nd)
398 {
399 	struct aoeif *p, *e;
400 
401 	p = t->ifs;
402 	e = p + NAOEIFS;
403 	for (; p < e; p++)
404 		if (p->nd == nd)
405 			return p;
406 	return NULL;
407 }
408 
409 static struct aoeif *
410 addif(struct aoetgt *t, struct net_device *nd)
411 {
412 	struct aoeif *p;
413 
414 	p = getif(t, NULL);
415 	if (!p)
416 		return NULL;
417 	p->nd = nd;
418 	p->maxbcnt = DEFAULTBCNT;
419 	p->lost = 0;
420 	p->lostjumbo = 0;
421 	return p;
422 }
423 
424 static void
425 ejectif(struct aoetgt *t, struct aoeif *ifp)
426 {
427 	struct aoeif *e;
428 	ulong n;
429 
430 	e = t->ifs + NAOEIFS - 1;
431 	n = (e - ifp) * sizeof *ifp;
432 	memmove(ifp, ifp+1, n);
433 	e->nd = NULL;
434 }
435 
/*
 * Move every outstanding frame of the target referenced by d->htgt onto
 * frames obtained from freeframe() and resend each through the target
 * freeframe() selected (d->tgt).
 *
 * Returns 0 as soon as freeframe() has no frame to offer, leaving d->htgt
 * set.  Returns 1 once all frames were moved; in that case the helped
 * target's interface table is cleared and d->htgt is reset to NULL.
 */
static int
sthtith(struct aoedev *d)
{
	struct frame *f, *e, *nf;
	struct sk_buff *skb;
	struct aoetgt *ht = *d->htgt;

	f = ht->frames;
	e = f + ht->nframes;
	for (; f < e; f++) {
		if (f->tag == FREETAG)
			continue;
		nf = freeframe(d);
		if (!nf)
			return 0;
		/* copy the frame wholesale, then exchange the two skbs */
		skb = nf->skb;
		*nf = *f;
		f->skb = skb;
		f->tag = FREETAG;
		nf->waited = 0;
		/* the outstanding count follows the frame to its new target */
		ht->nout--;
		(*d->tgt)->nout++;
		resend(d, *d->tgt, nf);
	}
	/* he's clean, he's useless.  take away his interfaces */
	memset(ht->ifs, 0, sizeof ht->ifs);
	d->htgt = NULL;
	return 1;
}
465 
466 static inline unsigned char
467 ata_scnt(unsigned char *packet) {
468 	struct aoe_hdr *h;
469 	struct aoe_atahdr *ah;
470 
471 	h = (struct aoe_hdr *) packet;
472 	ah = (struct aoe_atahdr *) (h+1);
473 	return ah->scnt;
474 }
475 
/*
 * Timer callback for a device; vp is the struct aoedev pointer cast to
 * ulong.
 *
 * Under d->lock, every outstanding frame older than ~150% of d->rttavg is
 * resent.  Along the way the function fails the device after
 * aoe_deadsecs, nominates a target for help (d->htgt), shrinks and
 * regrows each target's send window, counts losses per interface, and
 * drops an interface or its jumbo setting after too many losses.
 * Queued packets are transmitted after the lock is released, and the
 * timer is re-armed for TIMERTICK jiffies later unless DEVFL_TKILL is set.
 */
static void
rexmit_timer(ulong vp)
{
	struct sk_buff_head queue;
	struct aoedev *d;
	struct aoetgt *t, **tt, **te;
	struct aoeif *ifp;
	struct frame *f, *e;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
		/* return without re-arming the timer */
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++) {
		t = *tt;
		f = t->frames;
		e = f + t->nframes;
		for (; f < e; f++) {
			/* skip unused frames and frames sent less than timeout ticks ago */
			if (f->tag == FREETAG
			|| tsince(f->tag) < timeout)
				continue;
			/* n: total time this frame has waited, converted to seconds */
			n = f->waited += timeout;
			n /= HZ;
			if (n > aoe_deadsecs) {
				/* waited too long.  device failure. */
				aoedev_downdev(d);
				break;
			}

			if (n > HELPWAIT /* see if another target can help */
			&& (tt != d->targets || d->targets[1]))
				d->htgt = tt;

			/* window was full when the loss happened: shrink it, but not below 1 */
			if (t->nout == t->maxout) {
				if (t->maxout > 1)
					t->maxout--;
				t->lastwadj = jiffies;
			}

			/* too many losses on this interface: eject it unless it is the only one */
			ifp = getif(t, f->skb->dev);
			if (ifp && ++ifp->lost > (t->nframes << 1)
			&& (ifp != t->ifs || t->ifs[1].nd)) {
				ejectif(t, ifp);
				ifp = NULL;
			}

			/* maxbcnt == 0 makes aoecmd_ata_rw() fall back to DEFAULTBCNT */
			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
			&& ifp->maxbcnt != DEFAULTBCNT) {
				printk(KERN_INFO
					"aoe: e%ld.%d: "
					"too many lost jumbo on "
					"%s:%pm - "
					"falling back to %d frames.\n",
					d->aoemajor, d->aoeminor,
					ifp->nd->name, t->addr,
					DEFAULTBCNT);
				ifp->maxbcnt = 0;
			}
			resend(d, t, f);
		}

		/* window check */
		/* grow a full window by one if it has not been adjusted for over 10 seconds */
		if (t->nout == t->maxout
		&& t->maxout < t->nframes
		&& (jiffies - t->lastwadj)/HZ > 10) {
			t->maxout++;
			t->lastwadj = jiffies;
		}
	}

	/* something was queued for resend: double rttavg, capped at MAXTIMER */
	if (!skb_queue_empty(&d->sendq)) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	if (d->flags & DEVFL_KICKME || d->htgt) {
		d->flags &= ~DEVFL_KICKME;
		aoecmd_work(d);
	}

	/* take the pending packets so they can be sent after the lock is dropped */
	__skb_queue_head_init(&queue);
	skb_queue_splice_init(&d->sendq, &queue);

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(&queue);
}
580 
581 /* enters with d->lock held */
582 void
583 aoecmd_work(struct aoedev *d)
584 {
585 	struct buf *buf;
586 loop:
587 	if (d->htgt && !sthtith(d))
588 		return;
589 	if (d->inprocess == NULL) {
590 		if (list_empty(&d->bufq))
591 			return;
592 		buf = container_of(d->bufq.next, struct buf, bufs);
593 		list_del(d->bufq.next);
594 		d->inprocess = buf;
595 	}
596 	if (aoecmd_ata_rw(d))
597 		goto loop;
598 }
599 
600 /* this function performs work that has been deferred until sleeping is OK
601  */
602 void
603 aoecmd_sleepwork(struct work_struct *work)
604 {
605 	struct aoedev *d = container_of(work, struct aoedev, work);
606 
607 	if (d->flags & DEVFL_GDALLOC)
608 		aoeblk_gdalloc(d);
609 
610 	if (d->flags & DEVFL_NEWSIZE) {
611 		struct block_device *bd;
612 		unsigned long flags;
613 		u64 ssize;
614 
615 		ssize = get_capacity(d->gd);
616 		bd = bdget_disk(d->gd, 0);
617 
618 		if (bd) {
619 			mutex_lock(&bd->bd_inode->i_mutex);
620 			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
621 			mutex_unlock(&bd->bd_inode->i_mutex);
622 			bdput(bd);
623 		}
624 		spin_lock_irqsave(&d->lock, flags);
625 		d->flags |= DEVFL_UP;
626 		d->flags &= ~DEVFL_NEWSIZE;
627 		spin_unlock_irqrestore(&d->lock, flags);
628 	}
629 }
630 
631 static void
632 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
633 {
634 	u64 ssize;
635 	u16 n;
636 
637 	/* word 83: command set supported */
638 	n = get_unaligned_le16(&id[83 << 1]);
639 
640 	/* word 86: command set/feature enabled */
641 	n |= get_unaligned_le16(&id[86 << 1]);
642 
643 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
644 		d->flags |= DEVFL_EXT;
645 
646 		/* word 100: number lba48 sectors */
647 		ssize = get_unaligned_le64(&id[100 << 1]);
648 
649 		/* set as in ide-disk.c:init_idedisk_capacity */
650 		d->geo.cylinders = ssize;
651 		d->geo.cylinders /= (255 * 63);
652 		d->geo.heads = 255;
653 		d->geo.sectors = 63;
654 	} else {
655 		d->flags &= ~DEVFL_EXT;
656 
657 		/* number lba28 sectors */
658 		ssize = get_unaligned_le32(&id[60 << 1]);
659 
660 		/* NOTE: obsolete in ATA 6 */
661 		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
662 		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
663 		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
664 	}
665 
666 	if (d->ssize != ssize)
667 		printk(KERN_INFO
668 			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
669 			t->addr,
670 			d->aoemajor, d->aoeminor,
671 			d->fw_ver, (long long)ssize);
672 	d->ssize = ssize;
673 	d->geo.start = 0;
674 	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
675 		return;
676 	if (d->gd != NULL) {
677 		set_capacity(d->gd, ssize);
678 		d->flags |= DEVFL_NEWSIZE;
679 	} else
680 		d->flags |= DEVFL_GDALLOC;
681 	schedule_work(&d->work);
682 }
683 
684 static void
685 calc_rttavg(struct aoedev *d, int rtt)
686 {
687 	register long n;
688 
689 	n = rtt;
690 	if (n < 0) {
691 		n = -rtt;
692 		if (n < MINTIMER)
693 			n = MINTIMER;
694 		else if (n > MAXTIMER)
695 			n = MAXTIMER;
696 		d->mintimer += (n - d->mintimer) >> 1;
697 	} else if (n < d->mintimer)
698 		n = d->mintimer;
699 	else if (n > MAXTIMER)
700 		n = MAXTIMER;
701 
702 	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
703 	n -= d->rttavg;
704 	d->rttavg += n >> 2;
705 }
706 
707 static struct aoetgt *
708 gettgt(struct aoedev *d, char *addr)
709 {
710 	struct aoetgt **t, **e;
711 
712 	t = d->targets;
713 	e = t + NTARGETS;
714 	for (; t < e && *t; t++)
715 		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
716 			return *t;
717 	return NULL;
718 }
719 
720 static inline void
721 diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
722 {
723 	unsigned long n_sect = bio->bi_size >> 9;
724 	const int rw = bio_data_dir(bio);
725 	struct hd_struct *part;
726 	int cpu;
727 
728 	cpu = part_stat_lock();
729 	part = disk_map_sector_rcu(disk, sector);
730 
731 	part_stat_inc(cpu, part, ios[rw]);
732 	part_stat_add(cpu, part, ticks[rw], duration);
733 	part_stat_add(cpu, part, sectors[rw], n_sect);
734 	part_stat_add(cpu, part, io_ticks, duration);
735 
736 	part_stat_unlock();
737 }
738 
739 void
740 aoecmd_ata_rsp(struct sk_buff *skb)
741 {
742 	struct sk_buff_head queue;
743 	struct aoedev *d;
744 	struct aoe_hdr *hin, *hout;
745 	struct aoe_atahdr *ahin, *ahout;
746 	struct frame *f;
747 	struct buf *buf;
748 	struct aoetgt *t;
749 	struct aoeif *ifp;
750 	register long n;
751 	ulong flags;
752 	char ebuf[128];
753 	u16 aoemajor;
754 
755 	hin = (struct aoe_hdr *) skb_mac_header(skb);
756 	aoemajor = get_unaligned_be16(&hin->major);
757 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
758 	if (d == NULL) {
759 		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
760 			"for unknown device %d.%d\n",
761 			 aoemajor, hin->minor);
762 		aoechr_error(ebuf);
763 		return;
764 	}
765 
766 	spin_lock_irqsave(&d->lock, flags);
767 
768 	n = get_unaligned_be32(&hin->tag);
769 	t = gettgt(d, hin->src);
770 	if (t == NULL) {
771 		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
772 			d->aoemajor, d->aoeminor, hin->src);
773 		spin_unlock_irqrestore(&d->lock, flags);
774 		return;
775 	}
776 	f = getframe(t, n);
777 	if (f == NULL) {
778 		calc_rttavg(d, -tsince(n));
779 		spin_unlock_irqrestore(&d->lock, flags);
780 		snprintf(ebuf, sizeof ebuf,
781 			"%15s e%d.%d    tag=%08x@%08lx\n",
782 			"unexpected rsp",
783 			get_unaligned_be16(&hin->major),
784 			hin->minor,
785 			get_unaligned_be32(&hin->tag),
786 			jiffies);
787 		aoechr_error(ebuf);
788 		return;
789 	}
790 
791 	calc_rttavg(d, tsince(f->tag));
792 
793 	ahin = (struct aoe_atahdr *) (hin+1);
794 	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
795 	ahout = (struct aoe_atahdr *) (hout+1);
796 	buf = f->buf;
797 
798 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
799 		printk(KERN_ERR
800 			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
801 			ahout->cmdstat, ahin->cmdstat,
802 			d->aoemajor, d->aoeminor);
803 		if (buf)
804 			buf->flags |= BUFFL_FAIL;
805 	} else {
806 		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
807 			d->htgt = NULL;
808 		n = ahout->scnt << 9;
809 		switch (ahout->cmdstat) {
810 		case ATA_CMD_PIO_READ:
811 		case ATA_CMD_PIO_READ_EXT:
812 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
813 				printk(KERN_ERR
814 					"aoe: %s.  skb->len=%d need=%ld\n",
815 					"runt data size in read", skb->len, n);
816 				/* fail frame f?  just returning will rexmit. */
817 				spin_unlock_irqrestore(&d->lock, flags);
818 				return;
819 			}
820 			memcpy(f->bufaddr, ahin+1, n);
821 		case ATA_CMD_PIO_WRITE:
822 		case ATA_CMD_PIO_WRITE_EXT:
823 			ifp = getif(t, skb->dev);
824 			if (ifp) {
825 				ifp->lost = 0;
826 				if (n > DEFAULTBCNT)
827 					ifp->lostjumbo = 0;
828 			}
829 			if (f->bcnt -= n) {
830 				f->lba += n >> 9;
831 				f->bufaddr += n;
832 				resend(d, t, f);
833 				goto xmit;
834 			}
835 			break;
836 		case ATA_CMD_ID_ATA:
837 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
838 				printk(KERN_INFO
839 					"aoe: runt data size in ataid.  skb->len=%d\n",
840 					skb->len);
841 				spin_unlock_irqrestore(&d->lock, flags);
842 				return;
843 			}
844 			ataid_complete(d, t, (char *) (ahin+1));
845 			break;
846 		default:
847 			printk(KERN_INFO
848 				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
849 				ahout->cmdstat,
850 				get_unaligned_be16(&hin->major),
851 				hin->minor);
852 		}
853 	}
854 
855 	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
856 		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
857 		if (buf->flags & BUFFL_FAIL)
858 			bio_endio(buf->bio, -EIO);
859 		else {
860 			bio_flush_dcache_pages(buf->bio);
861 			bio_endio(buf->bio, 0);
862 		}
863 		mempool_free(buf, d->bufpool);
864 	}
865 
866 	f->buf = NULL;
867 	f->tag = FREETAG;
868 	t->nout--;
869 
870 	aoecmd_work(d);
871 xmit:
872 	__skb_queue_head_init(&queue);
873 	skb_queue_splice_init(&d->sendq, &queue);
874 
875 	spin_unlock_irqrestore(&d->lock, flags);
876 	aoenet_xmit(&queue);
877 }
878 
/*
 * Build the config query packets for (aoemajor, aoeminor) with
 * aoecmd_cfg_pkts() on a local queue and hand that queue to aoenet_xmit().
 */
void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff_head queue;

	__skb_queue_head_init(&queue);
	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
	aoenet_xmit(&queue);
}
888 
889 struct sk_buff *
890 aoecmd_ata_id(struct aoedev *d)
891 {
892 	struct aoe_hdr *h;
893 	struct aoe_atahdr *ah;
894 	struct frame *f;
895 	struct sk_buff *skb;
896 	struct aoetgt *t;
897 
898 	f = freeframe(d);
899 	if (f == NULL)
900 		return NULL;
901 
902 	t = *d->tgt;
903 
904 	/* initialize the headers & frame */
905 	skb = f->skb;
906 	h = (struct aoe_hdr *) skb_mac_header(skb);
907 	ah = (struct aoe_atahdr *) (h+1);
908 	skb_put(skb, sizeof *h + sizeof *ah);
909 	memset(h, 0, skb->len);
910 	f->tag = aoehdr_atainit(d, t, h);
911 	t->nout++;
912 	f->waited = 0;
913 
914 	/* set up ata header */
915 	ah->scnt = 1;
916 	ah->cmdstat = ATA_CMD_ID_ATA;
917 	ah->lba3 = 0xa0;
918 
919 	skb->dev = t->ifp->nd;
920 
921 	d->rttavg = MAXTIMER;
922 	d->timer.function = rexmit_timer;
923 
924 	return skb_clone(skb, GFP_ATOMIC);
925 }
926 
927 static struct aoetgt *
928 addtgt(struct aoedev *d, char *addr, ulong nframes)
929 {
930 	struct aoetgt *t, **tt, **te;
931 	struct frame *f, *e;
932 
933 	tt = d->targets;
934 	te = tt + NTARGETS;
935 	for (; tt < te && *tt; tt++)
936 		;
937 
938 	if (tt == te) {
939 		printk(KERN_INFO
940 			"aoe: device addtgt failure; too many targets\n");
941 		return NULL;
942 	}
943 	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
944 	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
945 	if (!t || !f) {
946 		kfree(f);
947 		kfree(t);
948 		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
949 		return NULL;
950 	}
951 
952 	t->nframes = nframes;
953 	t->frames = f;
954 	e = f + nframes;
955 	for (; f < e; f++)
956 		f->tag = FREETAG;
957 	memcpy(t->addr, addr, sizeof t->addr);
958 	t->ifp = t->ifs;
959 	t->maxout = t->nframes;
960 	return *tt = t;
961 }
962 
/*
 * Handle a received AoE config response carried in skb.
 *
 * The responding shelf/slot is mapped to a device via
 * aoedev_by_sysminor_m().  Under d->lock the sender is registered as a
 * target (if new) and the receiving net_device as one of the target's
 * interfaces (if new); the interface's maxbcnt is recomputed from the MTU
 * and the sector count advertised in the response.  If the device is not
 * open, the firmware version is recorded and an ATA IDENTIFY command is
 * prepared with aoecmd_ata_id() and transmitted after the lock is
 * released.
 */
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct aoetgt *t;
	struct aoeif *ifp;
	ulong flags, sysminor, aoemajor;
	struct sk_buff *sl;
	u16 n;

	h = (struct aoe_hdr *) skb_mac_header(skb);
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = get_unaligned_be16(&h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
			"Check shelf dip switches.\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	/* reject addresses whose partition minors would exceed MINORMASK */
	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
			aoemajor, (int) h->minor);
		return;
	}

	/* n: frame count for a new target, bounded by the aoe_maxout parameter */
	n = be16_to_cpu(ch->bufcnt);
	if (n > aoe_maxout)	/* keep it reasonable */
		n = aoe_maxout;

	d = aoedev_by_sysminor_m(sysminor);
	if (d == NULL) {
		printk(KERN_INFO "aoe: device sysminor_m failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	t = gettgt(d, h->src);
	if (!t) {
		t = addtgt(d, h->src, n);
		if (!t) {
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	ifp = getif(t, skb->dev);
	if (!ifp) {
		ifp = addif(t, skb->dev);
		if (!ifp) {
			printk(KERN_INFO
				"aoe: device addif failure; "
				"too many interfaces?\n");
			spin_unlock_irqrestore(&d->lock, flags);
			return;
		}
	}
	/* maxbcnt == 0 (set by rexmit_timer() after jumbo losses) is left alone */
	if (ifp->maxbcnt) {
		/* n is reused: sectors that fit in one MTU after the two headers */
		n = ifp->nd->mtu;
		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
		n /= 512;
		if (n > ch->scnt)
			n = ch->scnt;
		n = n ? n * 512 : DEFAULTBCNT;
		if (n != ifp->maxbcnt) {
			printk(KERN_INFO
				"aoe: e%ld.%d: setting %d%s%s:%pm\n",
				d->aoemajor, d->aoeminor, n,
				" byte data frames on ", ifp->nd->name,
				t->addr);
			ifp->maxbcnt = n;
		}
	}

	/* don't change users' perspective */
	if (d->nopen) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	d->fw_ver = be16_to_cpu(ch->fwver);

	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	/* transmit the IDENTIFY command, if one was prepared, outside the lock */
	if (sl) {
		struct sk_buff_head queue;
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, sl);
		aoenet_xmit(&queue);
	}
}
1062 
1063 void
1064 aoecmd_cleanslate(struct aoedev *d)
1065 {
1066 	struct aoetgt **t, **te;
1067 	struct aoeif *p, *e;
1068 
1069 	d->mintimer = MINTIMER;
1070 
1071 	t = d->targets;
1072 	te = t + NTARGETS;
1073 	for (; t < te && *t; t++) {
1074 		(*t)->maxout = (*t)->nframes;
1075 		p = (*t)->ifs;
1076 		e = p + NAOEIFS;
1077 		for (; p < e; p++) {
1078 			p->lostjumbo = 0;
1079 			p->lost = 0;
1080 			p->maxbcnt = DEFAULTBCNT;
1081 		}
1082 	}
1083 }
1084