xref: /openbmc/linux/drivers/block/aoe/aoecmd.c (revision a1e58bbd)
1 /* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
2 /*
3  * aoecmd.c
4  * Filesystem request handling methods
5  */
6 
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include <linux/genhd.h>
12 #include <linux/moduleparam.h>
13 #include <net/net_namespace.h>
14 #include <asm/unaligned.h>
15 #include "aoe.h"
16 
/*
 * Seconds a frame may go unanswered before rexmit_timer() gives up and
 * takes the whole device down.  Defaults to three minutes; exposed as a
 * module parameter with mode 0644.
 */
static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

/*
 * Upper bound applied in aoecmd_cfg_rsp() to the buffer count a target
 * advertises, which becomes the number of frames allocated for it.
 */
static int aoe_maxout = 16;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
25 
26 static struct sk_buff *
27 new_skb(ulong len)
28 {
29 	struct sk_buff *skb;
30 
31 	skb = alloc_skb(len, GFP_ATOMIC);
32 	if (skb) {
33 		skb_reset_mac_header(skb);
34 		skb_reset_network_header(skb);
35 		skb->protocol = __constant_htons(ETH_P_AOE);
36 		skb->priority = 0;
37 		skb->next = skb->prev = NULL;
38 
39 		/* tell the network layer not to perform IP checksums
40 		 * or to get the NIC to do it
41 		 */
42 		skb->ip_summed = CHECKSUM_NONE;
43 	}
44 	return skb;
45 }
46 
47 static struct frame *
48 getframe(struct aoetgt *t, int tag)
49 {
50 	struct frame *f, *e;
51 
52 	f = t->frames;
53 	e = f + t->nframes;
54 	for (; f<e; f++)
55 		if (f->tag == tag)
56 			return f;
57 	return NULL;
58 }
59 
60 /*
61  * Leave the top bit clear so we have tagspace for userland.
62  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
63  * This driver reserves tag -1 to mean "unused frame."
64  */
65 static int
66 newtag(struct aoetgt *t)
67 {
68 	register ulong n;
69 
70 	n = jiffies & 0xffff;
71 	return n |= (++t->lasttag & 0x7fff) << 16;
72 }
73 
74 static int
75 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
76 {
77 	u32 host_tag = newtag(t);
78 
79 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
80 	memcpy(h->dst, t->addr, sizeof h->dst);
81 	h->type = __constant_cpu_to_be16(ETH_P_AOE);
82 	h->verfl = AOE_HVER;
83 	h->major = cpu_to_be16(d->aoemajor);
84 	h->minor = d->aoeminor;
85 	h->cmd = AOECMD_ATA;
86 	h->tag = cpu_to_be32(host_tag);
87 
88 	return host_tag;
89 }
90 
91 static inline void
92 put_lba(struct aoe_atahdr *ah, sector_t lba)
93 {
94 	ah->lba0 = lba;
95 	ah->lba1 = lba >>= 8;
96 	ah->lba2 = lba >>= 8;
97 	ah->lba3 = lba >>= 8;
98 	ah->lba4 = lba >>= 8;
99 	ah->lba5 = lba >>= 8;
100 }
101 
102 static void
103 ifrotate(struct aoetgt *t)
104 {
105 	t->ifp++;
106 	if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
107 		t->ifp = t->ifs;
108 	if (t->ifp->nd == NULL) {
109 		printk(KERN_INFO "aoe: no interface to rotate to\n");
110 		BUG();
111 	}
112 }
113 
114 static void
115 skb_pool_put(struct aoedev *d, struct sk_buff *skb)
116 {
117 	if (!d->skbpool_hd)
118 		d->skbpool_hd = skb;
119 	else
120 		d->skbpool_tl->next = skb;
121 	d->skbpool_tl = skb;
122 }
123 
124 static struct sk_buff *
125 skb_pool_get(struct aoedev *d)
126 {
127 	struct sk_buff *skb;
128 
129 	skb = d->skbpool_hd;
130 	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
131 		d->skbpool_hd = skb->next;
132 		skb->next = NULL;
133 		return skb;
134 	}
135 	if (d->nskbpool < NSKBPOOLMAX
136 	&& (skb = new_skb(ETH_ZLEN))) {
137 		d->nskbpool++;
138 		return skb;
139 	}
140 	return NULL;
141 }
142 
/*
 * freeframe is where we do our load balancing so it's a little hairy.
 *
 * Starting with the target after d->tgt, walk the targets round-robin
 * looking for one that has room in its window (nout < maxout), is not
 * the target currently being helped (d->htgt), and whose current
 * interface has a network device.  Within that target, pick a frame
 * marked FREETAG whose skb data is referenced only once.
 *
 * On success the frame's skb is reset to empty, d->tgt is set to the
 * chosen target, that target's interface is rotated, and the frame is
 * returned.  Returns NULL when no usable frame was found.
 */
static struct frame *
freeframe(struct aoedev *d)
{
	struct frame *f, *e, *rf;
	struct aoetgt **t;
	struct sk_buff *skb;

	if (d->targets[0] == NULL) {	/* shouldn't happen, but I'm paranoid */
		printk(KERN_ERR "aoe: NULL TARGETS!\n");
		return NULL;
	}
	/* begin with the target after the one used last time, wrapping */
	t = d->tgt;
	t++;
	if (t >= &d->targets[NTARGETS] || !*t)
		t = d->targets;
	for (;;) {
		if ((*t)->nout < (*t)->maxout
		&& t != d->htgt
		&& (*t)->ifp->nd) {
			/* rf: first free frame whose skb data is still shared */
			rf = NULL;
			f = (*t)->frames;
			e = f + (*t)->nframes;
			for (; f < e; f++) {
				if (f->tag != FREETAG)
					continue;
				skb = f->skb;
				/* frame has no skb yet: try to give it one */
				if (!skb
				&& !(f->skb = skb = new_skb(ETH_ZLEN)))
					continue;
				if (atomic_read(&skb_shinfo(skb)->dataref)
					!= 1) {
					if (!rf)
						rf = f;
					continue;
				}
				/* also entered by goto from below, with f and
				 * skb set up from the pool
				 */
gotone:				skb_shinfo(skb)->nr_frags = skb->data_len = 0;
				skb_trim(skb, 0);
				d->tgt = t;
				ifrotate(*t);
				return f;
			}
			/* Work can be done, but the network layer is
			   holding our precious packets.  Try to grab
			   one from the pool. */
			f = rf;
			if (f == NULL) {	/* more paranoia */
				printk(KERN_ERR
					"aoe: freeframe: %s.\n",
					"unexpected null rf");
				d->flags |= DEVFL_KICKME;
				return NULL;
			}
			skb = skb_pool_get(d);
			if (skb) {
				/* swap the busy skb into the pool */
				skb_pool_put(d, f->skb);
				f->skb = skb;
				goto gotone;
			}
			/* NOTE(review): dataref here looks like a per-target
			 * count of "all skbs busy" events -- confirm in aoe.h
			 */
			(*t)->dataref++;
			/* nothing outstanding will trigger more work, so ask
			 * the timer to call aoecmd_work()
			 */
			if ((*t)->nout == 0)
				d->flags |= DEVFL_KICKME;
		}
		if (t == d->tgt)	/* we've looped and found nada */
			break;
		t++;
		if (t >= &d->targets[NTARGETS] || !*t)
			t = d->targets;
	}
	return NULL;
}
214 
/*
 * Build one ATA read or write frame for the buf in d->inprocess and
 * queue a clone of its skb on d->sendq.
 *
 * The transfer size is the chosen interface's maxbcnt (DEFAULTBCNT if
 * that is zero), limited to what remains of the current bio_vec.  The
 * buf's offset, residual and sector bookkeeping is advanced by that
 * amount, and d->inprocess is cleared once the whole buf has been
 * handed out.
 *
 * Returns 0 if no frame was available, 1 once a frame has been set up.
 * Callers must ensure d->inprocess is non-NULL (aoecmd_work does).
 */
static int
aoecmd_ata_rw(struct aoedev *d)
{
	struct frame *f;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct bio_vec *bv;
	struct aoetgt *t;
	struct sk_buff *skb;
	ulong bcnt;
	char writebit, extbit;

	/* bits OR-ed into WIN_READ below to form the final ATA command */
	writebit = 0x10;
	extbit = 0x4;

	f = freeframe(d);
	if (f == NULL)
		return 0;
	t = *d->tgt;	/* freeframe() pointed d->tgt at the frame's target */
	buf = d->inprocess;
	bv = buf->bv;
	bcnt = t->ifp->maxbcnt;
	if (bcnt == 0)
		bcnt = DEFAULTBCNT;
	if (bcnt > buf->bv_resid)
		bcnt = buf->bv_resid;
	/* initialize the headers & frame */
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, t, h);
	t->nout++;
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
	f->bcnt = bcnt;
	f->lba = buf->sector;

	/* set up ata header */
	ah->scnt = bcnt >> 9;	/* bytes to 512-byte sectors */
	put_lba(ah, buf->sector);
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (bio_data_dir(buf->bio) == WRITE) {
		/* write payload is attached as a page fragment, not copied */
		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
		ah->aflags |= AOEAFL_WRITE;
		skb->len += bcnt;
		skb->data_len = bcnt;
		t->wpkts++;
	} else {
		t->rpkts++;
		writebit = 0;
	}

	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bv_off += bcnt;
	buf->bv_resid -= bcnt;
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		/* everything in this buf has been handed to frames */
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		/* current bio_vec exhausted: move on to the next one */
		buf->bv = ++bv;
		buf->bv_resid = bv->bv_len;
		WARN_ON(buf->bv_resid == 0);
		buf->bv_off = bv->bv_offset;
	}

	skb->dev = t->ifp->nd;
	/* The frame keeps the original skb; only a clone is queued.  If the
	 * clone fails nothing is queued, but the frame stays marked as
	 * outstanding (tag set, nout counted).
	 */
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb) {
		if (d->sendq_hd)
			d->sendq_tl->next = skb;
		else
			d->sendq_hd = skb;
		d->sendq_tl = skb;
	}
	return 1;
}
305 
306 /* some callers cannot sleep, and they can call this function,
307  * transmitting the packets later, when interrupts are on
308  */
309 static struct sk_buff *
310 aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
311 {
312 	struct aoe_hdr *h;
313 	struct aoe_cfghdr *ch;
314 	struct sk_buff *skb, *sl, *sl_tail;
315 	struct net_device *ifp;
316 
317 	sl = sl_tail = NULL;
318 
319 	read_lock(&dev_base_lock);
320 	for_each_netdev(&init_net, ifp) {
321 		dev_hold(ifp);
322 		if (!is_aoe_netif(ifp))
323 			goto cont;
324 
325 		skb = new_skb(sizeof *h + sizeof *ch);
326 		if (skb == NULL) {
327 			printk(KERN_INFO "aoe: skb alloc failure\n");
328 			goto cont;
329 		}
330 		skb_put(skb, sizeof *h + sizeof *ch);
331 		skb->dev = ifp;
332 		if (sl_tail == NULL)
333 			sl_tail = skb;
334 		h = (struct aoe_hdr *) skb_mac_header(skb);
335 		memset(h, 0, sizeof *h + sizeof *ch);
336 
337 		memset(h->dst, 0xff, sizeof h->dst);
338 		memcpy(h->src, ifp->dev_addr, sizeof h->src);
339 		h->type = __constant_cpu_to_be16(ETH_P_AOE);
340 		h->verfl = AOE_HVER;
341 		h->major = cpu_to_be16(aoemajor);
342 		h->minor = aoeminor;
343 		h->cmd = AOECMD_CFG;
344 
345 		skb->next = sl;
346 		sl = skb;
347 cont:
348 		dev_put(ifp);
349 	}
350 	read_unlock(&dev_base_lock);
351 
352 	if (tail != NULL)
353 		*tail = sl_tail;
354 	return sl;
355 }
356 
357 static void
358 resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
359 {
360 	struct sk_buff *skb;
361 	struct aoe_hdr *h;
362 	struct aoe_atahdr *ah;
363 	char buf[128];
364 	u32 n;
365 
366 	ifrotate(t);
367 	n = newtag(t);
368 	skb = f->skb;
369 	h = (struct aoe_hdr *) skb_mac_header(skb);
370 	ah = (struct aoe_atahdr *) (h+1);
371 
372 	snprintf(buf, sizeof buf,
373 		"%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x "
374 		"s=%012llx d=%012llx nout=%d\n",
375 		"retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
376 		mac_addr(h->src),
377 		mac_addr(h->dst), t->nout);
378 	aoechr_error(buf);
379 
380 	f->tag = n;
381 	h->tag = cpu_to_be32(n);
382 	memcpy(h->dst, t->addr, sizeof h->dst);
383 	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
384 
385 	switch (ah->cmdstat) {
386 	default:
387 		break;
388 	case WIN_READ:
389 	case WIN_READ_EXT:
390 	case WIN_WRITE:
391 	case WIN_WRITE_EXT:
392 		put_lba(ah, f->lba);
393 
394 		n = f->bcnt;
395 		if (n > DEFAULTBCNT)
396 			n = DEFAULTBCNT;
397 		ah->scnt = n >> 9;
398 		if (ah->aflags & AOEAFL_WRITE) {
399 			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
400 				offset_in_page(f->bufaddr), n);
401 			skb->len = sizeof *h + sizeof *ah + n;
402 			skb->data_len = n;
403 		}
404 	}
405 	skb->dev = t->ifp->nd;
406 	skb = skb_clone(skb, GFP_ATOMIC);
407 	if (skb == NULL)
408 		return;
409 	if (d->sendq_hd)
410 		d->sendq_tl->next = skb;
411 	else
412 		d->sendq_hd = skb;
413 	d->sendq_tl = skb;
414 }
415 
416 static int
417 tsince(int tag)
418 {
419 	int n;
420 
421 	n = jiffies & 0xffff;
422 	n -= tag & 0xffff;
423 	if (n < 0)
424 		n += 1<<16;
425 	return n;
426 }
427 
428 static struct aoeif *
429 getif(struct aoetgt *t, struct net_device *nd)
430 {
431 	struct aoeif *p, *e;
432 
433 	p = t->ifs;
434 	e = p + NAOEIFS;
435 	for (; p < e; p++)
436 		if (p->nd == nd)
437 			return p;
438 	return NULL;
439 }
440 
441 static struct aoeif *
442 addif(struct aoetgt *t, struct net_device *nd)
443 {
444 	struct aoeif *p;
445 
446 	p = getif(t, NULL);
447 	if (!p)
448 		return NULL;
449 	p->nd = nd;
450 	p->maxbcnt = DEFAULTBCNT;
451 	p->lost = 0;
452 	p->lostjumbo = 0;
453 	return p;
454 }
455 
456 static void
457 ejectif(struct aoetgt *t, struct aoeif *ifp)
458 {
459 	struct aoeif *e;
460 	ulong n;
461 
462 	e = t->ifs + NAOEIFS - 1;
463 	n = (e - ifp) * sizeof *ifp;
464 	memmove(ifp, ifp+1, n);
465 	e->nd = NULL;
466 }
467 
468 static int
469 sthtith(struct aoedev *d)
470 {
471 	struct frame *f, *e, *nf;
472 	struct sk_buff *skb;
473 	struct aoetgt *ht = *d->htgt;
474 
475 	f = ht->frames;
476 	e = f + ht->nframes;
477 	for (; f < e; f++) {
478 		if (f->tag == FREETAG)
479 			continue;
480 		nf = freeframe(d);
481 		if (!nf)
482 			return 0;
483 		skb = nf->skb;
484 		*nf = *f;
485 		f->skb = skb;
486 		f->tag = FREETAG;
487 		nf->waited = 0;
488 		ht->nout--;
489 		(*d->tgt)->nout++;
490 		resend(d, *d->tgt, nf);
491 	}
492 	/* he's clean, he's useless.  take away his interfaces */
493 	memset(ht->ifs, 0, sizeof ht->ifs);
494 	d->htgt = NULL;
495 	return 1;
496 }
497 
498 static inline unsigned char
499 ata_scnt(unsigned char *packet) {
500 	struct aoe_hdr *h;
501 	struct aoe_atahdr *ah;
502 
503 	h = (struct aoe_hdr *) packet;
504 	ah = (struct aoe_atahdr *) (h+1);
505 	return ah->scnt;
506 }
507 
/*
 * Timer callback: retransmit frames that have gone unanswered for too
 * long.  vp is the struct aoedev pointer cast to ulong.
 *
 * For every target of the device, each outstanding frame older than
 * roughly 1.5 * rttavg is resent.  Along the way the function:
 *  - fails the whole device once a frame has waited more than
 *    aoe_deadsecs seconds,
 *  - nominates the target for help from other targets (d->htgt) after
 *    HELPWAIT seconds, when another target exists,
 *  - shrinks the target's window when it is full, and grows it again
 *    after it has stayed full for over ten seconds,
 *  - ejects an interface that keeps losing frames, unless it is the
 *    only one, and drops jumbo frames on an interface losing them.
 * Finally the timer is re-armed and the queued packets are transmitted
 * after the device lock has been released.
 */
static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct aoetgt *t, **tt, **te;
	struct aoeif *ifp;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	/* device is being torn down: do nothing and do not re-arm */
	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	tt = d->targets;
	te = tt + NTARGETS;
	for (; tt < te && *tt; tt++) {
		t = *tt;
		f = t->frames;
		e = f + t->nframes;
		for (; f < e; f++) {
			/* skip unused frames and frames still within timeout */
			if (f->tag == FREETAG
			|| tsince(f->tag) < timeout)
				continue;
			/* accumulate waiting time in ticks; n is in seconds */
			n = f->waited += timeout;
			n /= HZ;
			if (n > aoe_deadsecs) {
				/* waited too long.  device failure. */
				aoedev_downdev(d);
				break;
			}

			if (n > HELPWAIT /* see if another target can help */
			&& (tt != d->targets || d->targets[1]))
				d->htgt = tt;

			/* window was full when the loss happened: shrink it */
			if (t->nout == t->maxout) {
				if (t->maxout > 1)
					t->maxout--;
				t->lastwadj = jiffies;
			}

			/* too many losses on this interface: drop it, unless
			 * it is the target's only interface
			 */
			ifp = getif(t, f->skb->dev);
			if (ifp && ++ifp->lost > (t->nframes << 1)
			&& (ifp != t->ifs || t->ifs[1].nd)) {
				ejectif(t, ifp);
				ifp = NULL;
			}

			/* frame was larger than the default size */
			if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
			&& ifp && ++ifp->lostjumbo > (t->nframes << 1)
			&& ifp->maxbcnt != DEFAULTBCNT) {
				printk(KERN_INFO
					"aoe: e%ld.%d: "
					"too many lost jumbo on "
					"%s:%012llx - "
					"falling back to %d frames.\n",
					d->aoemajor, d->aoeminor,
					ifp->nd->name, mac_addr(t->addr),
					DEFAULTBCNT);
				/* zero makes aoecmd_ata_rw() use DEFAULTBCNT and
				 * stops aoecmd_cfg_rsp() from raising it again
				 */
				ifp->maxbcnt = 0;
			}
			resend(d, t, f);
		}

		/* window check */
		if (t->nout == t->maxout
		&& t->maxout < t->nframes
		&& (jiffies - t->lastwadj)/HZ > 10) {
			t->maxout++;
			t->lastwadj = jiffies;
		}
	}

	/* something was queued for sending: double rttavg, capped at
	 * MAXTIMER
	 */
	if (d->sendq_hd) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	if (d->flags & DEVFL_KICKME || d->htgt) {
		d->flags &= ~DEVFL_KICKME;
		aoecmd_work(d);
	}

	/* detach the send queue so it can be sent without the lock */
	sl = d->sendq_hd;
	d->sendq_hd = d->sendq_tl = NULL;

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}
613 
614 /* enters with d->lock held */
615 void
616 aoecmd_work(struct aoedev *d)
617 {
618 	struct buf *buf;
619 loop:
620 	if (d->htgt && !sthtith(d))
621 		return;
622 	if (d->inprocess == NULL) {
623 		if (list_empty(&d->bufq))
624 			return;
625 		buf = container_of(d->bufq.next, struct buf, bufs);
626 		list_del(d->bufq.next);
627 		d->inprocess = buf;
628 	}
629 	if (aoecmd_ata_rw(d))
630 		goto loop;
631 }
632 
633 /* this function performs work that has been deferred until sleeping is OK
634  */
635 void
636 aoecmd_sleepwork(struct work_struct *work)
637 {
638 	struct aoedev *d = container_of(work, struct aoedev, work);
639 
640 	if (d->flags & DEVFL_GDALLOC)
641 		aoeblk_gdalloc(d);
642 
643 	if (d->flags & DEVFL_NEWSIZE) {
644 		struct block_device *bd;
645 		unsigned long flags;
646 		u64 ssize;
647 
648 		ssize = d->gd->capacity;
649 		bd = bdget_disk(d->gd, 0);
650 
651 		if (bd) {
652 			mutex_lock(&bd->bd_inode->i_mutex);
653 			i_size_write(bd->bd_inode, (loff_t)ssize<<9);
654 			mutex_unlock(&bd->bd_inode->i_mutex);
655 			bdput(bd);
656 		}
657 		spin_lock_irqsave(&d->lock, flags);
658 		d->flags |= DEVFL_UP;
659 		d->flags &= ~DEVFL_NEWSIZE;
660 		spin_unlock_irqrestore(&d->lock, flags);
661 	}
662 }
663 
664 static void
665 ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
666 {
667 	u64 ssize;
668 	u16 n;
669 
670 	/* word 83: command set supported */
671 	n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
672 
673 	/* word 86: command set/feature enabled */
674 	n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
675 
676 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
677 		d->flags |= DEVFL_EXT;
678 
679 		/* word 100: number lba48 sectors */
680 		ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
681 
682 		/* set as in ide-disk.c:init_idedisk_capacity */
683 		d->geo.cylinders = ssize;
684 		d->geo.cylinders /= (255 * 63);
685 		d->geo.heads = 255;
686 		d->geo.sectors = 63;
687 	} else {
688 		d->flags &= ~DEVFL_EXT;
689 
690 		/* number lba28 sectors */
691 		ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
692 
693 		/* NOTE: obsolete in ATA 6 */
694 		d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
695 		d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
696 		d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
697 	}
698 
699 	if (d->ssize != ssize)
700 		printk(KERN_INFO
701 			"aoe: %012llx e%ld.%d v%04x has %llu sectors\n",
702 			mac_addr(t->addr),
703 			d->aoemajor, d->aoeminor,
704 			d->fw_ver, (long long)ssize);
705 	d->ssize = ssize;
706 	d->geo.start = 0;
707 	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
708 		return;
709 	if (d->gd != NULL) {
710 		d->gd->capacity = ssize;
711 		d->flags |= DEVFL_NEWSIZE;
712 	} else
713 		d->flags |= DEVFL_GDALLOC;
714 	schedule_work(&d->work);
715 }
716 
717 static void
718 calc_rttavg(struct aoedev *d, int rtt)
719 {
720 	register long n;
721 
722 	n = rtt;
723 	if (n < 0) {
724 		n = -rtt;
725 		if (n < MINTIMER)
726 			n = MINTIMER;
727 		else if (n > MAXTIMER)
728 			n = MAXTIMER;
729 		d->mintimer += (n - d->mintimer) >> 1;
730 	} else if (n < d->mintimer)
731 		n = d->mintimer;
732 	else if (n > MAXTIMER)
733 		n = MAXTIMER;
734 
735 	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
736 	n -= d->rttavg;
737 	d->rttavg += n >> 2;
738 }
739 
740 static struct aoetgt *
741 gettgt(struct aoedev *d, char *addr)
742 {
743 	struct aoetgt **t, **e;
744 
745 	t = d->targets;
746 	e = t + NTARGETS;
747 	for (; t < e && *t; t++)
748 		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
749 			return *t;
750 	return NULL;
751 }
752 
753 static inline void
754 diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
755 {
756 	unsigned long n_sect = bio->bi_size >> 9;
757 	const int rw = bio_data_dir(bio);
758 
759 	all_stat_inc(disk, ios[rw], sector);
760 	all_stat_add(disk, ticks[rw], duration, sector);
761 	all_stat_add(disk, sectors[rw], n_sect, sector);
762 	all_stat_add(disk, io_ticks, duration, sector);
763 }
764 
765 void
766 aoecmd_ata_rsp(struct sk_buff *skb)
767 {
768 	struct aoedev *d;
769 	struct aoe_hdr *hin, *hout;
770 	struct aoe_atahdr *ahin, *ahout;
771 	struct frame *f;
772 	struct buf *buf;
773 	struct sk_buff *sl;
774 	struct aoetgt *t;
775 	struct aoeif *ifp;
776 	register long n;
777 	ulong flags;
778 	char ebuf[128];
779 	u16 aoemajor;
780 
781 	hin = (struct aoe_hdr *) skb_mac_header(skb);
782 	aoemajor = be16_to_cpu(get_unaligned(&hin->major));
783 	d = aoedev_by_aoeaddr(aoemajor, hin->minor);
784 	if (d == NULL) {
785 		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
786 			"for unknown device %d.%d\n",
787 			 aoemajor, hin->minor);
788 		aoechr_error(ebuf);
789 		return;
790 	}
791 
792 	spin_lock_irqsave(&d->lock, flags);
793 
794 	n = be32_to_cpu(get_unaligned(&hin->tag));
795 	t = gettgt(d, hin->src);
796 	if (t == NULL) {
797 		printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
798 			d->aoemajor, d->aoeminor, mac_addr(hin->src));
799 		spin_unlock_irqrestore(&d->lock, flags);
800 		return;
801 	}
802 	f = getframe(t, n);
803 	if (f == NULL) {
804 		calc_rttavg(d, -tsince(n));
805 		spin_unlock_irqrestore(&d->lock, flags);
806 		snprintf(ebuf, sizeof ebuf,
807 			"%15s e%d.%d    tag=%08x@%08lx\n",
808 			"unexpected rsp",
809 			be16_to_cpu(get_unaligned(&hin->major)),
810 			hin->minor,
811 			be32_to_cpu(get_unaligned(&hin->tag)),
812 			jiffies);
813 		aoechr_error(ebuf);
814 		return;
815 	}
816 
817 	calc_rttavg(d, tsince(f->tag));
818 
819 	ahin = (struct aoe_atahdr *) (hin+1);
820 	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
821 	ahout = (struct aoe_atahdr *) (hout+1);
822 	buf = f->buf;
823 
824 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
825 		printk(KERN_ERR
826 			"aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
827 			ahout->cmdstat, ahin->cmdstat,
828 			d->aoemajor, d->aoeminor);
829 		if (buf)
830 			buf->flags |= BUFFL_FAIL;
831 	} else {
832 		if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
833 			d->htgt = NULL;
834 		n = ahout->scnt << 9;
835 		switch (ahout->cmdstat) {
836 		case WIN_READ:
837 		case WIN_READ_EXT:
838 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
839 				printk(KERN_ERR
840 					"aoe: %s.  skb->len=%d need=%ld\n",
841 					"runt data size in read", skb->len, n);
842 				/* fail frame f?  just returning will rexmit. */
843 				spin_unlock_irqrestore(&d->lock, flags);
844 				return;
845 			}
846 			memcpy(f->bufaddr, ahin+1, n);
847 		case WIN_WRITE:
848 		case WIN_WRITE_EXT:
849 			ifp = getif(t, skb->dev);
850 			if (ifp) {
851 				ifp->lost = 0;
852 				if (n > DEFAULTBCNT)
853 					ifp->lostjumbo = 0;
854 			}
855 			if (f->bcnt -= n) {
856 				f->lba += n >> 9;
857 				f->bufaddr += n;
858 				resend(d, t, f);
859 				goto xmit;
860 			}
861 			break;
862 		case WIN_IDENTIFY:
863 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
864 				printk(KERN_INFO
865 					"aoe: runt data size in ataid.  skb->len=%d\n",
866 					skb->len);
867 				spin_unlock_irqrestore(&d->lock, flags);
868 				return;
869 			}
870 			ataid_complete(d, t, (char *) (ahin+1));
871 			break;
872 		default:
873 			printk(KERN_INFO
874 				"aoe: unrecognized ata command %2.2Xh for %d.%d\n",
875 				ahout->cmdstat,
876 				be16_to_cpu(get_unaligned(&hin->major)),
877 				hin->minor);
878 		}
879 	}
880 
881 	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
882 		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
883 		n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
884 		bio_endio(buf->bio, n);
885 		mempool_free(buf, d->bufpool);
886 	}
887 
888 	f->buf = NULL;
889 	f->tag = FREETAG;
890 	t->nout--;
891 
892 	aoecmd_work(d);
893 xmit:
894 	sl = d->sendq_hd;
895 	d->sendq_hd = d->sendq_tl = NULL;
896 
897 	spin_unlock_irqrestore(&d->lock, flags);
898 	aoenet_xmit(sl);
899 }
900 
901 void
902 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
903 {
904 	struct sk_buff *sl;
905 
906 	sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL);
907 
908 	aoenet_xmit(sl);
909 }
910 
911 struct sk_buff *
912 aoecmd_ata_id(struct aoedev *d)
913 {
914 	struct aoe_hdr *h;
915 	struct aoe_atahdr *ah;
916 	struct frame *f;
917 	struct sk_buff *skb;
918 	struct aoetgt *t;
919 
920 	f = freeframe(d);
921 	if (f == NULL)
922 		return NULL;
923 
924 	t = *d->tgt;
925 
926 	/* initialize the headers & frame */
927 	skb = f->skb;
928 	h = (struct aoe_hdr *) skb_mac_header(skb);
929 	ah = (struct aoe_atahdr *) (h+1);
930 	skb_put(skb, sizeof *h + sizeof *ah);
931 	memset(h, 0, skb->len);
932 	f->tag = aoehdr_atainit(d, t, h);
933 	t->nout++;
934 	f->waited = 0;
935 
936 	/* set up ata header */
937 	ah->scnt = 1;
938 	ah->cmdstat = WIN_IDENTIFY;
939 	ah->lba3 = 0xa0;
940 
941 	skb->dev = t->ifp->nd;
942 
943 	d->rttavg = MAXTIMER;
944 	d->timer.function = rexmit_timer;
945 
946 	return skb_clone(skb, GFP_ATOMIC);
947 }
948 
949 static struct aoetgt *
950 addtgt(struct aoedev *d, char *addr, ulong nframes)
951 {
952 	struct aoetgt *t, **tt, **te;
953 	struct frame *f, *e;
954 
955 	tt = d->targets;
956 	te = tt + NTARGETS;
957 	for (; tt < te && *tt; tt++)
958 		;
959 
960 	if (tt == te) {
961 		printk(KERN_INFO
962 			"aoe: device addtgt failure; too many targets\n");
963 		return NULL;
964 	}
965 	t = kcalloc(1, sizeof *t, GFP_ATOMIC);
966 	f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
967 	if (!t || !f) {
968 		kfree(f);
969 		kfree(t);
970 		printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
971 		return NULL;
972 	}
973 
974 	t->nframes = nframes;
975 	t->frames = f;
976 	e = f + nframes;
977 	for (; f < e; f++)
978 		f->tag = FREETAG;
979 	memcpy(t->addr, addr, sizeof t->addr);
980 	t->ifp = t->ifs;
981 	t->maxout = t->nframes;
982 	return *tt = t;
983 }
984 
985 void
986 aoecmd_cfg_rsp(struct sk_buff *skb)
987 {
988 	struct aoedev *d;
989 	struct aoe_hdr *h;
990 	struct aoe_cfghdr *ch;
991 	struct aoetgt *t;
992 	struct aoeif *ifp;
993 	ulong flags, sysminor, aoemajor;
994 	struct sk_buff *sl;
995 	u16 n;
996 
997 	h = (struct aoe_hdr *) skb_mac_header(skb);
998 	ch = (struct aoe_cfghdr *) (h+1);
999 
1000 	/*
1001 	 * Enough people have their dip switches set backwards to
1002 	 * warrant a loud message for this special case.
1003 	 */
1004 	aoemajor = be16_to_cpu(get_unaligned(&h->major));
1005 	if (aoemajor == 0xfff) {
1006 		printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
1007 			"Check shelf dip switches.\n");
1008 		return;
1009 	}
1010 
1011 	sysminor = SYSMINOR(aoemajor, h->minor);
1012 	if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
1013 		printk(KERN_INFO "aoe: e%ld.%d: minor number too large\n",
1014 			aoemajor, (int) h->minor);
1015 		return;
1016 	}
1017 
1018 	n = be16_to_cpu(ch->bufcnt);
1019 	if (n > aoe_maxout)	/* keep it reasonable */
1020 		n = aoe_maxout;
1021 
1022 	d = aoedev_by_sysminor_m(sysminor);
1023 	if (d == NULL) {
1024 		printk(KERN_INFO "aoe: device sysminor_m failure\n");
1025 		return;
1026 	}
1027 
1028 	spin_lock_irqsave(&d->lock, flags);
1029 
1030 	t = gettgt(d, h->src);
1031 	if (!t) {
1032 		t = addtgt(d, h->src, n);
1033 		if (!t) {
1034 			spin_unlock_irqrestore(&d->lock, flags);
1035 			return;
1036 		}
1037 	}
1038 	ifp = getif(t, skb->dev);
1039 	if (!ifp) {
1040 		ifp = addif(t, skb->dev);
1041 		if (!ifp) {
1042 			printk(KERN_INFO
1043 				"aoe: device addif failure; "
1044 				"too many interfaces?\n");
1045 			spin_unlock_irqrestore(&d->lock, flags);
1046 			return;
1047 		}
1048 	}
1049 	if (ifp->maxbcnt) {
1050 		n = ifp->nd->mtu;
1051 		n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
1052 		n /= 512;
1053 		if (n > ch->scnt)
1054 			n = ch->scnt;
1055 		n = n ? n * 512 : DEFAULTBCNT;
1056 		if (n != ifp->maxbcnt) {
1057 			printk(KERN_INFO
1058 				"aoe: e%ld.%d: setting %d%s%s:%012llx\n",
1059 				d->aoemajor, d->aoeminor, n,
1060 				" byte data frames on ", ifp->nd->name,
1061 				mac_addr(t->addr));
1062 			ifp->maxbcnt = n;
1063 		}
1064 	}
1065 
1066 	/* don't change users' perspective */
1067 	if (d->nopen) {
1068 		spin_unlock_irqrestore(&d->lock, flags);
1069 		return;
1070 	}
1071 	d->fw_ver = be16_to_cpu(ch->fwver);
1072 
1073 	sl = aoecmd_ata_id(d);
1074 
1075 	spin_unlock_irqrestore(&d->lock, flags);
1076 
1077 	aoenet_xmit(sl);
1078 }
1079 
1080 void
1081 aoecmd_cleanslate(struct aoedev *d)
1082 {
1083 	struct aoetgt **t, **te;
1084 	struct aoeif *p, *e;
1085 
1086 	d->mintimer = MINTIMER;
1087 
1088 	t = d->targets;
1089 	te = t + NTARGETS;
1090 	for (; t < te && *t; t++) {
1091 		(*t)->maxout = (*t)->nframes;
1092 		p = (*t)->ifs;
1093 		e = p + NAOEIFS;
1094 		for (; p < e; p++) {
1095 			p->lostjumbo = 0;
1096 			p->lost = 0;
1097 			p->maxbcnt = DEFAULTBCNT;
1098 		}
1099 	}
1100 }
1101