xref: /openbmc/linux/drivers/block/aoe/aoecmd.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1 /* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
2 /*
3  * aoecmd.c
4  * Filesystem request handling methods
5  */
6 
7 #include <linux/hdreg.h>
8 #include <linux/blkdev.h>
9 #include <linux/skbuff.h>
10 #include <linux/netdevice.h>
11 #include "aoe.h"
12 
13 #define TIMERTICK (HZ / 10)
14 #define MINTIMER (2 * TIMERTICK)
15 #define MAXTIMER (HZ << 1)
16 #define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */
17 
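/*
 * Allocate and minimally initialize an skb big enough for one AoE frame:
 * the MAC and network header pointers both start at skb->data, the
 * protocol is ETH_P_AOE, and skb_put() extends the data area to the full
 * frame length.  GFP_ATOMIC is used because callers hold spinlocks
 * (d->lock or dev_base_lock).
 */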
18 static struct sk_buff *
19 new_skb(struct net_device *if_dev, ulong len)
20 {
21 	struct sk_buff *skb;
22 
23 	skb = alloc_skb(len, GFP_ATOMIC);
24 	if (skb) {
25 		skb->nh.raw = skb->mac.raw = skb->data;
26 		skb->dev = if_dev;
27 		skb->protocol = __constant_htons(ETH_P_AOE);
28 		skb->priority = 0;
29 		skb_put(skb, len);
30 		skb->next = skb->prev = NULL;
31 
32 		/* tell the network layer not to perform IP checksums
33 		 * and not to ask the NIC to do them
34 		 */
35 		skb->ip_summed = CHECKSUM_NONE;
36 	}
37 	return skb;
38 }
39 
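/*
 * Copy the prebuilt AoE/ATA headers from f->data into a fresh skb and,
 * for writes, append f->writedatalen bytes of payload from f->bufaddr.
 * On allocation failure we return NULL; the frame stays outstanding, so
 * rexmit_timer will retry it on a later tick.
 */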
40 static struct sk_buff *
41 skb_prepare(struct aoedev *d, struct frame *f)
42 {
43 	struct sk_buff *skb;
44 	char *p;
45 
46 	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
47 	if (!skb) {
48 		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
49 		return NULL;
50 	}
51 
52 	p = skb->mac.raw;
53 	memcpy(p, f->data, f->ndata);
54 
55 	if (f->writedatalen) {
56 		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
57 		memcpy(p, f->bufaddr, f->writedatalen);
58 	}
59 
60 	return skb;
61 }
62 
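/*
 * Linear scan of the device's frame table for a matching tag.  Callers
 * pass FREETAG to find an unused slot, or a response tag to locate the
 * frame it answers.
 */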
63 static struct frame *
64 getframe(struct aoedev *d, int tag)
65 {
66 	struct frame *f, *e;
67 
68 	f = d->frames;
69 	e = f + d->nframes;
70 	for (; f<e; f++)
71 		if (f->tag == tag)
72 			return f;
73 	return NULL;
74 }
75 
76 /*
77  * Leave the top bit clear so we have tagspace for userland.
78  * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
79  * This driver reserves tag -1 to mean "unused frame."
80  */
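/*
 * Example with hypothetical values: if ++d->lasttag is 0x001a and
 * jiffies ends in 0x3c07, newtag() returns 0x001a3c07.  tsince() later
 * recovers the frame's age as (jiffies & 0xffff) - 0x3c07, modulo 1<<16.
 */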
81 static int
82 newtag(struct aoedev *d)
83 {
84 	register ulong n;
85 
86 	n = jiffies & 0xffff;
87 	return n |= (++d->lasttag & 0x7fff) << 16;
88 }
89 
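/*
 * Fill in the Ethernet/AoE header fields common to every ATA command
 * sent to this device and return the newly allocated tag in host order
 * (the copy placed in the header is big-endian).
 */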
90 static int
91 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
92 {
93 	u16 type = __constant_cpu_to_be16(ETH_P_AOE);
94 	u16 aoemajor = __cpu_to_be16(d->aoemajor);
95 	u32 host_tag = newtag(d);
96 	u32 tag = __cpu_to_be32(host_tag);
97 
98 	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
99 	memcpy(h->dst, d->addr, sizeof h->dst);
100 	memcpy(h->type, &type, sizeof type);
101 	h->verfl = AOE_HVER;
102 	memcpy(h->major, &aoemajor, sizeof aoemajor);
103 	h->minor = d->aoeminor;
104 	h->cmd = AOECMD_ATA;
105 	memcpy(h->tag, &tag, sizeof tag);
106 
107 	return host_tag;
108 }
109 
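/*
 * Build one ATA read or write frame covering up to MAXATADATA bytes of
 * the buf in d->inprocess, advance the buf's sector/residual accounting
 * (stepping to the next bio_vec when the current one is exhausted), and
 * chain the prepared skb on d->skblist for the caller to transmit once
 * d->lock is dropped.
 */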
110 static void
111 aoecmd_ata_rw(struct aoedev *d, struct frame *f)
112 {
113 	struct aoe_hdr *h;
114 	struct aoe_atahdr *ah;
115 	struct buf *buf;
116 	struct sk_buff *skb;
117 	ulong bcnt;
118 	register sector_t sector;
119 	char writebit, extbit;
120 
121 	writebit = 0x10;
122 	extbit = 0x4;
123 
124 	buf = d->inprocess;
125 
126 	sector = buf->sector;
127 	bcnt = buf->bv_resid;
128 	if (bcnt > MAXATADATA)
129 		bcnt = MAXATADATA;
130 
131 	/* initialize the headers & frame */
132 	h = (struct aoe_hdr *) f->data;
133 	ah = (struct aoe_atahdr *) (h+1);
134 	f->ndata = sizeof *h + sizeof *ah;
135 	memset(h, 0, f->ndata);
136 	f->tag = aoehdr_atainit(d, h);
137 	f->waited = 0;
138 	f->buf = buf;
139 	f->bufaddr = buf->bufaddr;
140 
141 	/* set up ata header */
142 	ah->scnt = bcnt >> 9;
143 	ah->lba0 = sector;
144 	ah->lba1 = sector >>= 8;
145 	ah->lba2 = sector >>= 8;
146 	ah->lba3 = sector >>= 8;
147 	if (d->flags & DEVFL_EXT) {
148 		ah->aflags |= AOEAFL_EXT;
149 		ah->lba4 = sector >>= 8;
150 		ah->lba5 = sector >>= 8;
151 	} else {
152 		extbit = 0;
153 		ah->lba3 &= 0x0f;
154 		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
155 	}
156 
157 	if (bio_data_dir(buf->bio) == WRITE) {
158 		ah->aflags |= AOEAFL_WRITE;
159 		f->writedatalen = bcnt;
160 	} else {
161 		writebit = 0;
162 		f->writedatalen = 0;
163 	}
164 
165 	ah->cmdstat = WIN_READ | writebit | extbit;
166 
167 	/* mark all tracking fields and load out */
168 	buf->nframesout += 1;
169 	buf->bufaddr += bcnt;
170 	buf->bv_resid -= bcnt;
171 /* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
172 	buf->resid -= bcnt;
173 	buf->sector += bcnt >> 9;
174 	if (buf->resid == 0) {
175 		d->inprocess = NULL;
176 	} else if (buf->bv_resid == 0) {
177 		buf->bv++;
178 		buf->bv_resid = buf->bv->bv_len;
179 		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
180 	}
181 
182 	skb = skb_prepare(d, f);
183 	if (skb) {
184 		skb->next = d->skblist;
185 		d->skblist = skb;
186 	}
187 }
188 
189 /* enters with d->lock held */
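/*
 * Pull the next queued buf into d->inprocess when needed and keep
 * issuing ATA frames until free frames or pending work run out.  The
 * skbs collect on d->skblist; callers transmit them after unlocking.
 */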
190 void
191 aoecmd_work(struct aoedev *d)
192 {
193 	struct frame *f;
194 	struct buf *buf;
195 loop:
196 	f = getframe(d, FREETAG);
197 	if (f == NULL)
198 		return;
199 	if (d->inprocess == NULL) {
200 		if (list_empty(&d->bufq))
201 			return;
202 		buf = container_of(d->bufq.next, struct buf, bufs);
203 		list_del(d->bufq.next);
204 /*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
205 		d->inprocess = buf;
206 	}
207 	aoecmd_ata_rw(d, f);
208 	goto loop;
209 }
210 
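/*
 * Retransmit a frame: give it a fresh tag, log the event via
 * aoechr_error(), and queue a new copy on d->skblist.  A late reply
 * carrying the old tag no longer matches any frame and is dropped as an
 * unexpected response.
 */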
211 static void
212 rexmit(struct aoedev *d, struct frame *f)
213 {
214 	struct sk_buff *skb;
215 	struct aoe_hdr *h;
216 	char buf[128];
217 	u32 n;
218 	u32 net_tag;
219 
220 	n = newtag(d);
221 
222 	snprintf(buf, sizeof buf,
223 		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
224 		"retransmit",
225 		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
226 	aoechr_error(buf);
227 
228 	h = (struct aoe_hdr *) f->data;
229 	f->tag = n;
230 	net_tag = __cpu_to_be32(n);
231 	memcpy(h->tag, &net_tag, sizeof net_tag);
232 
233 	skb = skb_prepare(d, f);
234 	if (skb) {
235 		skb->next = d->skblist;
236 		d->skblist = skb;
237 	}
238 }
239 
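/*
 * How many jiffies ago the frame carrying this tag was (re)transmitted,
 * recovered from the tick stamp in the tag's low 16 bits.  The result is
 * taken modulo 1<<16 so a wrap does not produce a negative age.
 */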
240 static int
241 tsince(int tag)
242 {
243 	int n;
244 
245 	n = jiffies & 0xffff;
246 	n -= tag & 0xffff;
247 	if (n < 0)
248 		n += 1<<16;
249 	return n;
250 }
251 
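/*
 * Per-device timer, rearmed every TIMERTICK.  Frames outstanding longer
 * than about 1.5 * rttavg are retransmitted, and a frame that has
 * accumulated more than MAXWAIT seconds of waiting fails the whole
 * device.  Whenever anything had to be retransmitted, rttavg is doubled
 * (capped at MAXTIMER) as a simple backoff before the skbs go out.
 */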
252 static void
253 rexmit_timer(ulong vp)
254 {
255 	struct aoedev *d;
256 	struct frame *f, *e;
257 	struct sk_buff *sl;
258 	register long timeout;
259 	ulong flags, n;
260 
261 	d = (struct aoedev *) vp;
262 	sl = NULL;
263 
264 	/* timeout is always ~150% of the moving average */
265 	timeout = d->rttavg;
266 	timeout += timeout >> 1;
267 
268 	spin_lock_irqsave(&d->lock, flags);
269 
270 	if (d->flags & DEVFL_TKILL) {
271 tdie:		spin_unlock_irqrestore(&d->lock, flags);
272 		return;
273 	}
274 	f = d->frames;
275 	e = f + d->nframes;
276 	for (; f<e; f++) {
277 		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
278 			n = f->waited += timeout;
279 			n /= HZ;
280 			if (n > MAXWAIT) { /* waited too long.  device failure. */
281 				aoedev_downdev(d);
282 				goto tdie;
283 			}
284 			rexmit(d, f);
285 		}
286 	}
287 
288 	sl = d->skblist;
289 	d->skblist = NULL;
290 	if (sl) {
291 		n = d->rttavg <<= 1;
292 		if (n > MAXTIMER)
293 			d->rttavg = MAXTIMER;
294 	}
295 
296 	d->timer.expires = jiffies + TIMERTICK;
297 	add_timer(&d->timer);
298 
299 	spin_unlock_irqrestore(&d->lock, flags);
300 
301 	aoenet_xmit(sl);
302 }
303 
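/*
 * Digest the little-endian ATA IDENTIFY DEVICE data: if either word 83
 * (supported) or word 86 (enabled) advertises LBA48, take the 48-bit
 * sector count from words 100-103 and fake a 255-head, 63-sector
 * geometry; otherwise use the 28-bit count from words 60-61 and the
 * legacy CHS values in words 54-56.  Then update the gendisk capacity,
 * or schedule aoeblk_gdalloc() to create the gendisk if none exists yet.
 */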
304 static void
305 ataid_complete(struct aoedev *d, unsigned char *id)
306 {
307 	u64 ssize;
308 	u16 n;
309 
310 	/* word 83: command set supported */
311 	n = __le16_to_cpu(*((u16 *) &id[83<<1]));
312 
313 	/* word 86: command set/feature enabled */
314 	n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
315 
316 	if (n & (1<<10)) {	/* bit 10: LBA 48 */
317 		d->flags |= DEVFL_EXT;
318 
319 		/* word 100: number lba48 sectors */
320 		ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
321 
322 		/* set as in ide-disk.c:init_idedisk_capacity */
323 		d->geo.cylinders = ssize;
324 		d->geo.cylinders /= (255 * 63);
325 		d->geo.heads = 255;
326 		d->geo.sectors = 63;
327 	} else {
328 		d->flags &= ~DEVFL_EXT;
329 
330 		/* number lba28 sectors */
331 		ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
332 
333 		/* NOTE: obsolete in ATA 6 */
334 		d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
335 		d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
336 		d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
337 	}
338 	d->ssize = ssize;
339 	d->geo.start = 0;
340 	if (d->gd != NULL) {
341 		d->gd->capacity = ssize;
342 		d->flags |= DEVFL_UP;
343 		return;
344 	}
345 	if (d->flags & DEVFL_WORKON) {
346 		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
347 			"(This really shouldn't happen).\n");
348 		return;
349 	}
350 	INIT_WORK(&d->work, aoeblk_gdalloc, d);
351 	schedule_work(&d->work);
352 	d->flags |= DEVFL_WORKON;
353 }
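/*
 * Fold a new round-trip sample into the moving average: the sample is
 * first clamped to [MINTIMER, MAXTIMER], then applied with a gain of
 * 1/4, i.e. rttavg += (rtt - rttavg) >> 2.
 */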
354 
355 static void
356 calc_rttavg(struct aoedev *d, int rtt)
357 {
358 	register long n;
359 
360 	n = rtt;
361 	if (n < MINTIMER)
362 		n = MINTIMER;
363 	else if (n > MAXTIMER)
364 		n = MAXTIMER;
365 
366 	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
367 	n -= d->rttavg;
368 	d->rttavg += n >> 2;
369 }
370 
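/*
 * Receive path for ATA responses.  Look up the device by source MAC and
 * the frame by tag (logging anything that matches neither), feed the
 * measured round-trip time to calc_rttavg(), then finish the command:
 * copy read data into the buf, hand IDENTIFY data to ataid_complete(),
 * or flag the buf as failed on an ATA error.  When the last outstanding
 * frame of a buf completes and nothing is left to send for it, its bio
 * is ended (-EIO if any frame failed).  Finally the frame is freed and
 * aoecmd_work() is run to issue more frames.
 */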
371 void
372 aoecmd_ata_rsp(struct sk_buff *skb)
373 {
374 	struct aoedev *d;
375 	struct aoe_hdr *hin;
376 	struct aoe_atahdr *ahin, *ahout;
377 	struct frame *f;
378 	struct buf *buf;
379 	struct sk_buff *sl;
380 	register long n;
381 	ulong flags;
382 	char ebuf[128];
383 
384 	hin = (struct aoe_hdr *) skb->mac.raw;
385 	d = aoedev_bymac(hin->src);
386 	if (d == NULL) {
387 		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
388 			"for unknown device %d.%d\n",
389 			 __be16_to_cpu(*((u16 *) hin->major)),
390 			hin->minor);
391 		aoechr_error(ebuf);
392 		return;
393 	}
394 
395 	spin_lock_irqsave(&d->lock, flags);
396 
397 	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
398 	if (f == NULL) {
399 		spin_unlock_irqrestore(&d->lock, flags);
400 		snprintf(ebuf, sizeof ebuf,
401 			"%15s e%d.%d    tag=%08x@%08lx\n",
402 			"unexpected rsp",
403 			__be16_to_cpu(*((u16 *) hin->major)),
404 			hin->minor,
405 			__be32_to_cpu(*((u32 *) hin->tag)),
406 			jiffies);
407 		aoechr_error(ebuf);
408 		return;
409 	}
410 
411 	calc_rttavg(d, tsince(f->tag));
412 
413 	ahin = (struct aoe_atahdr *) (hin+1);
414 	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
415 	buf = f->buf;
416 
417 	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
418 		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
419 			"stat=%2.2Xh from e%ld.%ld\n",
420 			ahout->cmdstat, ahin->cmdstat,
421 			d->aoemajor, d->aoeminor);
422 		if (buf)
423 			buf->flags |= BUFFL_FAIL;
424 	} else {
425 		switch (ahout->cmdstat) {
426 		case WIN_READ:
427 		case WIN_READ_EXT:
428 			n = ahout->scnt << 9;
429 			if (skb->len - sizeof *hin - sizeof *ahin < n) {
430 				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
431 					"ata data size in read.  skb->len=%d\n",
432 					skb->len);
433 				/* fail frame f?  just returning will rexmit. */
434 				spin_unlock_irqrestore(&d->lock, flags);
435 				return;
436 			}
437 			memcpy(f->bufaddr, ahin+1, n);
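			/* fall through: nothing further to do for reads, same as writes */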
438 		case WIN_WRITE:
439 		case WIN_WRITE_EXT:
440 			break;
441 		case WIN_IDENTIFY:
442 			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
443 				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
444 					"in ataid.  skb->len=%d\n", skb->len);
445 				spin_unlock_irqrestore(&d->lock, flags);
446 				return;
447 			}
448 			ataid_complete(d, (char *) (ahin+1));
449 			/* d->flags |= DEVFL_WC_UPDATE; */
450 			break;
451 		default:
452 			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
453 			       "outbound ata command %2.2Xh for %d.%d\n",
454 			       ahout->cmdstat,
455 			       __be16_to_cpu(*((u16 *) hin->major)),
456 			       hin->minor);
457 		}
458 	}
459 
460 	if (buf) {
461 		buf->nframesout -= 1;
462 		if (buf->nframesout == 0 && buf->resid == 0) {
463 			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
464 			bio_endio(buf->bio, buf->bio->bi_size, n);
465 			mempool_free(buf, d->bufpool);
466 		}
467 	}
468 
469 	f->buf = NULL;
470 	f->tag = FREETAG;
471 
472 	aoecmd_work(d);
473 
474 	sl = d->skblist;
475 	d->skblist = NULL;
476 
477 	spin_unlock_irqrestore(&d->lock, flags);
478 
479 	aoenet_xmit(sl);
480 }
481 
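/*
 * Broadcast an AoE config query for this shelf and slot on every network
 * interface the driver is allowed to use (is_aoe_netif()).  Responding
 * targets are picked up later by aoecmd_cfg_rsp().
 */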
482 void
483 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
484 {
485 	struct aoe_hdr *h;
486 	struct aoe_cfghdr *ch;
487 	struct sk_buff *skb, *sl;
488 	struct net_device *ifp;
489 	u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
490 	u16 net_aoemajor = __cpu_to_be16(aoemajor);
491 
492 	sl = NULL;
493 
494 	read_lock(&dev_base_lock);
495 	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
496 		dev_hold(ifp);
497 		if (!is_aoe_netif(ifp))
498 			continue;
499 
500 		skb = new_skb(ifp, sizeof *h + sizeof *ch);
501 		if (skb == NULL) {
502 			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
503 			continue;
504 		}
505 		h = (struct aoe_hdr *) skb->mac.raw;
506 		memset(h, 0, sizeof *h + sizeof *ch);
507 
508 		memset(h->dst, 0xff, sizeof h->dst);
509 		memcpy(h->src, ifp->dev_addr, sizeof h->src);
510 		memcpy(h->type, &aoe_type, sizeof aoe_type);
511 		h->verfl = AOE_HVER;
512 		memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
513 		h->minor = aoeminor;
514 		h->cmd = AOECMD_CFG;
515 
516 		skb->next = sl;
517 		sl = skb;
518 	}
519 	read_unlock(&dev_base_lock);
520 
521 	aoenet_xmit(sl);
522 }
523 
524 /*
525  * Since we only call this in one place (and it only prepares one frame)
526  * we just return the skb.  Usually we'd chain it up to the d->skblist.
527  */
528 static struct sk_buff *
529 aoecmd_ata_id(struct aoedev *d)
530 {
531 	struct aoe_hdr *h;
532 	struct aoe_atahdr *ah;
533 	struct frame *f;
534 	struct sk_buff *skb;
535 
536 	f = getframe(d, FREETAG);
537 	if (f == NULL) {
538 		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame.  "
539 			"This shouldn't happen.\n");
540 		return NULL;
541 	}
542 
543 	/* initialize the headers & frame */
544 	h = (struct aoe_hdr *) f->data;
545 	ah = (struct aoe_atahdr *) (h+1);
546 	f->ndata = sizeof *h + sizeof *ah;
547 	memset(h, 0, f->ndata);
548 	f->tag = aoehdr_atainit(d, h);
549 	f->waited = 0;
550 	f->writedatalen = 0;
551 
552 	/* this message initializes the device, so we reset the rttavg */
553 	d->rttavg = MAXTIMER;
554 
555 	/* set up ata header */
556 	ah->scnt = 1;
557 	ah->cmdstat = WIN_IDENTIFY;
558 	ah->lba3 = 0xa0;
559 
560 	skb = skb_prepare(d, f);
561 
562 	/* we now want to start the rexmit tracking */
563 	d->flags &= ~DEVFL_TKILL;
564 	d->timer.data = (ulong) d;
565 	d->timer.function = rexmit_timer;
566 	d->timer.expires = jiffies + TIMERTICK;
567 	add_timer(&d->timer);
568 
569 	return skb;
570 }
571 
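/*
 * Handle a config query response: reject the all-ones shelf address and
 * out-of-range system minors, clamp the target's advertised buffer count
 * to MAXFRAMES, look up or create the matching aoedev, and, if the
 * device is not already up or closing, record its firmware version and
 * send an ATA identify command to size it.
 */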
572 void
573 aoecmd_cfg_rsp(struct sk_buff *skb)
574 {
575 	struct aoedev *d;
576 	struct aoe_hdr *h;
577 	struct aoe_cfghdr *ch;
578 	ulong flags, bufcnt, sysminor, aoemajor;
579 	struct sk_buff *sl;
580 	enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
581 
582 	h = (struct aoe_hdr *) skb->mac.raw;
583 	ch = (struct aoe_cfghdr *) (h+1);
584 
585 	/*
586 	 * Enough people have their dip switches set backwards to
587 	 * warrant a loud message for this special case.
588 	 */
589 	aoemajor = __be16_to_cpu(*((u16 *) h->major));
590 	if (aoemajor == 0xfff) {
591 		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
592 			"address is all ones.  Check shelf dip switches\n");
593 		return;
594 	}
595 
596 	sysminor = SYSMINOR(aoemajor, h->minor);
597 	if (sysminor > MAXSYSMINOR) {
598 		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
599 			"large\n", sysminor);
600 		return;
601 	}
602 
603 	bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
604 	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
605 		bufcnt = MAXFRAMES;
606 
607 	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
608 	if (d == NULL) {
609 		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
610 		return;
611 	}
612 
613 	spin_lock_irqsave(&d->lock, flags);
614 
615 	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
616 		spin_unlock_irqrestore(&d->lock, flags);
617 		return;
618 	}
619 
620 	d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
621 
622 	/* we get here only if the device is new */
623 	sl = aoecmd_ata_id(d);
624 
625 	spin_unlock_irqrestore(&d->lock, flags);
626 
627 	aoenet_xmit(sl);
628 }
629 
630