/* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/hdreg.h>
#include <linux/blkdev.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include "aoe.h"

#define TIMERTICK (HZ / 10)
#define MINTIMER (2 * TIMERTICK)
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3)	/* After MAXWAIT seconds, give up and fail dev */

static struct sk_buff *
new_skb(struct net_device *if_dev, ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (skb) {
		skb->nh.raw = skb->mac.raw = skb->data;
		skb->dev = if_dev;
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb->priority = 0;
		skb_put(skb, len);
		skb->next = skb->prev = NULL;

		/* tell the network layer not to perform IP checksums
		 * or to get the NIC to do it
		 */
		skb->ip_summed = CHECKSUM_NONE;
	}
	return skb;
}

static struct sk_buff *
skb_prepare(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	char *p;

	skb = new_skb(d->ifp, f->ndata + f->writedatalen);
	if (!skb) {
		printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
		return NULL;
	}

	p = skb->mac.raw;
	memcpy(p, f->data, f->ndata);

	if (f->writedatalen) {
		p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
		memcpy(p, f->bufaddr, f->writedatalen);
	}

	return skb;
}

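/*
 * Rough on-the-wire layout of a frame built by skb_prepare() above
 * (a sketch for orientation only):
 *
 *   +----------------+-------------------+-------------------------+
 *   | struct aoe_hdr | struct aoe_atahdr | write payload, if any   |
 *   +----------------+-------------------+-------------------------+
 *   |<------------ f->ndata ------------>|<--- f->writedatalen --->|
 */
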
static struct frame *
getframe(struct aoedev *d, int tag)
{
	struct frame *f, *e;

	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++)
		if (f->tag == tag)
			return f;
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n |= (++d->lasttag & 0x7fff) << 16;
}

static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
	u16 type = __constant_cpu_to_be16(ETH_P_AOE);
	u16 aoemajor = __cpu_to_be16(d->aoemajor);
	u32 host_tag = newtag(d);
	u32 tag = __cpu_to_be32(host_tag);

	memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
	memcpy(h->dst, d->addr, sizeof h->dst);
	memcpy(h->type, &type, sizeof type);
	h->verfl = AOE_HVER;
	memcpy(h->major, &aoemajor, sizeof aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	memcpy(h->tag, &tag, sizeof tag);

	return host_tag;
}

static void
aoecmd_ata_rw(struct aoedev *d, struct frame *f)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct buf *buf;
	struct sk_buff *skb;
	ulong bcnt;
	register sector_t sector;
	char writebit, extbit;

	writebit = 0x10;
	extbit = 0x4;

	buf = d->inprocess;

	sector = buf->sector;
	bcnt = buf->bv_resid;
	if (bcnt > MAXATADATA)
		bcnt = MAXATADATA;

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->buf = buf;
	f->bufaddr = buf->bufaddr;

	/* set up ata header */
	ah->scnt = bcnt >> 9;
	ah->lba0 = sector;
	ah->lba1 = sector >>= 8;
	ah->lba2 = sector >>= 8;
	ah->lba3 = sector >>= 8;
	if (d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
		ah->lba4 = sector >>= 8;
		ah->lba5 = sector >>= 8;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}

	if (bio_data_dir(buf->bio) == WRITE) {
		ah->aflags |= AOEAFL_WRITE;
		f->writedatalen = bcnt;
	} else {
		writebit = 0;
		f->writedatalen = 0;
	}

	ah->cmdstat = WIN_READ | writebit | extbit;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;
	buf->bufaddr += bcnt;
	buf->bv_resid -= bcnt;
	/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
	buf->resid -= bcnt;
	buf->sector += bcnt >> 9;
	if (buf->resid == 0) {
		d->inprocess = NULL;
	} else if (buf->bv_resid == 0) {
		buf->bv++;
		buf->bv_resid = buf->bv->bv_len;
		buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
	}

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = d->skblist;
		d->skblist = skb;
	}
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;
loop:
	f = getframe(d, FREETAG);
	if (f == NULL)
		return;
	if (d->inprocess == NULL) {
		if (list_empty(&d->bufq))
			return;
		buf = container_of(d->bufq.next, struct buf, bufs);
		list_del(d->bufq.next);
		/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
		d->inprocess = buf;
	}
	aoecmd_ata_rw(d, f);
	goto loop;
}

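/*
 * Transmit batching: while d->lock is held, newly built frames are only
 * chained onto d->skblist via skb->next.  Callers detach the list and
 * hand it to aoenet_xmit() after dropping the lock, so no packets are
 * sent with the spinlock held.
 */
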
static void
rexmit(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct aoe_hdr *h;
	char buf[128];
	u32 n;
	u32 net_tag;

	n = newtag(d);

	snprintf(buf, sizeof buf,
		"%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
		"retransmit",
		d->aoemajor, d->aoeminor, f->tag, jiffies, n);
	aoechr_error(buf);

	h = (struct aoe_hdr *) f->data;
	f->tag = n;
	net_tag = __cpu_to_be32(n);
	memcpy(h->tag, &net_tag, sizeof net_tag);

	skb = skb_prepare(d, f);
	if (skb) {
		skb->next = d->skblist;
		d->skblist = skb;
	}
}

static int
tsince(int tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return n;
}

static void
rexmit_timer(ulong vp)
{
	struct aoedev *d;
	struct frame *f, *e;
	struct sk_buff *sl;
	register long timeout;
	ulong flags, n;

	d = (struct aoedev *) vp;
	sl = NULL;

	/* timeout is always ~150% of the moving average */
	timeout = d->rttavg;
	timeout += timeout >> 1;

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & DEVFL_TKILL) {
tdie:		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}
	f = d->frames;
	e = f + d->nframes;
	for (; f<e; f++) {
		if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
			n = f->waited += timeout;
			n /= HZ;
			if (n > MAXWAIT) { /* waited too long.  device failure. */
				aoedev_downdev(d);
				goto tdie;
			}
			rexmit(d, f);
		}
	}

	sl = d->skblist;
	d->skblist = NULL;
	if (sl) {
		n = d->rttavg <<= 1;
		if (n > MAXTIMER)
			d->rttavg = MAXTIMER;
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

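/*
 * The IDENTIFY DEVICE payload parsed below is an array of 256 16-bit
 * words, little-endian on the wire, so word N starts at byte offset
 * N * 2 -- hence the id[N<<1] indexing and the __le*_to_cpu conversions
 * in ataid_complete().
 */
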
static void
ataid_complete(struct aoedev *d, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = __le16_to_cpu(*((u16 *) &id[83<<1]));

	/* word 86: command set/feature enabled */
	n |= __le16_to_cpu(*((u16 *) &id[86<<1]));

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
		d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
		d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
	}
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->gd != NULL) {
		d->gd->capacity = ssize;
		d->flags |= DEVFL_UP;
		return;
	}
	if (d->flags & DEVFL_WORKON) {
		printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on! "
			"(This really shouldn't happen).\n");
		return;
	}
	INIT_WORK(&d->work, aoeblk_gdalloc, d);
	schedule_work(&d->work);
	d->flags |= DEVFL_WORKON;
}

static void
calc_rttavg(struct aoedev *d, int rtt)
{
	register long n;

	n = rtt;
	if (n < MINTIMER)
		n = MINTIMER;
	else if (n > MAXTIMER)
		n = MAXTIMER;

	/* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
	n -= d->rttavg;
	d->rttavg += n >> 2;
}

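/*
 * In effect calc_rttavg() maintains an exponentially weighted moving
 * average with gain 1/4 over the clamped sample:
 *
 *	rttavg += (rtt - rttavg) >> 2
 *
 * rexmit_timer() retransmits any frame older than roughly 1.5 * rttavg
 * ticks, and doubles rttavg (capped at MAXTIMER) whenever it does so,
 * as a crude backoff.
 */
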
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *hin;
	struct aoe_atahdr *ahin, *ahout;
	struct frame *f;
	struct buf *buf;
	struct sk_buff *sl;
	register long n;
	ulong flags;
	char ebuf[128];

	hin = (struct aoe_hdr *) skb->mac.raw;
	d = aoedev_bymac(hin->src);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor);
		aoechr_error(ebuf);
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
	if (f == NULL) {
		spin_unlock_irqrestore(&d->lock, flags);
		snprintf(ebuf, sizeof ebuf,
			"%15s e%d.%d tag=%08x@%08lx\n",
			"unexpected rsp",
			__be16_to_cpu(*((u16 *) hin->major)),
			hin->minor,
			__be32_to_cpu(*((u32 *) hin->tag)),
			jiffies);
		aoechr_error(ebuf);
		return;
	}

	calc_rttavg(d, tsince(f->tag));

	ahin = (struct aoe_atahdr *) (hin+1);
	ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
	buf = f->buf;

	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
			"stat=%2.2Xh from e%ld.%ld\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
		if (buf)
			buf->flags |= BUFFL_FAIL;
	} else {
		switch (ahout->cmdstat) {
		case WIN_READ:
		case WIN_READ_EXT:
			n = ahout->scnt << 9;
			if (skb->len - sizeof *hin - sizeof *ahin < n) {
				printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
					"ata data size in read. skb->len=%d\n",
					skb->len);
				/* fail frame f?  just returning will rexmit. */
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			memcpy(f->bufaddr, ahin+1, n);
		case WIN_WRITE:
		case WIN_WRITE_EXT:
			break;
		case WIN_IDENTIFY:
			if (skb->len - sizeof *hin - sizeof *ahin < 512) {
				printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
					"in ataid. skb->len=%d\n", skb->len);
				spin_unlock_irqrestore(&d->lock, flags);
				return;
			}
			ataid_complete(d, (char *) (ahin+1));
			/* d->flags |= DEVFL_WC_UPDATE; */
			break;
		default:
			printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
				"outbound ata command %2.2Xh for %d.%d\n",
				ahout->cmdstat,
				__be16_to_cpu(*((u16 *) hin->major)),
				hin->minor);
		}
	}

	if (buf) {
		buf->nframesout -= 1;
		if (buf->nframesout == 0 && buf->resid == 0) {
			n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
			bio_endio(buf->bio, buf->bio->bi_size, n);
			mempool_free(buf, d->bufpool);
		}
	}

	f->buf = NULL;
	f->tag = FREETAG;

	aoecmd_work(d);

	sl = d->skblist;
	d->skblist = NULL;

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb, *sl;
	struct net_device *ifp;
	u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
	u16 net_aoemajor = __cpu_to_be16(aoemajor);

	sl = NULL;

	read_lock(&dev_base_lock);
	for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp))
			continue;

		skb = new_skb(ifp, sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
			continue;
		}
		h = (struct aoe_hdr *) skb->mac.raw;
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		memcpy(h->type, &aoe_type, sizeof aoe_type);
		h->verfl = AOE_HVER;
		memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;

		skb->next = sl;
		sl = skb;
	}
	read_unlock(&dev_base_lock);

	aoenet_xmit(sl);
}

/*
 * Since we only call this in one place (and it only prepares one frame)
 * we just return the skb.  Usually we'd chain it up to the d->skblist.
 */
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;

	f = getframe(d, FREETAG);
	if (f == NULL) {
		printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame. "
			"This shouldn't happen.\n");
		return NULL;
	}

	/* initialize the headers & frame */
	h = (struct aoe_hdr *) f->data;
	ah = (struct aoe_atahdr *) (h+1);
	f->ndata = sizeof *h + sizeof *ah;
	memset(h, 0, f->ndata);
	f->tag = aoehdr_atainit(d, h);
	f->waited = 0;
	f->writedatalen = 0;

	/* this message initializes the device, so we reset the rttavg */
	d->rttavg = MAXTIMER;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = WIN_IDENTIFY;
	ah->lba3 = 0xa0;

	skb = skb_prepare(d, f);

	/* we now want to start the rexmit tracking */
	d->flags &= ~DEVFL_TKILL;
	d->timer.data = (ulong) d;
	d->timer.function = rexmit_timer;
	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	return skb;
}

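/*
 * Discovery path: aoecmd_cfg() above broadcasts a config query on every
 * AoE-capable interface; aoecmd_cfg_rsp() below handles a target's reply,
 * sets up the aoedev, and transmits the initial ATA identify prepared by
 * aoecmd_ata_id().
 */
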
void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	ulong flags, bufcnt, sysminor, aoemajor;
	struct sk_buff *sl;
	enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };

	h = (struct aoe_hdr *) skb->mac.raw;
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = __be16_to_cpu(*((u16 *) h->major));
	if (aoemajor == 0xfff) {
		printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
			"address is all ones. Check shelf dip switches\n");
		return;
	}

	sysminor = SYSMINOR(aoemajor, h->minor);
	if (sysminor > MAXSYSMINOR) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
			"large\n", sysminor);
		return;
	}

	bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
	if (bufcnt > MAXFRAMES)	/* keep it reasonable */
		bufcnt = MAXFRAMES;

	d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
	if (d == NULL) {
		printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));

	/* we get here only if the device is new */
	sl = aoecmd_ata_id(d);

	spin_unlock_irqrestore(&d->lock, flags);

	aoenet_xmit(sl);
}