1 /* 2 * blkfront.c 3 * 4 * XenLinux virtual block device driver. 5 * 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 8 * Copyright (c) 2004, Christian Limpach 9 * Copyright (c) 2004, Andrew Warfield 10 * Copyright (c) 2005, Christopher Clark 11 * Copyright (c) 2005, XenSource Ltd 12 * 13 * This program is free software; you can redistribute it and/or 14 * modify it under the terms of the GNU General Public License version 2 15 * as published by the Free Software Foundation; or, when distributed 16 * separately from the Linux kernel or incorporated into other 17 * software packages, subject to the following license: 18 * 19 * Permission is hereby granted, free of charge, to any person obtaining a copy 20 * of this source file (the "Software"), to deal in the Software without 21 * restriction, including without limitation the rights to use, copy, modify, 22 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 23 * and to permit persons to whom the Software is furnished to do so, subject to 24 * the following conditions: 25 * 26 * The above copyright notice and this permission notice shall be included in 27 * all copies or substantial portions of the Software. 28 * 29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 34 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 35 * IN THE SOFTWARE. 36 */ 37 38 #include <linux/interrupt.h> 39 #include <linux/blkdev.h> 40 #include <linux/module.h> 41 42 #include <xen/xenbus.h> 43 #include <xen/grant_table.h> 44 #include <xen/events.h> 45 #include <xen/page.h> 46 47 #include <xen/interface/grant_table.h> 48 #include <xen/interface/io/blkif.h> 49 50 #include <asm/xen/hypervisor.h> 51 52 enum blkif_state { 53 BLKIF_STATE_DISCONNECTED, 54 BLKIF_STATE_CONNECTED, 55 BLKIF_STATE_SUSPENDED, 56 }; 57 58 struct blk_shadow { 59 struct blkif_request req; 60 unsigned long request; 61 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 62 }; 63 64 static struct block_device_operations xlvbd_block_fops; 65 66 #define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) 67 68 /* 69 * We have one of these per vbd, whether ide, scsi or 'other'. They 70 * hang in private_data off the gendisk structure. We may end up 71 * putting all kinds of interesting stuff here :-) 72 */ 73 struct blkfront_info 74 { 75 struct xenbus_device *xbdev; 76 dev_t dev; 77 struct gendisk *gd; 78 int vdevice; 79 blkif_vdev_t handle; 80 enum blkif_state connected; 81 int ring_ref; 82 struct blkif_front_ring ring; 83 unsigned int evtchn, irq; 84 struct request_queue *rq; 85 struct work_struct work; 86 struct gnttab_free_callback callback; 87 struct blk_shadow shadow[BLK_RING_SIZE]; 88 unsigned long shadow_free; 89 int feature_barrier; 90 91 /** 92 * The number of people holding this device open. We won't allow a 93 * hot-unplug unless this is 0. 94 */ 95 int users; 96 }; 97 98 static DEFINE_SPINLOCK(blkif_io_lock); 99 100 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \ 101 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) 102 #define GRANT_INVALID_REF 0 103 104 #define PARTS_PER_DISK 16 105 106 #define BLKIF_MAJOR(dev) ((dev)>>8) 107 #define BLKIF_MINOR(dev) ((dev) & 0xff) 108 109 #define DEV_NAME "xvd" /* name in /dev */ 110 111 /* Information about our VBDs. */ 112 #define MAX_VBDS 64 113 static LIST_HEAD(vbds_list); 114 115 static int get_id_from_freelist(struct blkfront_info *info) 116 { 117 unsigned long free = info->shadow_free; 118 BUG_ON(free > BLK_RING_SIZE); 119 info->shadow_free = info->shadow[free].req.id; 120 info->shadow[free].req.id = 0x0fffffee; /* debug */ 121 return free; 122 } 123 124 static void add_id_to_freelist(struct blkfront_info *info, 125 unsigned long id) 126 { 127 info->shadow[id].req.id = info->shadow_free; 128 info->shadow[id].request = 0; 129 info->shadow_free = id; 130 } 131 132 static void blkif_restart_queue_callback(void *arg) 133 { 134 struct blkfront_info *info = (struct blkfront_info *)arg; 135 schedule_work(&info->work); 136 } 137 138 /* 139 * blkif_queue_request 140 * 141 * request block io 142 * 143 * id: for guest use only. 144 * operation: BLKIF_OP_{READ,WRITE,PROBE} 145 * buffer: buffer to read/write into. this should be a 146 * virtual address in the guest os. 147 */ 148 static int blkif_queue_request(struct request *req) 149 { 150 struct blkfront_info *info = req->rq_disk->private_data; 151 unsigned long buffer_mfn; 152 struct blkif_request *ring_req; 153 struct bio *bio; 154 struct bio_vec *bvec; 155 int idx; 156 unsigned long id; 157 unsigned int fsect, lsect; 158 int ref; 159 grant_ref_t gref_head; 160 161 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) 162 return 1; 163 164 if (gnttab_alloc_grant_references( 165 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { 166 gnttab_request_free_callback( 167 &info->callback, 168 blkif_restart_queue_callback, 169 info, 170 BLKIF_MAX_SEGMENTS_PER_REQUEST); 171 return 1; 172 } 173 174 /* Fill out a communications ring structure. */ 175 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 176 id = get_id_from_freelist(info); 177 info->shadow[id].request = (unsigned long)req; 178 179 ring_req->id = id; 180 ring_req->sector_number = (blkif_sector_t)req->sector; 181 ring_req->handle = info->handle; 182 183 ring_req->operation = rq_data_dir(req) ? 184 BLKIF_OP_WRITE : BLKIF_OP_READ; 185 if (blk_barrier_rq(req)) 186 ring_req->operation = BLKIF_OP_WRITE_BARRIER; 187 188 ring_req->nr_segments = 0; 189 rq_for_each_bio (bio, req) { 190 bio_for_each_segment (bvec, bio, idx) { 191 BUG_ON(ring_req->nr_segments 192 == BLKIF_MAX_SEGMENTS_PER_REQUEST); 193 buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page)); 194 fsect = bvec->bv_offset >> 9; 195 lsect = fsect + (bvec->bv_len >> 9) - 1; 196 /* install a grant reference. */ 197 ref = gnttab_claim_grant_reference(&gref_head); 198 BUG_ON(ref == -ENOSPC); 199 200 gnttab_grant_foreign_access_ref( 201 ref, 202 info->xbdev->otherend_id, 203 buffer_mfn, 204 rq_data_dir(req) ); 205 206 info->shadow[id].frame[ring_req->nr_segments] = 207 mfn_to_pfn(buffer_mfn); 208 209 ring_req->seg[ring_req->nr_segments] = 210 (struct blkif_request_segment) { 211 .gref = ref, 212 .first_sect = fsect, 213 .last_sect = lsect }; 214 215 ring_req->nr_segments++; 216 } 217 } 218 219 info->ring.req_prod_pvt++; 220 221 /* Keep a private copy so we can reissue requests when recovering. */ 222 info->shadow[id].req = *ring_req; 223 224 gnttab_free_grant_references(gref_head); 225 226 return 0; 227 } 228 229 230 static inline void flush_requests(struct blkfront_info *info) 231 { 232 int notify; 233 234 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); 235 236 if (notify) 237 notify_remote_via_irq(info->irq); 238 } 239 240 /* 241 * do_blkif_request 242 * read a block; request is in a request queue 243 */ 244 static void do_blkif_request(struct request_queue *rq) 245 { 246 struct blkfront_info *info = NULL; 247 struct request *req; 248 int queued; 249 250 pr_debug("Entered do_blkif_request\n"); 251 252 queued = 0; 253 254 while ((req = elv_next_request(rq)) != NULL) { 255 info = req->rq_disk->private_data; 256 if (!blk_fs_request(req)) { 257 end_request(req, 0); 258 continue; 259 } 260 261 if (RING_FULL(&info->ring)) 262 goto wait; 263 264 pr_debug("do_blk_req %p: cmd %p, sec %lx, " 265 "(%u/%li) buffer:%p [%s]\n", 266 req, req->cmd, (unsigned long)req->sector, 267 req->current_nr_sectors, 268 req->nr_sectors, req->buffer, 269 rq_data_dir(req) ? "write" : "read"); 270 271 272 blkdev_dequeue_request(req); 273 if (blkif_queue_request(req)) { 274 blk_requeue_request(rq, req); 275 wait: 276 /* Avoid pointless unplugs. */ 277 blk_stop_queue(rq); 278 break; 279 } 280 281 queued++; 282 } 283 284 if (queued != 0) 285 flush_requests(info); 286 } 287 288 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 289 { 290 struct request_queue *rq; 291 292 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 293 if (rq == NULL) 294 return -1; 295 296 elevator_init(rq, "noop"); 297 298 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 299 blk_queue_hardsect_size(rq, sector_size); 300 blk_queue_max_sectors(rq, 512); 301 302 /* Each segment in a request is up to an aligned page in size. */ 303 blk_queue_segment_boundary(rq, PAGE_SIZE - 1); 304 blk_queue_max_segment_size(rq, PAGE_SIZE); 305 306 /* Ensure a merged request will fit in a single I/O ring slot. */ 307 blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); 308 blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); 309 310 /* Make sure buffer addresses are sector-aligned. */ 311 blk_queue_dma_alignment(rq, 511); 312 313 gd->queue = rq; 314 315 return 0; 316 } 317 318 319 static int xlvbd_barrier(struct blkfront_info *info) 320 { 321 int err; 322 323 err = blk_queue_ordered(info->rq, 324 info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, 325 NULL); 326 327 if (err) 328 return err; 329 330 printk(KERN_INFO "blkfront: %s: barriers %s\n", 331 info->gd->disk_name, 332 info->feature_barrier ? "enabled" : "disabled"); 333 return 0; 334 } 335 336 337 static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, 338 int vdevice, u16 vdisk_info, u16 sector_size, 339 struct blkfront_info *info) 340 { 341 struct gendisk *gd; 342 int nr_minors = 1; 343 int err = -ENODEV; 344 345 BUG_ON(info->gd != NULL); 346 BUG_ON(info->rq != NULL); 347 348 if ((minor % PARTS_PER_DISK) == 0) 349 nr_minors = PARTS_PER_DISK; 350 351 gd = alloc_disk(nr_minors); 352 if (gd == NULL) 353 goto out; 354 355 if (nr_minors > 1) 356 sprintf(gd->disk_name, "%s%c", DEV_NAME, 357 'a' + minor / PARTS_PER_DISK); 358 else 359 sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 360 'a' + minor / PARTS_PER_DISK, 361 minor % PARTS_PER_DISK); 362 363 gd->major = XENVBD_MAJOR; 364 gd->first_minor = minor; 365 gd->fops = &xlvbd_block_fops; 366 gd->private_data = info; 367 gd->driverfs_dev = &(info->xbdev->dev); 368 set_capacity(gd, capacity); 369 370 if (xlvbd_init_blk_queue(gd, sector_size)) { 371 del_gendisk(gd); 372 goto out; 373 } 374 375 info->rq = gd->queue; 376 info->gd = gd; 377 378 if (info->feature_barrier) 379 xlvbd_barrier(info); 380 381 if (vdisk_info & VDISK_READONLY) 382 set_disk_ro(gd, 1); 383 384 if (vdisk_info & VDISK_REMOVABLE) 385 gd->flags |= GENHD_FL_REMOVABLE; 386 387 if (vdisk_info & VDISK_CDROM) 388 gd->flags |= GENHD_FL_CD; 389 390 return 0; 391 392 out: 393 return err; 394 } 395 396 static void kick_pending_request_queues(struct blkfront_info *info) 397 { 398 if (!RING_FULL(&info->ring)) { 399 /* Re-enable calldowns. */ 400 blk_start_queue(info->rq); 401 /* Kick things off immediately. */ 402 do_blkif_request(info->rq); 403 } 404 } 405 406 static void blkif_restart_queue(struct work_struct *work) 407 { 408 struct blkfront_info *info = container_of(work, struct blkfront_info, work); 409 410 spin_lock_irq(&blkif_io_lock); 411 if (info->connected == BLKIF_STATE_CONNECTED) 412 kick_pending_request_queues(info); 413 spin_unlock_irq(&blkif_io_lock); 414 } 415 416 static void blkif_free(struct blkfront_info *info, int suspend) 417 { 418 /* Prevent new requests being issued until we fix things up. */ 419 spin_lock_irq(&blkif_io_lock); 420 info->connected = suspend ? 421 BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; 422 /* No more blkif_request(). */ 423 if (info->rq) 424 blk_stop_queue(info->rq); 425 /* No more gnttab callback work. */ 426 gnttab_cancel_free_callback(&info->callback); 427 spin_unlock_irq(&blkif_io_lock); 428 429 /* Flush gnttab callback work. Must be done with no locks held. */ 430 flush_scheduled_work(); 431 432 /* Free resources associated with old device channel. */ 433 if (info->ring_ref != GRANT_INVALID_REF) { 434 gnttab_end_foreign_access(info->ring_ref, 0, 435 (unsigned long)info->ring.sring); 436 info->ring_ref = GRANT_INVALID_REF; 437 info->ring.sring = NULL; 438 } 439 if (info->irq) 440 unbind_from_irqhandler(info->irq, info); 441 info->evtchn = info->irq = 0; 442 443 } 444 445 static void blkif_completion(struct blk_shadow *s) 446 { 447 int i; 448 for (i = 0; i < s->req.nr_segments; i++) 449 gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL); 450 } 451 452 static irqreturn_t blkif_interrupt(int irq, void *dev_id) 453 { 454 struct request *req; 455 struct blkif_response *bret; 456 RING_IDX i, rp; 457 unsigned long flags; 458 struct blkfront_info *info = (struct blkfront_info *)dev_id; 459 int uptodate; 460 461 spin_lock_irqsave(&blkif_io_lock, flags); 462 463 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { 464 spin_unlock_irqrestore(&blkif_io_lock, flags); 465 return IRQ_HANDLED; 466 } 467 468 again: 469 rp = info->ring.sring->rsp_prod; 470 rmb(); /* Ensure we see queued responses up to 'rp'. */ 471 472 for (i = info->ring.rsp_cons; i != rp; i++) { 473 unsigned long id; 474 int ret; 475 476 bret = RING_GET_RESPONSE(&info->ring, i); 477 id = bret->id; 478 req = (struct request *)info->shadow[id].request; 479 480 blkif_completion(&info->shadow[id]); 481 482 add_id_to_freelist(info, id); 483 484 uptodate = (bret->status == BLKIF_RSP_OKAY); 485 switch (bret->operation) { 486 case BLKIF_OP_WRITE_BARRIER: 487 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 488 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", 489 info->gd->disk_name); 490 uptodate = -EOPNOTSUPP; 491 info->feature_barrier = 0; 492 xlvbd_barrier(info); 493 } 494 /* fall through */ 495 case BLKIF_OP_READ: 496 case BLKIF_OP_WRITE: 497 if (unlikely(bret->status != BLKIF_RSP_OKAY)) 498 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " 499 "request: %x\n", bret->status); 500 501 ret = end_that_request_first(req, uptodate, 502 req->hard_nr_sectors); 503 BUG_ON(ret); 504 end_that_request_last(req, uptodate); 505 break; 506 default: 507 BUG(); 508 } 509 } 510 511 info->ring.rsp_cons = i; 512 513 if (i != info->ring.req_prod_pvt) { 514 int more_to_do; 515 RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); 516 if (more_to_do) 517 goto again; 518 } else 519 info->ring.sring->rsp_event = i + 1; 520 521 kick_pending_request_queues(info); 522 523 spin_unlock_irqrestore(&blkif_io_lock, flags); 524 525 return IRQ_HANDLED; 526 } 527 528 529 static int setup_blkring(struct xenbus_device *dev, 530 struct blkfront_info *info) 531 { 532 struct blkif_sring *sring; 533 int err; 534 535 info->ring_ref = GRANT_INVALID_REF; 536 537 sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL); 538 if (!sring) { 539 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); 540 return -ENOMEM; 541 } 542 SHARED_RING_INIT(sring); 543 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); 544 545 err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); 546 if (err < 0) { 547 free_page((unsigned long)sring); 548 info->ring.sring = NULL; 549 goto fail; 550 } 551 info->ring_ref = err; 552 553 err = xenbus_alloc_evtchn(dev, &info->evtchn); 554 if (err) 555 goto fail; 556 557 err = bind_evtchn_to_irqhandler(info->evtchn, 558 blkif_interrupt, 559 IRQF_SAMPLE_RANDOM, "blkif", info); 560 if (err <= 0) { 561 xenbus_dev_fatal(dev, err, 562 "bind_evtchn_to_irqhandler failed"); 563 goto fail; 564 } 565 info->irq = err; 566 567 return 0; 568 fail: 569 blkif_free(info, 0); 570 return err; 571 } 572 573 574 /* Common code used when first setting up, and when resuming. */ 575 static int talk_to_backend(struct xenbus_device *dev, 576 struct blkfront_info *info) 577 { 578 const char *message = NULL; 579 struct xenbus_transaction xbt; 580 int err; 581 582 /* Create shared ring, alloc event channel. */ 583 err = setup_blkring(dev, info); 584 if (err) 585 goto out; 586 587 again: 588 err = xenbus_transaction_start(&xbt); 589 if (err) { 590 xenbus_dev_fatal(dev, err, "starting transaction"); 591 goto destroy_blkring; 592 } 593 594 err = xenbus_printf(xbt, dev->nodename, 595 "ring-ref", "%u", info->ring_ref); 596 if (err) { 597 message = "writing ring-ref"; 598 goto abort_transaction; 599 } 600 err = xenbus_printf(xbt, dev->nodename, 601 "event-channel", "%u", info->evtchn); 602 if (err) { 603 message = "writing event-channel"; 604 goto abort_transaction; 605 } 606 607 err = xenbus_transaction_end(xbt, 0); 608 if (err) { 609 if (err == -EAGAIN) 610 goto again; 611 xenbus_dev_fatal(dev, err, "completing transaction"); 612 goto destroy_blkring; 613 } 614 615 xenbus_switch_state(dev, XenbusStateInitialised); 616 617 return 0; 618 619 abort_transaction: 620 xenbus_transaction_end(xbt, 1); 621 if (message) 622 xenbus_dev_fatal(dev, err, "%s", message); 623 destroy_blkring: 624 blkif_free(info, 0); 625 out: 626 return err; 627 } 628 629 630 /** 631 * Entry point to this code when a new device is created. Allocate the basic 632 * structures and the ring buffer for communication with the backend, and 633 * inform the backend of the appropriate details for those. Switch to 634 * Initialised state. 635 */ 636 static int blkfront_probe(struct xenbus_device *dev, 637 const struct xenbus_device_id *id) 638 { 639 int err, vdevice, i; 640 struct blkfront_info *info; 641 642 /* FIXME: Use dynamic device id if this is not set. */ 643 err = xenbus_scanf(XBT_NIL, dev->nodename, 644 "virtual-device", "%i", &vdevice); 645 if (err != 1) { 646 xenbus_dev_fatal(dev, err, "reading virtual-device"); 647 return err; 648 } 649 650 info = kzalloc(sizeof(*info), GFP_KERNEL); 651 if (!info) { 652 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); 653 return -ENOMEM; 654 } 655 656 info->xbdev = dev; 657 info->vdevice = vdevice; 658 info->connected = BLKIF_STATE_DISCONNECTED; 659 INIT_WORK(&info->work, blkif_restart_queue); 660 661 for (i = 0; i < BLK_RING_SIZE; i++) 662 info->shadow[i].req.id = i+1; 663 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 664 665 /* Front end dir is a number, which is used as the id. */ 666 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); 667 dev->dev.driver_data = info; 668 669 err = talk_to_backend(dev, info); 670 if (err) { 671 kfree(info); 672 dev->dev.driver_data = NULL; 673 return err; 674 } 675 676 return 0; 677 } 678 679 680 static int blkif_recover(struct blkfront_info *info) 681 { 682 int i; 683 struct blkif_request *req; 684 struct blk_shadow *copy; 685 int j; 686 687 /* Stage 1: Make a safe copy of the shadow state. */ 688 copy = kmalloc(sizeof(info->shadow), GFP_KERNEL); 689 if (!copy) 690 return -ENOMEM; 691 memcpy(copy, info->shadow, sizeof(info->shadow)); 692 693 /* Stage 2: Set up free list. */ 694 memset(&info->shadow, 0, sizeof(info->shadow)); 695 for (i = 0; i < BLK_RING_SIZE; i++) 696 info->shadow[i].req.id = i+1; 697 info->shadow_free = info->ring.req_prod_pvt; 698 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; 699 700 /* Stage 3: Find pending requests and requeue them. */ 701 for (i = 0; i < BLK_RING_SIZE; i++) { 702 /* Not in use? */ 703 if (copy[i].request == 0) 704 continue; 705 706 /* Grab a request slot and copy shadow state into it. */ 707 req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 708 *req = copy[i].req; 709 710 /* We get a new request id, and must reset the shadow state. */ 711 req->id = get_id_from_freelist(info); 712 memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); 713 714 /* Rewrite any grant references invalidated by susp/resume. */ 715 for (j = 0; j < req->nr_segments; j++) 716 gnttab_grant_foreign_access_ref( 717 req->seg[j].gref, 718 info->xbdev->otherend_id, 719 pfn_to_mfn(info->shadow[req->id].frame[j]), 720 rq_data_dir( 721 (struct request *) 722 info->shadow[req->id].request)); 723 info->shadow[req->id].req = *req; 724 725 info->ring.req_prod_pvt++; 726 } 727 728 kfree(copy); 729 730 xenbus_switch_state(info->xbdev, XenbusStateConnected); 731 732 spin_lock_irq(&blkif_io_lock); 733 734 /* Now safe for us to use the shared ring */ 735 info->connected = BLKIF_STATE_CONNECTED; 736 737 /* Send off requeued requests */ 738 flush_requests(info); 739 740 /* Kick any other new requests queued since we resumed */ 741 kick_pending_request_queues(info); 742 743 spin_unlock_irq(&blkif_io_lock); 744 745 return 0; 746 } 747 748 /** 749 * We are reconnecting to the backend, due to a suspend/resume, or a backend 750 * driver restart. We tear down our blkif structure and recreate it, but 751 * leave the device-layer structures intact so that this is transparent to the 752 * rest of the kernel. 753 */ 754 static int blkfront_resume(struct xenbus_device *dev) 755 { 756 struct blkfront_info *info = dev->dev.driver_data; 757 int err; 758 759 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename); 760 761 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED); 762 763 err = talk_to_backend(dev, info); 764 if (info->connected == BLKIF_STATE_SUSPENDED && !err) 765 err = blkif_recover(info); 766 767 return err; 768 } 769 770 771 /* 772 * Invoked when the backend is finally 'ready' (and has told produced 773 * the details about the physical device - #sectors, size, etc). 774 */ 775 static void blkfront_connect(struct blkfront_info *info) 776 { 777 unsigned long long sectors; 778 unsigned long sector_size; 779 unsigned int binfo; 780 int err; 781 782 if ((info->connected == BLKIF_STATE_CONNECTED) || 783 (info->connected == BLKIF_STATE_SUSPENDED) ) 784 return; 785 786 dev_dbg(&info->xbdev->dev, "%s:%s.\n", 787 __func__, info->xbdev->otherend); 788 789 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 790 "sectors", "%llu", §ors, 791 "info", "%u", &binfo, 792 "sector-size", "%lu", §or_size, 793 NULL); 794 if (err) { 795 xenbus_dev_fatal(info->xbdev, err, 796 "reading backend fields at %s", 797 info->xbdev->otherend); 798 return; 799 } 800 801 err = xenbus_gather(XBT_NIL, info->xbdev->otherend, 802 "feature-barrier", "%lu", &info->feature_barrier, 803 NULL); 804 if (err) 805 info->feature_barrier = 0; 806 807 err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), 808 sectors, info->vdevice, 809 binfo, sector_size, info); 810 if (err) { 811 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 812 info->xbdev->otherend); 813 return; 814 } 815 816 xenbus_switch_state(info->xbdev, XenbusStateConnected); 817 818 /* Kick pending requests. */ 819 spin_lock_irq(&blkif_io_lock); 820 info->connected = BLKIF_STATE_CONNECTED; 821 kick_pending_request_queues(info); 822 spin_unlock_irq(&blkif_io_lock); 823 824 add_disk(info->gd); 825 } 826 827 /** 828 * Handle the change of state of the backend to Closing. We must delete our 829 * device-layer structures now, to ensure that writes are flushed through to 830 * the backend. Once is this done, we can switch to Closed in 831 * acknowledgement. 832 */ 833 static void blkfront_closing(struct xenbus_device *dev) 834 { 835 struct blkfront_info *info = dev->dev.driver_data; 836 unsigned long flags; 837 838 dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename); 839 840 if (info->rq == NULL) 841 goto out; 842 843 spin_lock_irqsave(&blkif_io_lock, flags); 844 845 del_gendisk(info->gd); 846 847 /* No more blkif_request(). */ 848 blk_stop_queue(info->rq); 849 850 /* No more gnttab callback work. */ 851 gnttab_cancel_free_callback(&info->callback); 852 spin_unlock_irqrestore(&blkif_io_lock, flags); 853 854 /* Flush gnttab callback work. Must be done with no locks held. */ 855 flush_scheduled_work(); 856 857 blk_cleanup_queue(info->rq); 858 info->rq = NULL; 859 860 out: 861 xenbus_frontend_closed(dev); 862 } 863 864 /** 865 * Callback received when the backend's state changes. 866 */ 867 static void backend_changed(struct xenbus_device *dev, 868 enum xenbus_state backend_state) 869 { 870 struct blkfront_info *info = dev->dev.driver_data; 871 struct block_device *bd; 872 873 dev_dbg(&dev->dev, "blkfront:backend_changed.\n"); 874 875 switch (backend_state) { 876 case XenbusStateInitialising: 877 case XenbusStateInitWait: 878 case XenbusStateInitialised: 879 case XenbusStateUnknown: 880 case XenbusStateClosed: 881 break; 882 883 case XenbusStateConnected: 884 blkfront_connect(info); 885 break; 886 887 case XenbusStateClosing: 888 bd = bdget(info->dev); 889 if (bd == NULL) 890 xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); 891 892 mutex_lock(&bd->bd_mutex); 893 if (info->users > 0) 894 xenbus_dev_error(dev, -EBUSY, 895 "Device in use; refusing to close"); 896 else 897 blkfront_closing(dev); 898 mutex_unlock(&bd->bd_mutex); 899 bdput(bd); 900 break; 901 } 902 } 903 904 static int blkfront_remove(struct xenbus_device *dev) 905 { 906 struct blkfront_info *info = dev->dev.driver_data; 907 908 dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename); 909 910 blkif_free(info, 0); 911 912 kfree(info); 913 914 return 0; 915 } 916 917 static int blkif_open(struct inode *inode, struct file *filep) 918 { 919 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; 920 info->users++; 921 return 0; 922 } 923 924 static int blkif_release(struct inode *inode, struct file *filep) 925 { 926 struct blkfront_info *info = inode->i_bdev->bd_disk->private_data; 927 info->users--; 928 if (info->users == 0) { 929 /* Check whether we have been instructed to close. We will 930 have ignored this request initially, as the device was 931 still mounted. */ 932 struct xenbus_device *dev = info->xbdev; 933 enum xenbus_state state = xenbus_read_driver_state(dev->otherend); 934 935 if (state == XenbusStateClosing) 936 blkfront_closing(dev); 937 } 938 return 0; 939 } 940 941 static struct block_device_operations xlvbd_block_fops = 942 { 943 .owner = THIS_MODULE, 944 .open = blkif_open, 945 .release = blkif_release, 946 }; 947 948 949 static struct xenbus_device_id blkfront_ids[] = { 950 { "vbd" }, 951 { "" } 952 }; 953 954 static struct xenbus_driver blkfront = { 955 .name = "vbd", 956 .owner = THIS_MODULE, 957 .ids = blkfront_ids, 958 .probe = blkfront_probe, 959 .remove = blkfront_remove, 960 .resume = blkfront_resume, 961 .otherend_changed = backend_changed, 962 }; 963 964 static int __init xlblk_init(void) 965 { 966 if (!is_running_on_xen()) 967 return -ENODEV; 968 969 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) { 970 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n", 971 XENVBD_MAJOR, DEV_NAME); 972 return -ENODEV; 973 } 974 975 return xenbus_register_frontend(&blkfront); 976 } 977 module_init(xlblk_init); 978 979 980 static void xlblk_exit(void) 981 { 982 return xenbus_unregister_driver(&blkfront); 983 } 984 module_exit(xlblk_exit); 985 986 MODULE_DESCRIPTION("Xen virtual block device frontend"); 987 MODULE_LICENSE("GPL"); 988 MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR); 989