/*  Xenbus code for blkif backend
    Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
    Copyright (C) 2005 XenSource Ltd

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

*/

#define pr_fmt(fmt) "xen-blkback: " fmt

#include <stdarg.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <xen/events.h>
#include <xen/grant_table.h>
#include "common.h"

/* Buffers large enough to hold the full blkback thread name and the
 * "ring-ref%u" node names. */
#define BLKBACK_NAME_LEN (20)
#define RINGREF_NAME_LEN (20)

struct backend_info {
	struct xenbus_device *dev;
	struct xen_blkif *blkif;
	struct xenbus_watch backend_watch;
	unsigned major;
	unsigned minor;
	char *mode;
};

static struct kmem_cache *xen_blkif_cachep;
static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	return be->dev;
}

/*
 * The last request could free the device from softirq context and
 * xen_blkif_free() can sleep.
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	struct xen_blkif *blkif;

	blkif = container_of(work, struct xen_blkif, free_work);
	xen_blkif_free(blkif);
}

static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
	struct xenbus_device *dev = blkif->be->dev;

	devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
	if (IS_ERR(devpath))
		return PTR_ERR(devpath);

	devname = strstr(devpath, "/dev/");
	if (devname != NULL)
		devname += strlen("/dev/");
	else
		devname = devpath;

	snprintf(buf, BLKBACK_NAME_LEN, "blkback.%d.%s", blkif->domid, devname);
	kfree(devpath);

	return 0;
}
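/*
 * Bring the backend online once both ends are ready: write the device
 * details to the store, flush and invalidate the backing device's page
 * cache, and start one xenblkd kernel thread per ring to service I/O.
 * Safe to call repeatedly; it returns early if we are not ready yet or
 * are already connected.
 */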
static void xen_update_blkif_status(struct xen_blkif *blkif)
{
	int err;
	char name[BLKBACK_NAME_LEN];
	struct xen_blkif_ring *ring;
	int i;

	/* Not ready to connect? */
	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
		return;

	/* Already connected? */
	if (blkif->be->dev->state == XenbusStateConnected)
		return;

	/* Attempt to connect: exit if we fail. */
	connect(blkif->be);
	if (blkif->be->dev->state != XenbusStateConnected)
		return;

	err = blkback_name(blkif, name);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
		return;
	}

	err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
	if (err) {
		xenbus_dev_error(blkif->be->dev, err, "block flush");
		return;
	}
	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);

	for (i = 0; i < blkif->nr_rings; i++) {
		ring = &blkif->rings[i];
		ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
		if (IS_ERR(ring->xenblkd)) {
			err = PTR_ERR(ring->xenblkd);
			ring->xenblkd = NULL;
			xenbus_dev_fatal(blkif->be->dev, err,
					 "start %s-%d xenblkd", name, i);
			goto out;
		}
	}
	return;

out:
	/* Stop the kthreads we already started. */
	while (--i >= 0) {
		ring = &blkif->rings[i];
		kthread_stop(ring->xenblkd);
	}
	return;
}

static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
{
	unsigned int r;

	blkif->rings = kcalloc(blkif->nr_rings, sizeof(struct xen_blkif_ring),
			       GFP_KERNEL);
	if (!blkif->rings)
		return -ENOMEM;

	for (r = 0; r < blkif->nr_rings; r++) {
		struct xen_blkif_ring *ring = &blkif->rings[r];

		spin_lock_init(&ring->blk_ring_lock);
		init_waitqueue_head(&ring->wq);
		INIT_LIST_HEAD(&ring->pending_free);
		INIT_LIST_HEAD(&ring->persistent_purge_list);
		INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
		spin_lock_init(&ring->free_pages_lock);
		INIT_LIST_HEAD(&ring->free_pages);

		spin_lock_init(&ring->pending_free_lock);
		init_waitqueue_head(&ring->pending_free_wq);
		init_waitqueue_head(&ring->shutdown_wq);
		ring->blkif = blkif;
		ring->st_print = jiffies;
		xen_blkif_get(blkif);
	}

	return 0;
}

static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
	struct xen_blkif *blkif;

	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);

	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
	if (!blkif)
		return ERR_PTR(-ENOMEM);

	blkif->domid = domid;
	atomic_set(&blkif->refcnt, 1);
	init_completion(&blkif->drain_complete);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);

	return blkif;
}
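/*
 * Map the nr_grefs ring pages granted by the frontend into our address
 * space, initialise the back ring in whichever protocol the frontend
 * negotiated (native, x86_32 or x86_64), and bind the interdomain event
 * channel to xen_blkif_be_int(). The shared ring spans
 * XEN_PAGE_SIZE * nr_grefs bytes.
 */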
static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
			 unsigned int nr_grefs, unsigned int evtchn)
{
	int err;
	struct xen_blkif *blkif = ring->blkif;

	/* Already connected? */
	if (ring->irq)
		return 0;

	err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
				     &ring->blk_ring);
	if (err < 0)
		return err;

	switch (blkif->blk_protocol) {
	case BLKIF_PROTOCOL_NATIVE:
	{
		struct blkif_sring *sring;
		sring = (struct blkif_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.native, sring,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_32:
	{
		struct blkif_x86_32_sring *sring_x86_32;
		sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	case BLKIF_PROTOCOL_X86_64:
	{
		struct blkif_x86_64_sring *sring_x86_64;
		sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
		BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
			       XEN_PAGE_SIZE * nr_grefs);
		break;
	}
	default:
		BUG();
	}

	err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
						    xen_blkif_be_int, 0,
						    "blkif-backend", ring);
	if (err < 0) {
		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
		ring->blk_rings.common.sring = NULL;
		return err;
	}
	ring->irq = err;

	return 0;
}

static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	struct pending_req *req, *n;
	unsigned int j, r;

	for (r = 0; r < blkif->nr_rings; r++) {
		struct xen_blkif_ring *ring = &blkif->rings[r];
		unsigned int i = 0;

		if (ring->xenblkd) {
			kthread_stop(ring->xenblkd);
			wake_up(&ring->shutdown_wq);
			ring->xenblkd = NULL;
		}

		/* The above kthread_stop() guarantees that at this point we
		 * don't have any discard_io or other_io requests. So, checking
		 * for inflight IO is enough.
		 */
		if (atomic_read(&ring->inflight) > 0)
			return -EBUSY;

		if (ring->irq) {
			unbind_from_irqhandler(ring->irq, ring);
			ring->irq = 0;
		}

		if (ring->blk_rings.common.sring) {
			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
			ring->blk_rings.common.sring = NULL;
		}

		/* Remove all persistent grants and the cache of ballooned pages. */
		xen_blkbk_free_caches(ring);

		/* Check that there is no request in use */
		list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
			list_del(&req->free_list);

			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
				kfree(req->segments[j]);

			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
				kfree(req->indirect_pages[j]);

			kfree(req);
			i++;
		}

		BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
		BUG_ON(!list_empty(&ring->persistent_purge_list));
		BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
		BUG_ON(!list_empty(&ring->free_pages));
		BUG_ON(ring->free_pages_num != 0);
		BUG_ON(ring->persistent_gnt_c != 0);
		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
		xen_blkif_put(blkif);
	}
	blkif->nr_ring_pages = 0;
	/*
	 * blkif->rings was allocated in connect_ring, so we should free it
	 * here.
	 */
	kfree(blkif->rings);
	blkif->rings = NULL;
	blkif->nr_rings = 0;

	return 0;
}
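/*
 * Final teardown. This can sleep, which is why the last reference holder
 * goes through xen_blkif_deferred_free() rather than calling this
 * directly from softirq context.
 */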
static void xen_blkif_free(struct xen_blkif *blkif)
{
	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	kmem_cache_free(xen_blkif_cachep, blkif);
}

int __init xen_blkif_interface_init(void)
{
	xen_blkif_cachep = kmem_cache_create("blkif_cache",
					     sizeof(struct xen_blkif),
					     0, 0, NULL);
	if (!xen_blkif_cachep)
		return -ENOMEM;

	return 0;
}

/*
 *  sysfs interface for VBD I/O requests
 */
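/*
 * VBD_SHOW_ALLRING(name, format) generates a show_##name() routine that
 * sums the per-ring counter st_##name over all rings, plus a read-only
 * device attribute exposing the total. For example, the rd_req instance
 * below appears as the "statistics/rd_req" node in sysfs.
 */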
#define VBD_SHOW_ALLRING(name, format)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
		struct xen_blkif *blkif = be->blkif;			\
		unsigned int i;						\
		unsigned long long result = 0;				\
									\
		if (!blkif->rings)					\
			goto out;					\
									\
		for (i = 0; i < blkif->nr_rings; i++) {			\
			struct xen_blkif_ring *ring = &blkif->rings[i];	\
									\
			result += ring->st_##name;			\
		}							\
									\
out:									\
		return sprintf(buf, format, result);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW_ALLRING(oo_req,  "%llu\n");
VBD_SHOW_ALLRING(rd_req,  "%llu\n");
VBD_SHOW_ALLRING(wr_req,  "%llu\n");
VBD_SHOW_ALLRING(f_req,   "%llu\n");
VBD_SHOW_ALLRING(ds_req,  "%llu\n");
VBD_SHOW_ALLRING(rd_sect, "%llu\n");
VBD_SHOW_ALLRING(wr_sect, "%llu\n");

static struct attribute *xen_vbdstat_attrs[] = {
	&dev_attr_oo_req.attr,
	&dev_attr_rd_req.attr,
	&dev_attr_wr_req.attr,
	&dev_attr_f_req.attr,
	&dev_attr_ds_req.attr,
	&dev_attr_rd_sect.attr,
	&dev_attr_wr_sect.attr,
	NULL
};

static struct attribute_group xen_vbdstat_group = {
	.name = "statistics",
	.attrs = xen_vbdstat_attrs,
};

#define VBD_SHOW(name, format, args...)					\
	static ssize_t show_##name(struct device *_dev,			\
				   struct device_attribute *attr,	\
				   char *buf)				\
	{								\
		struct xenbus_device *dev = to_xenbus_device(_dev);	\
		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
									\
		return sprintf(buf, format, ##args);			\
	}								\
	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)

VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);

static int xenvbd_sysfs_addif(struct xenbus_device *dev)
{
	int error;

	error = device_create_file(&dev->dev, &dev_attr_physical_device);
	if (error)
		goto fail1;

	error = device_create_file(&dev->dev, &dev_attr_mode);
	if (error)
		goto fail2;

	error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
	if (error)
		goto fail3;

	return 0;

fail3:	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
fail2:	device_remove_file(&dev->dev, &dev_attr_mode);
fail1:	device_remove_file(&dev->dev, &dev_attr_physical_device);
	return error;
}

static void xenvbd_sysfs_delif(struct xenbus_device *dev)
{
	sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
	device_remove_file(&dev->dev, &dev_attr_mode);
	device_remove_file(&dev->dev, &dev_attr_physical_device);
}


static void xen_vbd_free(struct xen_vbd *vbd)
{
	if (vbd->bdev)
		blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
	vbd->bdev = NULL;
}

static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
			  unsigned major, unsigned minor, int readonly,
			  int cdrom)
{
	struct xen_vbd *vbd;
	struct block_device *bdev;
	struct request_queue *q;

	vbd = &blkif->vbd;
	vbd->handle   = handle;
	vbd->readonly = readonly;
	vbd->type     = 0;

	vbd->pdevice  = MKDEV(major, minor);

	bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
				 FMODE_READ : FMODE_WRITE, NULL);

	if (IS_ERR(bdev)) {
		pr_warn("xen_vbd_create: device %08x could not be opened\n",
			vbd->pdevice);
		return -ENOENT;
	}

	vbd->bdev = bdev;
	if (vbd->bdev->bd_disk == NULL) {
		pr_warn("xen_vbd_create: device %08x doesn't exist\n",
			vbd->pdevice);
		xen_vbd_free(vbd);
		return -ENOENT;
	}
	vbd->size = vbd_sz(vbd);

	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
		vbd->type |= VDISK_CDROM;
	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
		vbd->type |= VDISK_REMOVABLE;

	q = bdev_get_queue(bdev);
	if (q && q->flush_flags)
		vbd->flush_support = true;

	if (q && blk_queue_secdiscard(q))
		vbd->discard_secure = true;

	pr_debug("Successful creation of handle=%04x (dom=%u)\n",
		 handle, blkif->domid);
	return 0;
}

static int xen_blkbk_remove(struct xenbus_device *dev)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (be->major || be->minor)
		xenvbd_sysfs_delif(dev);

	if (be->backend_watch.node) {
		unregister_xenbus_watch(&be->backend_watch);
		kfree(be->backend_watch.node);
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	/*
	 * be->blkif is NULL if xen_blkif_alloc() failed in probe, so only
	 * disconnect and drop the xen_blkif_alloc() reference when it exists.
	 */
	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);

		/* Put the reference we set in xen_blkif_alloc(). */
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	return 0;
}

int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
			      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);

	return err;
}
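/*
 * Advertise discard support in the store: if the backing queue supports
 * discard and the toolstack has not disabled it via "discard-enable",
 * publish "discard-granularity", "discard-alignment" and (optionally)
 * "discard-secure", then set "feature-discard" accordingly.
 */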
static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	struct xen_blkif *blkif = be->blkif;
	int err;
	int state = 0, discard_enable;
	struct block_device *bdev = be->blkif->vbd.bdev;
	struct request_queue *q = bdev_get_queue(bdev);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "discard-enable", "%d",
			   &discard_enable);
	if (err == 1 && !discard_enable)
		return;

	if (blk_queue_discard(q)) {
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-granularity", "%u",
				    q->limits.discard_granularity);
		if (err) {
			dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
			return;
		}
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-alignment", "%u",
				    q->limits.discard_alignment);
		if (err) {
			dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
			return;
		}
		state = 1;
		/* Optional. */
		err = xenbus_printf(xbt, dev->nodename,
				    "discard-secure", "%d",
				    blkif->vbd.discard_secure);
		if (err) {
			dev_warn(&dev->dev, "writing discard-secure (%d)", err);
			return;
		}
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-discard",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-discard (%d)", err);
}

int xen_blkbk_barrier(struct xenbus_transaction xbt,
		      struct backend_info *be, int state)
{
	struct xenbus_device *dev = be->dev;
	int err;

	err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
			    "%d", state);
	if (err)
		dev_warn(&dev->dev, "writing feature-barrier (%d)", err);

	return err;
}

/*
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures, and watch the store waiting for the hotplug scripts to tell us
 * the device's physical major and minor numbers.  Switch to InitWait.
 */
static int xen_blkbk_probe(struct xenbus_device *dev,
			   const struct xenbus_device_id *id)
{
	int err;
	struct backend_info *be = kzalloc(sizeof(struct backend_info),
					  GFP_KERNEL);

	/* match the pr_debug in xen_blkbk_remove */
	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	if (!be) {
		xenbus_dev_fatal(dev, -ENOMEM,
				 "allocating backend structure");
		return -ENOMEM;
	}
	be->dev = dev;
	dev_set_drvdata(&dev->dev, be);

	be->blkif = xen_blkif_alloc(dev->otherend_id);
	if (IS_ERR(be->blkif)) {
		err = PTR_ERR(be->blkif);
		be->blkif = NULL;
		xenbus_dev_fatal(dev, err, "creating block interface");
		goto fail;
	}

	/* Multi-queue: advertise how many queues we support. */
	err = xenbus_printf(XBT_NIL, dev->nodename,
			    "multi-queue-max-queues", "%u", xenblk_max_queues);
	if (err)
		pr_warn("Error writing multi-queue-max-queues\n");

	/* setup back pointer */
	be->blkif->be = be;

	err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
				   "%s/%s", dev->nodename, "physical-device");
	if (err)
		goto fail;

	err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
			    xen_blkif_max_ring_order);
	if (err)
		pr_warn("%s: writing 'max-ring-page-order' failed\n", __func__);

	err = xenbus_switch_state(dev, XenbusStateInitWait);
	if (err)
		goto fail;

	return 0;

fail:
	pr_warn("%s failed\n", __func__);
	xen_blkbk_remove(dev);
	return err;
}


/*
 * Callback received when the hotplug scripts have placed the physical-device
 * node.  Read it and the mode node, and create a vbd.  If the frontend is
 * ready, connect.
 */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	int err;
	unsigned major;
	unsigned minor;
	struct backend_info *be
		= container_of(watch, struct backend_info, backend_watch);
	struct xenbus_device *dev = be->dev;
	int cdrom = 0;
	unsigned long handle;
	char *device_type;

	pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);

	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
			   &major, &minor);
	if (XENBUS_EXIST_ERR(err)) {
		/*
		 * Since this watch will fire once immediately after it is
		 * registered, we expect this.  Ignore it, and wait for the
		 * hotplug scripts.
		 */
		return;
	}
	if (err != 2) {
		xenbus_dev_fatal(dev, err, "reading physical-device");
		return;
	}

	if (be->major | be->minor) {
		if (be->major != major || be->minor != minor)
			pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
				be->major, be->minor, major, minor);
		return;
	}

	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
	if (IS_ERR(be->mode)) {
		err = PTR_ERR(be->mode);
		be->mode = NULL;
		xenbus_dev_fatal(dev, err, "reading mode");
		return;
	}

	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
	if (!IS_ERR(device_type)) {
		cdrom = strcmp(device_type, "cdrom") == 0;
		kfree(device_type);
	}

	/* The frontend directory name is a number, used as the handle. */
	err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
	if (err)
		return;

	be->major = major;
	be->minor = minor;

	err = xen_vbd_create(be->blkif, handle, major, minor,
			     !strchr(be->mode, 'w'), cdrom);

	if (err)
		xenbus_dev_fatal(dev, err, "creating vbd structure");
	else {
		err = xenvbd_sysfs_addif(dev);
		if (err) {
			xen_vbd_free(&be->blkif->vbd);
			xenbus_dev_fatal(dev, err, "creating sysfs entries");
		}
	}

	if (err) {
		kfree(be->mode);
		be->mode = NULL;
		be->major = 0;
		be->minor = 0;
	} else {
		/* We're potentially connected now */
		xen_update_blkif_status(be->blkif);
	}
}

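/*
 * Handshake summary, as implemented below: the frontend entering
 * Initialised or Connected makes us read the ring details and connect;
 * Closing is mirrored back to the frontend; on Closed we disconnect and,
 * if the device is no longer online, unregister it entirely.
 */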
/*
 * Callback received when the frontend's state changes.
 */
static void frontend_changed(struct xenbus_device *dev,
			     enum xenbus_state frontend_state)
{
	struct backend_info *be = dev_get_drvdata(&dev->dev);
	int err;

	pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));

	switch (frontend_state) {
	case XenbusStateInitialising:
		if (dev->state == XenbusStateClosed) {
			pr_info("%s: prepare for reconnect\n", dev->nodename);
			xenbus_switch_state(dev, XenbusStateInitWait);
		}
		break;

	case XenbusStateInitialised:
	case XenbusStateConnected:
		/*
		 * Ensure we connect even when two watches fire in
		 * close succession and we miss the intermediate value
		 * of frontend_state.
		 */
		if (dev->state == XenbusStateConnected)
			break;

		/*
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err) {
			/*
			 * Clean up so that memory resources can be used by
			 * other devices. connect_ring() has already reported
			 * the error.
			 */
			xen_blkif_disconnect(be->blkif);
			break;
		}
		xen_update_blkif_status(be->blkif);
		break;

	case XenbusStateClosing:
		xenbus_switch_state(dev, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		xen_blkif_disconnect(be->blkif);
		xenbus_switch_state(dev, XenbusStateClosed);
		if (xenbus_dev_is_online(dev))
			break;
		/* fall through if not online */
	case XenbusStateUnknown:
		/* implies xen_blkif_disconnect() via xen_blkbk_remove() */
		device_unregister(&dev->dev);
		break;

	default:
		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
				 frontend_state);
		break;
	}
}


/* ** Connection ** */

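/*
 * connect() below publishes, in one transaction (retried on -EAGAIN),
 * the flush/barrier/discard feature nodes plus "feature-persistent",
 * "feature-max-indirect-segments", "sectors", "info", "sector-size" and
 * "physical-sector-size".
 */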
/*
 * Write the physical details regarding the block device to the store, and
 * switch to Connected state.
 */
static void connect(struct backend_info *be)
{
	struct xenbus_transaction xbt;
	int err;
	struct xenbus_device *dev = be->dev;

	pr_debug("%s %s\n", __func__, dev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	err = xenbus_transaction_start(&xbt);
	if (err) {
		xenbus_dev_fatal(dev, err, "starting transaction");
		return;
	}

	/* It is OK if we can't advertise these features. */
	xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);

	xen_blkbk_discard(xbt, be);

	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);

	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
			    MAX_INDIRECT_SEGMENTS);
	if (err)
		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
			 dev->nodename, err);

	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
			    (unsigned long long)vbd_sz(&be->blkif->vbd));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sectors",
				 dev->nodename);
		goto abort;
	}

	/* FIXME: use a typename instead */
	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
			    be->blkif->vbd.type |
			    (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/info",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
			    (unsigned long)
			    bdev_logical_block_size(be->blkif->vbd.bdev));
	if (err) {
		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
				 dev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
			    bdev_physical_block_size(be->blkif->vbd.bdev));
	if (err)
		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
				 dev->nodename);

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(dev, err, "ending transaction");

	err = xenbus_switch_state(dev, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
				 dev->nodename);

	return;
abort:
	xenbus_transaction_end(xbt, 1);
}

/*
 * Each ring may span multiple pages, depending on "ring-page-order".
 */
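/*
 * For example (assuming a 4 KiB XEN_PAGE_SIZE): "ring-page-order" = 2
 * means nr_grefs = 1 << 2 = 4 grant references, read from "ring-ref0"
 * .. "ring-ref3", for a 16 KiB shared ring. If the frontend does not
 * write "ring-page-order", we fall back to a single "ring-ref" page.
 */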
static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
{
	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
	struct pending_req *req, *n;
	int err, i, j;
	struct xen_blkif *blkif = ring->blkif;
	struct xenbus_device *dev = blkif->be->dev;
	unsigned int ring_page_order, nr_grefs, evtchn;

	err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
			   &evtchn);
	if (err != 1) {
		err = -EINVAL;
		xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
		return err;
	}

	err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
			   &ring_page_order);
	if (err != 1) {
		err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
		if (err != 1) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
			return err;
		}
		nr_grefs = 1;
	} else {
		unsigned int i;

		if (ring_page_order > xen_blkif_max_ring_order) {
			err = -EINVAL;
			xenbus_dev_fatal(dev, err, "%s: ring-page-order %u exceeds max %u",
					 dir, ring_page_order,
					 xen_blkif_max_ring_order);
			return err;
		}

		nr_grefs = 1 << ring_page_order;
		for (i = 0; i < nr_grefs; i++) {
			char ring_ref_name[RINGREF_NAME_LEN];

			snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
			err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
					   "%u", &ring_ref[i]);
			if (err != 1) {
				err = -EINVAL;
				xenbus_dev_fatal(dev, err, "reading %s/%s",
						 dir, ring_ref_name);
				return err;
			}
		}
	}
	blkif->nr_ring_pages = nr_grefs;

	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto fail;
		list_add_tail(&req->free_list, &ring->pending_free);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
			if (!req->segments[j])
				goto fail;
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
							 GFP_KERNEL);
			if (!req->indirect_pages[j])
				goto fail;
		}
	}

	/* Map the shared frame, irq etc. */
	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
	if (err) {
		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
		return err;
	}

	return 0;

fail:
	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
		list_del(&req->free_list);
		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
			if (!req->segments[j])
				break;
			kfree(req->segments[j]);
		}
		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
			if (!req->indirect_pages[j])
				break;
			kfree(req->indirect_pages[j]);
		}
		kfree(req);
	}
	return -ENOMEM;
}
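/*
 * With a single queue, the ring details live directly in the frontend's
 * own directory; with multi-queue negotiated, queue i reads them from
 * "<otherend>/queue-i" instead, e.g. ".../queue-0/ring-ref0" and
 * ".../queue-0/event-channel".
 */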
static int connect_ring(struct backend_info *be)
{
	struct xenbus_device *dev = be->dev;
	unsigned int pers_grants;
	char protocol[64] = "";
	int err, i;
	char *xspath;
	size_t xspathsize;
	const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
	unsigned int requested_num_queues = 0;

	pr_debug("%s %s\n", __func__, dev->otherend);

	be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
			    "%63s", protocol, NULL);
	if (err)
		strcpy(protocol, "unspecified, assuming default");
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
	else {
		xenbus_dev_fatal(dev, err, "unknown frontend protocol %s", protocol);
		return -ENOSYS;
	}
	err = xenbus_gather(XBT_NIL, dev->otherend,
			    "feature-persistent", "%u",
			    &pers_grants, NULL);
	if (err)
		pers_grants = 0;

	be->blkif->vbd.feature_gnt_persistent = pers_grants;
	be->blkif->vbd.overflow_max_grants = 0;

	/*
	 * Read the number of hardware queues from the frontend.
	 */
	err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
			   "%u", &requested_num_queues);
	if (err < 0) {
		requested_num_queues = 1;
	} else {
		if (requested_num_queues > xenblk_max_queues
		    || requested_num_queues == 0) {
			/* Buggy or malicious guest. */
			xenbus_dev_fatal(dev, err,
					 "guest requested %u queues, exceeding the maximum of %u.",
					 requested_num_queues, xenblk_max_queues);
			return -ENOSYS;
		}
	}
	be->blkif->nr_rings = requested_num_queues;
	if (xen_blkif_alloc_rings(be->blkif))
		return -ENOMEM;

	pr_info("%s: using %u queues, protocol %d (%s) %s\n", dev->nodename,
		be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
		pers_grants ? "persistent grants" : "");

	if (be->blkif->nr_rings == 1)
		return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
	else {
		xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
		xspath = kmalloc(xspathsize, GFP_KERNEL);
		if (!xspath) {
			xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
			return -ENOMEM;
		}

		for (i = 0; i < be->blkif->nr_rings; i++) {
			memset(xspath, 0, xspathsize);
			snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
			err = read_per_ring_refs(&be->blkif->rings[i], xspath);
			if (err) {
				kfree(xspath);
				return err;
			}
		}
		kfree(xspath);
	}
	return 0;
}
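/*
 * Driver registration: we bind to "vbd" backend nodes in the store;
 * probe/remove and frontend state changes are dispatched through the ops
 * below.
 */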
"persistent grants" : ""); 1070 1071 if (be->blkif->nr_rings == 1) 1072 return read_per_ring_refs(&be->blkif->rings[0], dev->otherend); 1073 else { 1074 xspathsize = strlen(dev->otherend) + xenstore_path_ext_size; 1075 xspath = kmalloc(xspathsize, GFP_KERNEL); 1076 if (!xspath) { 1077 xenbus_dev_fatal(dev, -ENOMEM, "reading ring references"); 1078 return -ENOMEM; 1079 } 1080 1081 for (i = 0; i < be->blkif->nr_rings; i++) { 1082 memset(xspath, 0, xspathsize); 1083 snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i); 1084 err = read_per_ring_refs(&be->blkif->rings[i], xspath); 1085 if (err) { 1086 kfree(xspath); 1087 return err; 1088 } 1089 } 1090 kfree(xspath); 1091 } 1092 return 0; 1093 } 1094 1095 static const struct xenbus_device_id xen_blkbk_ids[] = { 1096 { "vbd" }, 1097 { "" } 1098 }; 1099 1100 static struct xenbus_driver xen_blkbk_driver = { 1101 .ids = xen_blkbk_ids, 1102 .probe = xen_blkbk_probe, 1103 .remove = xen_blkbk_remove, 1104 .otherend_changed = frontend_changed 1105 }; 1106 1107 int xen_blkif_xenbus_init(void) 1108 { 1109 return xenbus_register_backend(&xen_blkbk_driver); 1110 } 1111