/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/blktrace_api.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_SPINLOCK(_minor_lock);
/*
 * For bio-based dm.
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
};

/*
 * For bio-based dm.
 * One of these is allocated per target within a bio. Hopefully
 * this will be simplified out one day.
 */
struct dm_target_io {
	struct dm_io *io;
	struct dm_target *ti;
	union map_info info;
};

/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, clone;
	int error;
	union map_info info;
};

/*
 * For request-based dm.
 * One of these is allocated per bio.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct request *rq;
};

union map_info *dm_get_mapinfo(struct bio *bio)
{
	if (bio && bio->bi_private)
		return &((struct dm_target_io *)bio->bi_private)->info;
	return NULL;
}

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_QUEUE_IO_TO_THREAD 6

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct rw_semaphore io_lock;
	struct mutex suspend_lock;
	rwlock_t map_lock;
	atomic_t holders;
	atomic_t open_count;

	unsigned long flags;

	struct request_queue *queue;
	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending;
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * An error from the barrier request currently being processed.
	 */
	int barrier_error;

	/*
	 * Processing queue (flush/barriers)
	 */
	struct workqueue_struct *wq;

	/*
	 * The current mapping.
	 */
	struct dm_table *map;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *tio_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support require holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *suspended_bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* sysfs handle */
	struct kobject kobj;
};

#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_tio_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_bio_info_cache;

static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	/* allocate a slab for the target ios */
	_tio_cache = KMEM_CACHE(dm_target_io, 0);
	if (!_tio_cache)
		goto out_free_io_cache;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_tio_cache;

	_rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0);
	if (!_rq_bio_info_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_bio_info_cache;

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_uevent_exit;

	if (!_major)
		_major = r;

	return 0;

out_uevent_exit:
	dm_uevent_exit();
out_free_rq_bio_info_cache:
	kmem_cache_destroy(_rq_bio_info_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_tio_cache:
	kmem_cache_destroy(_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	kmem_cache_destroy(_rq_bio_info_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_kcopyd_init,
	dm_interface_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();
}

/*
 * Block device functions
 */
static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    test_bit(DMF_DELETING, &md->flags)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);

out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static int dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md = disk->private_data;
	atomic_dec(&md->open_count);
	dm_put(md);
	return 0;
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md))
		r = -EBUSY;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	struct dm_table *map = dm_get_table(md);
	struct dm_target *tgt;
	int r = -ENOTTY;

	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);

	if (dm_suspended(md)) {
		r = -EAGAIN;
		goto out;
	}

	if (tgt->type->ioctl)
		r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_table_put(map);

	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static struct dm_target_io *alloc_tio(struct mapped_device *md)
{
	return mempool_alloc(md->tio_pool, GFP_NOIO);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	mempool_free(tio, md->tio_pool);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	int cpu;

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending, cpu;
	int rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
	part_stat_unlock();

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a barrier.
	 */
	dm_disk(md)->part0.in_flight = pending =
		atomic_dec_return(&md->pending);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	down_write(&md->io_lock);

	spin_lock_irq(&md->deferred_lock);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irq(&md->deferred_lock);

	if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))
		queue_work(md->wq, &md->work);

	up_write(&md->io_lock);
}

/*
 * Everyone (including functions in this file) should use this
 * function to access the md->map field, and make sure they call
 * dm_table_put() when finished.
 */
struct dm_table *dm_get_table(struct mapped_device *md)
{
	struct dm_table *t;

	read_lock(&md->map_lock);
	t = md->map;
	if (t)
		dm_table_get(t);
	read_unlock(&md->map_lock);

	return t;
}
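
/*
 * Typical use of the table accessor above, shown as an illustrative
 * sketch rather than a call site in this file (use_table() is a
 * hypothetical helper):
 *
 *	struct dm_table *map = dm_get_table(md);
 *
 *	if (map) {
 *		use_table(map);
 *		dm_table_put(map);
 *	}
 *
 * dm_get_table() takes a reference under md->map_lock and the caller
 * drops it with dm_table_put(); dm_blk_ioctl() above and
 * __split_and_process_bio() below follow this pattern.
 */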

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

/*-----------------------------------------------------------------
 * CRUD START:
 * A more elegant soln is in the works that uses the queue
 * merge fn, unfortunately there are a couple of changes to
 * the block layer that I want to make for this.  So in the
 * interests of getting something for people to use I give
 * you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (error && !(io->error > 0 && __noflush_suspending(md)))
		io->error = error;

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				bio_list_add_head(&md->deferred, io->bio);
			else
				/* noflush suspend was interrupted. */
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;

		if (bio_barrier(bio)) {
			/*
			 * There can be just one barrier request so we use
			 * a per-device variable for error reporting.
			 * Note that you can't touch the bio after end_io_acct
			 */
			md->barrier_error = io_error;
			end_io_acct(io);
		} else {
			end_io_acct(io);

			if (io_error != DM_ENDIO_REQUEUE) {
				trace_block_bio_complete(md->queue, bio);

				bio_endio(bio, io_error);
			}
		}

		free_io(md, io);
	}
}

static void clone_endio(struct bio *bio, int error)
{
	int r = 0;
	struct dm_target_io *tio = bio->bi_private;
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error, &tio->info);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	/*
	 * Store md for cleanup instead of tio which is about to get freed.
	 */
	bio->bi_private = md->bs;

	free_tio(md, tio);
	bio_put(bio);
	dec_pending(io, error);
}

static sector_t max_io_len(struct mapped_device *md,
			   sector_t sector, struct dm_target *ti)
{
	sector_t offset = sector - ti->begin;
	sector_t len = ti->len - offset;

	/*
	 * Does the target need to split even further ?
	 */
	if (ti->split_io) {
		sector_t boundary;
		boundary = ((offset + ti->split_io) & ~(ti->split_io - 1))
			   - offset;
		if (len > boundary)
			len = boundary;
	}

	return len;
}
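
/*
 * Worked example for the split_io clipping in max_io_len() above
 * (this assumes ti->split_io is a power of two, which the mask
 * arithmetic requires): with split_io = 64 and offset = 100, the next
 * chunk boundary is ((100 + 64) & ~63) = 128, so boundary = 28 and
 * any larger len is clipped to 28 sectors, keeping each clone inside
 * a single 64-sector chunk.
 */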

static void __map_bio(struct dm_target *ti, struct bio *clone,
		      struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;

	/*
	 * Sanity checks.
	 */
	BUG_ON(!clone->bi_size);

	clone->bi_end_io = clone_endio;
	clone->bi_private = tio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_sector;
	r = ti->type->map(ti, clone, &tio->info);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
				  tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		/*
		 * Store bio_set for cleanup.
		 */
		clone->bi_private = md->bs;
		bio_put(clone);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

static void dm_bio_destructor(struct bio *bio)
{
	struct bio_set *bs = bio->bi_private;

	bio_free(bio, bs);
}

/*
 * Creates a little bio that just does part of a bvec.
 */
static struct bio *split_bvec(struct bio *bio, sector_t sector,
			      unsigned short idx, unsigned int offset,
			      unsigned int len, struct bio_set *bs)
{
	struct bio *clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
	clone->bi_destructor = dm_bio_destructor;
	*clone->bi_io_vec = *bv;

	clone->bi_sector = sector;
	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
	clone->bi_vcnt = 1;
	clone->bi_size = to_bytes(len);
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;
	clone->bi_flags |= 1 << BIO_CLONED;

	if (bio_integrity(bio)) {
		bio_integrity_clone(clone, bio, GFP_NOIO);
		bio_integrity_trim(clone,
				   bio_sector_offset(bio, idx, offset), len);
	}

	return clone;
}

/*
 * Creates a bio that consists of a range of complete bvecs.
 */
static struct bio *clone_bio(struct bio *bio, sector_t sector,
			     unsigned short idx, unsigned short bv_count,
			     unsigned int len, struct bio_set *bs)
{
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
	__bio_clone(clone, bio);
	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
	clone->bi_destructor = dm_bio_destructor;
	clone->bi_sector = sector;
	clone->bi_idx = idx;
	clone->bi_vcnt = idx + bv_count;
	clone->bi_size = to_bytes(len);
	clone->bi_flags &= ~(1 << BIO_SEG_VALID);

	if (bio_integrity(bio)) {
		bio_integrity_clone(clone, bio, GFP_NOIO);

		if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
			bio_integrity_trim(clone,
					   bio_sector_offset(bio, idx, 0), len);
	}

	return clone;
}
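
/*
 * __clone_and_map() below handles one of three cases per call: either
 * the whole remainder of the bio fits in the current target and a
 * single clone_bio() covers it, or only some leading whole bvecs fit
 * and they are cloned up to the target boundary, or even the first
 * bvec crosses the boundary and split_bvec() carves it up, allocating
 * a fresh tio for every piece after the first.
 */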

static int __clone_and_map(struct clone_info *ci)
{
	struct bio *clone, *bio = ci->bio;
	struct dm_target *ti;
	sector_t len = 0, max;
	struct dm_target_io *tio;

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	max = max_io_len(ci->md, ci->sector, ti);

	/*
	 * Allocate a target io object.
	 */
	tio = alloc_tio(ci->md);
	tio->io = ci->io;
	tio->ti = ti;
	memset(&tio->info, 0, sizeof(tio->info));

	if (ci->sector_count <= max) {
		/*
		 * Optimise for the simple case where we can do all of
		 * the remaining io with a single clone.
		 */
		clone = clone_bio(bio, ci->sector, ci->idx,
				  bio->bi_vcnt - ci->idx, ci->sector_count,
				  ci->md->bs);
		__map_bio(ti, clone, tio);
		ci->sector_count = 0;

	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
		/*
		 * There are some bvecs that don't span targets.
		 * Do as many of these as possible.
		 */
		int i;
		sector_t remaining = max;
		sector_t bv_len;

		for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) {
			bv_len = to_sector(bio->bi_io_vec[i].bv_len);

			if (bv_len > remaining)
				break;

			remaining -= bv_len;
			len += bv_len;
		}

		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
				  ci->md->bs);
		__map_bio(ti, clone, tio);

		ci->sector += len;
		ci->sector_count -= len;
		ci->idx = i;

	} else {
		/*
		 * Handle a bvec that must be split between two or more targets.
		 */
		struct bio_vec *bv = bio->bi_io_vec + ci->idx;
		sector_t remaining = to_sector(bv->bv_len);
		unsigned int offset = 0;

		do {
			if (offset) {
				ti = dm_table_find_target(ci->map, ci->sector);
				if (!dm_target_is_valid(ti))
					return -EIO;

				max = max_io_len(ci->md, ci->sector, ti);

				tio = alloc_tio(ci->md);
				tio->io = ci->io;
				tio->ti = ti;
				memset(&tio->info, 0, sizeof(tio->info));
			}

			len = min(remaining, max);

			clone = split_bvec(bio, ci->sector, ci->idx,
					   bv->bv_offset + offset, len,
					   ci->md->bs);

			__map_bio(ti, clone, tio);

			ci->sector += len;
			ci->sector_count -= len;
			offset += to_bytes(len);
		} while (remaining -= len);

		ci->idx++;
	}

	return 0;
}

/*
 * Split the bio into several clones and submit it to targets.
 */
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	ci.map = dm_get_table(md);
	if (unlikely(!ci.map)) {
		if (!bio_barrier(bio))
			bio_io_error(bio);
		else
			md->barrier_error = -EIO;
		return;
	}

	ci.md = md;
	ci.bio = bio;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	ci.sector = bio->bi_sector;
	ci.sector_count = bio_sectors(bio);
	ci.idx = bio->bi_idx;

	start_io_acct(ci.io);
	while (ci.sector_count && !error)
		error = __clone_and_map(&ci);

	/* drop the extra reference count */
	dec_pending(ci.io, error);
	dm_table_put(ci.map);
}
/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

static int dm_merge_bvec(struct request_queue *q,
			 struct bvec_merge_data *bvm,
			 struct bio_vec *biovec)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);
	struct dm_target *ti;
	sector_t max_sectors;
	int max_size = 0;

	if (unlikely(!map))
		goto out;

	ti = dm_table_find_target(map, bvm->bi_sector);
	if (!dm_target_is_valid(ti))
		goto out_table;

	/*
	 * Find maximum amount of I/O that won't need splitting
	 */
	max_sectors = min(max_io_len(md, bvm->bi_sector, ti),
			  (sector_t) BIO_MAX_SECTORS);
	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
	if (max_size < 0)
		max_size = 0;

	/*
	 * merge_bvec_fn() returns number of bytes
	 * it can accept at this offset
	 * max is precomputed maximal io size
	 */
	if (max_size && ti->type->merge)
		max_size = ti->type->merge(ti, bvm, biovec, max_size);

out_table:
	dm_table_put(map);

out:
	/*
	 * Always allow an entire first page
	 */
	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
		max_size = biovec->bv_len;

	return max_size;
}
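
/*
 * Example of the arithmetic in dm_merge_bvec() above, with
 * illustrative numbers only: if max_io_len() allows 8 more sectors
 * from bvm->bi_sector and the bio already holds bvm->bi_size = 1024
 * bytes, then max_size = (8 << SECTOR_SHIFT) - 1024 = 3072, i.e. up
 * to 3072 further bytes can be merged before the bio would need
 * splitting.
 */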

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static int dm_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int cpu;

	down_read(&md->io_lock);

	cpu = part_stat_lock();
	part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
	part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
	part_stat_unlock();

	/*
	 * If we're suspended or the thread is processing barriers
	 * we have to queue this io for later.
	 */
	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
	    unlikely(bio_barrier(bio))) {
		up_read(&md->io_lock);

		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
		    bio_rw(bio) == READA) {
			bio_io_error(bio);
			return 0;
		}

		queue_io(md, bio);

		return 0;
	}

	__split_and_process_bio(md, bio);
	up_read(&md->io_lock);
	return 0;
}

static void dm_unplug_all(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_table *map = dm_get_table(md);

	if (map) {
		dm_table_unplug_all(map);
		dm_table_put(map);
	}
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_table(md);
		if (map) {
			r = dm_table_any_congested(map, bdi_bits);
			dm_table_put(map);
		}
	}

	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static DEFINE_IDR(_minor_idr);

static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r, m;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	if (idr_find(&_minor_idr, minor)) {
		r = -EBUSY;
		goto out;
	}

	r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
	if (r)
		goto out;

	if (m != minor) {
		idr_remove(&_minor_idr, m);
		r = -EBUSY;
		goto out;
	}

out:
	spin_unlock(&_minor_lock);
	return r;
}

static int next_free_minor(int *minor)
{
	int r, m;

	r = idr_pre_get(&_minor_idr, GFP_KERNEL);
	if (!r)
		return -ENOMEM;

	spin_lock(&_minor_lock);

	r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
	if (r)
		goto out;

	if (m >= (1 << MINORBITS)) {
		idr_remove(&_minor_idr, m);
		r = -ENOSPC;
		goto out;
	}

	*minor = m;

out:
	spin_unlock(&_minor_lock);
	return r;
}

static struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r;
	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
	void *old_md;

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	init_rwsem(&md->io_lock);
	mutex_init(&md->suspend_lock);
	spin_lock_init(&md->deferred_lock);
	rwlock_init(&md->map_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad_queue;

	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	md->queue->backing_dev_info.congested_data = md;
	blk_queue_make_request(md->queue, dm_request);
	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
	md->queue->unplug_fn = dm_unplug_all;
	blk_queue_merge_bvec(md->queue, dm_merge_bvec);

	md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache);
	if (!md->io_pool)
		goto bad_io_pool;

	md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache);
	if (!md->tio_pool)
		goto bad_tio_pool;

	md->bs = bioset_create(16, 0);
	if (!md->bs)
		goto bad_no_bioset;

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad_disk;

	atomic_set(&md->pending, 0);
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = create_singlethread_workqueue("kdmflush");
	if (!md->wq)
		goto bad_thread;

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad_thread:
	put_disk(md->disk);
bad_disk:
	bioset_free(md->bs);
bad_no_bioset:
	mempool_destroy(md->tio_pool);
bad_tio_pool:
	mempool_destroy(md->io_pool);
bad_io_pool:
	blk_cleanup_queue(md->queue);
bad_queue:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kfree(md);
	return NULL;
}

static void unlock_fs(struct mapped_device *md);

static void free_dev(struct mapped_device *md)
{
	int minor = MINOR(disk_devt(md->disk));

	if (md->suspended_bdev) {
		unlock_fs(md);
		bdput(md->suspended_bdev);
	}
	destroy_workqueue(md->wq);
	mempool_destroy(md->tio_pool);
	mempool_destroy(md->io_pool);
	bioset_free(md->bs);
	blk_integrity_unregister(md->disk);
	del_gendisk(md->disk);
	free_minor(minor);

	spin_lock(&_minor_lock);
	md->disk->private_data = NULL;
	spin_unlock(&_minor_lock);

	put_disk(md->disk);
	blk_cleanup_queue(md->queue);
	module_put(THIS_MODULE);
	kfree(md);
}

/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	unsigned long flags;
	LIST_HEAD(uevents);
	struct mapped_device *md = (struct mapped_device *) context;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &uevents);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
}

static void __set_size(struct mapped_device *md, sector_t size)
{
	set_capacity(md->disk, size);

	mutex_lock(&md->suspended_bdev->bd_inode->i_mutex);
	i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
	mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex);
}

static int __bind(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q = md->queue;
	sector_t size;

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != get_capacity(md->disk))
		memset(&md->geometry, 0, sizeof(md->geometry));

	if (md->suspended_bdev)
		__set_size(md, size);

	if (!size) {
		dm_table_destroy(t);
		return 0;
	}

	dm_table_event_callback(t, event_callback, md);

	write_lock(&md->map_lock);
	md->map = t;
	dm_table_set_restrictions(t, q);
	write_unlock(&md->map_lock);

	return 0;
}

static void __unbind(struct mapped_device *md)
{
	struct dm_table *map = md->map;

	if (!map)
		return;

	dm_table_event_callback(map, NULL, NULL);
	write_lock(&md->map_lock);
	md->map = NULL;
	write_unlock(&md->map_lock);
	dm_table_destroy(map);
}

/*
 * Constructor for a new device.
 */
int dm_create(int minor, struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor);
	if (!md)
		return -ENXIO;

	dm_sysfs_init(md);

	*result = md;
	return 0;
}

static struct mapped_device *dm_find_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (md && (md == MINOR_ALLOCED ||
		   (MINOR(disk_devt(dm_disk(md))) != minor) ||
		   test_bit(DMF_FREEING, &md->flags))) {
		md = NULL;
		goto out;
	}

out:
	spin_unlock(&_minor_lock);

	return md;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md = dm_find_md(dev);

	if (md)
		dm_get(md);

	return md;
}

void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
}

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);

void dm_put(struct mapped_device *md)
{
	struct dm_table *map;

	BUG_ON(test_bit(DMF_FREEING, &md->flags));

	if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
		map = dm_get_table(md);
		idr_replace(&_minor_idr, MINOR_ALLOCED,
			    MINOR(disk_devt(dm_disk(md))));
		set_bit(DMF_FREEING, &md->flags);
		spin_unlock(&_minor_lock);
		if (!dm_suspended(md)) {
			dm_table_presuspend_targets(map);
			dm_table_postsuspend_targets(map);
		}
		dm_sysfs_exit(md);
		dm_table_put(map);
		__unbind(md);
		free_dev(md);
	}
}
EXPORT_SYMBOL_GPL(dm_put);
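
/*
 * Reference counting sketch for the holders count managed by dm_get()
 * and dm_put(), illustrative only ('dev' is a hypothetical dev_t):
 *
 *	struct mapped_device *md = dm_get_md(dev);
 *
 *	if (md) {
 *		... use md ...
 *		dm_put(md);
 *	}
 *
 * dm_get_md() takes the reference on behalf of the caller; the final
 * dm_put() tears the device down via __unbind() and free_dev().
 */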

static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
	int r = 0;
	DECLARE_WAITQUEUE(wait, current);

	dm_unplug_all(md->queue);

	add_wait_queue(&md->wait, &wait);

	while (1) {
		set_current_state(interruptible);

		smp_mb();
		if (!atomic_read(&md->pending))
			break;

		if (interruptible == TASK_INTERRUPTIBLE &&
		    signal_pending(current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&md->wait, &wait);

	return r;
}

static int dm_flush(struct mapped_device *md)
{
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
	return 0;
}

static void process_barrier(struct mapped_device *md, struct bio *bio)
{
	int error = dm_flush(md);

	if (unlikely(error)) {
		bio_endio(bio, error);
		return;
	}
	if (bio_empty_barrier(bio)) {
		bio_endio(bio, 0);
		return;
	}

	__split_and_process_bio(md, bio);

	error = dm_flush(md);

	if (!error && md->barrier_error)
		error = md->barrier_error;

	if (md->barrier_error != DM_ENDIO_REQUEUE)
		bio_endio(bio, error);
}

/*
 * Process the deferred bios
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;

	down_write(&md->io_lock);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c) {
			clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
			break;
		}

		up_write(&md->io_lock);

		if (bio_barrier(c))
			process_barrier(md, c);
		else
			__split_and_process_bio(md, c);

		down_write(&md->io_lock);
	}

	up_write(&md->io_lock);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	smp_mb__after_clear_bit();
	queue_work(md->wq, &md->work);
}

/*
 * Swap in a new table (destroying old one).
 */
int dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	int r = -EINVAL;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended(md))
		goto out;

	/* without bdev, the device size cannot be changed */
	if (!md->suspended_bdev)
		if (get_capacity(md->disk) != dm_table_get_size(table))
			goto out;

	__unbind(md);
	r = __bind(md, table);

out:
	mutex_unlock(&md->suspend_lock);
	return r;
}

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->suspended_bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	/* don't bdput right now, we don't want the bdev
	 * to go away while it is locked.
	 */
	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->suspended_bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;
	int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
	int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;

	mutex_lock(&md->suspend_lock);

	if (dm_suspended(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	map = dm_get_table(md);

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/* This does not get reverted if there's an error later. */
	dm_table_presuspend_targets(map);

	/* bdget() can stall if the pending I/Os are not flushed */
	if (!noflush) {
		md->suspended_bdev = bdget_disk(md->disk, 0);
		if (!md->suspended_bdev) {
			DMWARN("bdget failed in dm_suspend");
			r = -ENOMEM;
			goto out;
		}

		/*
		 * Flush I/O to the device. noflush supersedes do_lockfs,
		 * because lock_fs() needs to flush I/Os.
		 */
		if (do_lockfs) {
			r = lock_fs(md);
			if (r)
				goto out;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * __split_and_process_bio. This is called from dm_request and
	 * dm_wq_work.
	 *
	 * To get all processes out of __split_and_process_bio in dm_request,
	 * we take the write lock. To prevent any process from reentering
	 * __split_and_process_bio from dm_request, we set
	 * DMF_QUEUE_IO_TO_THREAD.
	 *
	 * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND
	 * and call flush_workqueue(md->wq). flush_workqueue will wait until
	 * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any
	 * further calls to __split_and_process_bio from dm_wq_work.
	 */
	down_write(&md->io_lock);
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags);
	up_write(&md->io_lock);

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);

	down_write(&md->io_lock);
	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	up_write(&md->io_lock);

	/* were we interrupted ? */
	if (r < 0) {
		dm_queue_flush(md);

		unlock_fs(md);
		goto out; /* pushback list is already flushed, so skip flush */
	}

	/*
	 * If dm_wait_for_completion returned 0, the device is completely
	 * quiescent now. There is no request-processing activity. All new
	 * requests are being added to md->deferred list.
	 */

	dm_table_postsuspend_targets(map);

	set_bit(DMF_SUSPENDED, &md->flags);

out:
	if (r && md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	dm_table_put(map);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

int dm_resume(struct mapped_device *md)
{
	int r = -EINVAL;
	struct dm_table *map = NULL;

	mutex_lock(&md->suspend_lock);
	if (!dm_suspended(md))
		goto out;

	map = dm_get_table(md);
	if (!map || !dm_table_get_size(map))
		goto out;

	r = dm_table_resume_targets(map);
	if (r)
		goto out;

	dm_queue_flush(md);

	unlock_fs(md);

	if (md->suspended_bdev) {
		bdput(md->suspended_bdev);
		md->suspended_bdev = NULL;
	}

	clear_bit(DMF_SUSPENDED, &md->flags);

	dm_table_unplug_all(map);

	dm_kobject_uevent(md);

	r = 0;

out:
	dm_table_put(map);
	mutex_unlock(&md->suspend_lock);

	return r;
}
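
/*
 * Sketch of how the suspend/resume pair above is normally driven when
 * a new table is loaded (roughly the sequence used by the ioctl
 * interface; error handling omitted):
 *
 *	dm_suspend(md, DM_SUSPEND_LOCKFS_FLAG);
 *	r = dm_swap_table(md, new_table);
 *	dm_resume(md);
 *
 * dm_swap_table() returns -EINVAL unless the device is suspended, and
 * dm_resume() requeues any bios that were deferred while suspended.
 */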
1724 *---------------------------------------------------------------*/ 1725 void dm_kobject_uevent(struct mapped_device *md) 1726 { 1727 kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); 1728 } 1729 1730 uint32_t dm_next_uevent_seq(struct mapped_device *md) 1731 { 1732 return atomic_add_return(1, &md->uevent_seq); 1733 } 1734 1735 uint32_t dm_get_event_nr(struct mapped_device *md) 1736 { 1737 return atomic_read(&md->event_nr); 1738 } 1739 1740 int dm_wait_event(struct mapped_device *md, int event_nr) 1741 { 1742 return wait_event_interruptible(md->eventq, 1743 (event_nr != atomic_read(&md->event_nr))); 1744 } 1745 1746 void dm_uevent_add(struct mapped_device *md, struct list_head *elist) 1747 { 1748 unsigned long flags; 1749 1750 spin_lock_irqsave(&md->uevent_lock, flags); 1751 list_add(elist, &md->uevent_list); 1752 spin_unlock_irqrestore(&md->uevent_lock, flags); 1753 } 1754 1755 /* 1756 * The gendisk is only valid as long as you have a reference 1757 * count on 'md'. 1758 */ 1759 struct gendisk *dm_disk(struct mapped_device *md) 1760 { 1761 return md->disk; 1762 } 1763 1764 struct kobject *dm_kobject(struct mapped_device *md) 1765 { 1766 return &md->kobj; 1767 } 1768 1769 /* 1770 * struct mapped_device should not be exported outside of dm.c 1771 * so use this check to verify that kobj is part of md structure 1772 */ 1773 struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 1774 { 1775 struct mapped_device *md; 1776 1777 md = container_of(kobj, struct mapped_device, kobj); 1778 if (&md->kobj != kobj) 1779 return NULL; 1780 1781 dm_get(md); 1782 return md; 1783 } 1784 1785 int dm_suspended(struct mapped_device *md) 1786 { 1787 return test_bit(DMF_SUSPENDED, &md->flags); 1788 } 1789 1790 int dm_noflush_suspending(struct dm_target *ti) 1791 { 1792 struct mapped_device *md = dm_table_get_md(ti->table); 1793 int r = __noflush_suspending(md); 1794 1795 dm_put(md); 1796 1797 return r; 1798 } 1799 EXPORT_SYMBOL_GPL(dm_noflush_suspending); 1800 1801 static struct block_device_operations dm_blk_dops = { 1802 .open = dm_blk_open, 1803 .release = dm_blk_close, 1804 .ioctl = dm_blk_ioctl, 1805 .getgeo = dm_blk_getgeo, 1806 .owner = THIS_MODULE 1807 }; 1808 1809 EXPORT_SYMBOL(dm_get_mapinfo); 1810 1811 /* 1812 * module hooks 1813 */ 1814 module_init(dm_init); 1815 module_exit(dm_exit); 1816 1817 module_param(major, uint, 0); 1818 MODULE_PARM_DESC(major, "The major number of the device mapper"); 1819 MODULE_DESCRIPTION(DM_NAME " driver"); 1820 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 1821 MODULE_LICENSE("GPL"); 1822