/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/kthread.h>
#include <linux/ktime.h>
#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>
#include <linux/pr.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

#ifdef CONFIG_PRINTK
/*
 * ratelimit state to be used in DMXXX_LIMIT().
 */
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
		       DEFAULT_RATELIMIT_INTERVAL,
		       DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

/*
 * For bio-based dm.
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
};

/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, *clone;
	struct kthread_work work;
	int error;
	union map_info info;
	struct dm_stats_aux stats_aux;
	unsigned long duration_jiffies;
	unsigned n_sectors;
};

/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
 * the bioset is created - this means the bio has to come at the end of the
 * struct.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
	struct bio clone;
};
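/*
 * Illustrative sketch, not part of the original file: because 'clone' is the
 * last member of struct dm_rq_clone_bio_info above and the bioset is created
 * with a matching front_pad, a clone bio handed to an end_io callback can be
 * mapped back to its per-clone info with container_of(), which is exactly
 * what end_clone_bio() does later in this file.  The helper name is
 * hypothetical and the block is not compiled.
 */
#if 0
static struct dm_rq_clone_bio_info *example_clone_bio_info(struct bio *clone)
{
	/* Valid only for bios allocated from a bioset with this front_pad. */
	return container_of(clone, struct dm_rq_clone_bio_info, clone);
}
#endif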
union map_info *dm_get_rq_mapinfo(struct request *rq)
{
	if (rq && rq->end_io_data)
		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
	return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7

/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */
struct dm_table {
	int undefined__;
};

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;
	atomic_t holders;
	atomic_t open_count;

	/*
	 * The current mapping.
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */
	struct dm_table __rcu *map;

	struct list_head table_devices;
	struct mutex table_devices_lock;

	unsigned long flags;

	struct request_queue *queue;
	unsigned type;
	/* Protect queue and type against concurrent access. */
	struct mutex type_lock;

	struct target_type *immutable_target_type;

	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending[2];
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * Processing queue (flush)
	 */
	struct workqueue_struct *wq;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;
	mempool_t *rq_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support requires holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* kobject and completion */
	struct dm_kobject_holder kobj_holder;

	/* zero-length flush that will be cloned and submitted to targets */
	struct bio flush_bio;

	/* the number of internal suspends */
	unsigned internal_suspend_count;

	struct dm_stats stats;

	struct kthread_worker kworker;
	struct task_struct *kworker_task;

	/* for request-based merge heuristic in dm_request_fn() */
	unsigned seq_rq_merge_deadline_usecs;
	int last_rq_rw;
	sector_t last_rq_pos;
	ktime_t last_rq_start_time;

	/* for blk-mq request-based DM support */
	struct blk_mq_tag_set tag_set;
	bool use_blk_mq;
};

#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif

bool dm_use_blk_mq(struct mapped_device *md)
{
	return md->use_blk_mq;
}

/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	mempool_t *io_pool;
	mempool_t *rq_pool;
	struct bio_set *bs;
};

struct table_device {
	struct list_head list;
	atomic_t count;
	struct dm_dev dm_dev;
};

#define RESERVED_BIO_BASED_IOS		16
#define RESERVED_REQUEST_BASED_IOS	256
#define RESERVED_MAX_IOS		1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;

/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static unsigned __dm_get_module_param(unsigned *module_param,
				      unsigned def, unsigned max)
{
	unsigned param = ACCESS_ONCE(*module_param);
	unsigned modified_param = 0;

	if (!param)
		modified_param = def;
	else if (param > max)
		modified_param = max;

	if (modified_param) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

unsigned dm_get_reserved_bio_based_ios(void)
{
	return __dm_get_module_param(&reserved_bio_based_ios,
				     RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_io_cache;

	_rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
				      __alignof__(struct request), 0, NULL);
	if (!_rq_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_cache;

	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
	if (!deferred_remove_workqueue) {
		r = -ENOMEM;
		goto out_uevent_exit;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_free_workqueue;

	if (!_major)
		_major = r;

	return 0;

out_free_workqueue:
	destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
	dm_uevent_exit();
out_free_rq_cache:
	kmem_cache_destroy(_rq_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	flush_scheduled_work();
	destroy_workqueue(deferred_remove_workqueue);

	kmem_cache_destroy(_rq_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	/*
	 * Should be empty by this point.
	 */
	idr_destroy(&_minor_idr);
}

/*
 * Block device functions
 */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct request_queue *dm_get_md_queue(struct mapped_device *md)
{
	return md->queue;
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_get_live_table_for_ioctl(struct mapped_device *md,
		struct dm_target **tgt, struct block_device **bdev,
		fmode_t *mode, int *srcu_idx)
{
	struct dm_table *map;
	int r;

retry:
	r = -ENOTTY;
	map = dm_get_live_table(md, srcu_idx);
	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	*tgt = dm_table_get_target(map, 0);

	if (!(*tgt)->type->prepare_ioctl)
		goto out;

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}

	r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode);
	if (r < 0)
		goto out;

	return r;

out:
	dm_put_live_table(md, *srcu_idx);
	if (r == -ENOTCONN) {
		msleep(10);
		goto retry;
	}
	return r;
}
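/*
 * Illustrative sketch, not part of the original file: roughly what a simple
 * single-device target's ->prepare_ioctl hook (called above through
 * (*tgt)->type->prepare_ioctl) looks like.  It points *bdev at the underlying
 * device and returns > 0 when the target maps only part of that device,
 * which makes dm_blk_ioctl() below run the extra scsi_verify_blk_ioctl()
 * check.  All names here are hypothetical and the block is not compiled.
 */
#if 0
struct example_target {
	struct dm_dev *dev;	/* underlying device from dm_get_device() */
	sector_t start;		/* offset of this target on ->dev */
};

static int example_prepare_ioctl(struct dm_target *ti,
				 struct block_device **bdev, fmode_t *mode)
{
	struct example_target *et = ti->private;

	*bdev = et->dev->bdev;

	/* Only pass ioctls through untouched if we cover the whole device. */
	if (et->start ||
	    ti->len != i_size_read(et->dev->bdev->bd_inode) >> SECTOR_SHIFT)
		return 1;

	return 0;
}
#endif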
static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	struct dm_target *tgt;
	int srcu_idx, r;

	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
	if (r < 0)
		return r;

	if (r > 0) {
		/*
		 * Target determined this ioctl is being issued against
		 * a logical partition of the parent bdev; so extra
		 * validation is needed.
		 */
		r = scsi_verify_blk_ioctl(NULL, cmd);
		if (r)
			goto out;
	}

	r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
	dm_put_live_table(md, srcu_idx);
	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	bio_put(&tio->clone);
}

static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
					    gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_clone_request(struct mapped_device *md,
					   gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}

static int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
				    bio_sectors(bio), true, duration, &io->stats_aux);

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a flush.
	 */
	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

/*
 * Everyone (including functions in this file) should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}
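/*
 * Illustrative sketch, not part of the original file: the pairing described
 * in the comment above dm_get_live_table().  A reader takes an SRCU
 * reference, uses the table, and drops the reference; the table pointer must
 * not be cached beyond dm_put_live_table().  The helper name is hypothetical
 * and the block is not compiled.
 */
#if 0
static sector_t example_live_table_size(struct mapped_device *md)
{
	struct dm_table *map;
	sector_t size = 0;
	int srcu_idx;

	map = dm_get_live_table(md, &srcu_idx);
	if (map)
		size = dm_table_get_size(map);
	dm_put_live_table(md, srcu_idx);

	return size;
}
#endif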
/*
 * Open a table device so we can use it as a map destination.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	r = bd_link_disk_holder(bdev, dm_disk(md));
	if (r) {
		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
		return r;
	}

	td->dm_dev.bdev = bdev;
	return 0;
}

/*
 * Close a table device that we've been using.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (!td->dm_dev.bdev)
		return;

	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
	td->dm_dev.bdev = NULL;
}

static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode) {
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result) {
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc(sizeof(*td), GFP_KERNEL);
		if (!td) {
			mutex_unlock(&md->table_devices_lock);
			return -ENOMEM;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		if ((r = open_table_device(td, dev, md))) {
			mutex_unlock(&md->table_devices_lock);
			kfree(td);
			return r;
		}

		format_dev_t(td->dm_dev.name, dev);

		atomic_set(&td->count, 0);
		list_add(&td->list, &md->table_devices);
	}
	atomic_inc(&td->count);
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);

void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (atomic_dec_and_test(&td->count)) {
		close_table_device(td, md);
		list_del(&td->list);
		kfree(td);
	}
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);

static void free_table_devices(struct list_head *devices)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct table_device *td = list_entry(tmp, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       td->dm_dev.name, atomic_read(&td->count));
		kfree(td);
	}
}
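/*
 * Illustrative sketch, not part of the original file: how the refcounted
 * dm_get_table_device()/dm_put_table_device() pair above is meant to be used
 * by a caller that needs temporary access to an underlying device.  The
 * helper name and mode are hypothetical and the block is not compiled.
 */
#if 0
static int example_with_table_device(struct mapped_device *md, dev_t dev)
{
	struct dm_dev *d;
	int r;

	r = dm_get_table_device(md, dev, FMODE_READ, &d);
	if (r)
		return r;

	/* ... use d->bdev here ... */

	dm_put_table_device(md, d);
	return 0;
}
#endif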
/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant solution is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (unlikely(error)) {
		spin_lock_irqsave(&io->endio_lock, flags);
		if (!(io->error > 0 && __noflush_suspending(md)))
			io->error = error;
		spin_unlock_irqrestore(&io->endio_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				bio_list_add_head(&md->deferred, io->bio);
			else
				/* noflush suspend was interrupted. */
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;
		end_io_acct(io);
		free_io(md, io);

		if (io_error == DM_ENDIO_REQUEUE)
			return;

		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
			/*
			 * Preflush done for flush with data, reissue
			 * without REQ_FLUSH.
			 */
			bio->bi_rw &= ~REQ_FLUSH;
			queue_io(md, bio);
		} else {
			/* done with normal IO or empty flush */
			trace_block_bio_complete(md->queue, bio, io_error);
			bio->bi_error = io_error;
			bio_endio(bio);
		}
	}
}

static void disable_write_same(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE SAME, disable it */
	limits->max_write_same_sectors = 0;
}

static void clone_endio(struct bio *bio)
{
	int error = bio->bi_error;
	int r = error;
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (endio) {
		r = endio(tio->ti, bio, error);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
		     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
		disable_write_same(md);

	free_tio(md, tio);
	dec_pending(io, error);
}
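/*
 * Illustrative sketch, not part of the original file: the return-value
 * contract that clone_endio() above enforces for a bio-based target's
 * ->end_io hook.  DM_ENDIO_REQUEUE asks dec_pending() to push the I/O back
 * during a noflush suspend, DM_ENDIO_INCOMPLETE means the target keeps
 * ownership of the bio, and any other positive value is treated as a bug.
 * The target and its requeue policy are hypothetical; the block is not
 * compiled.
 */
#if 0
static int example_target_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	if (error == -EBUSY)
		return DM_ENDIO_REQUEUE;	/* retry once the device resumes */

	return error;				/* 0 or a final -Exxx result */
}
#endif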
/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
	int error = clone->bi_error;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io() handle
		 * the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't report the error to the upper layer yet.
		 * The error handling decision is made by the target driver,
		 * when the request is completed.
		 */
		tio->error = error;
		return;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Report the data completion to the upper layer.
	 */

	/*
	 * bios are processed from the head of the list.
	 * So the completing bio should always be rq->bio.
	 * If it's not, something wrong is happening.
	 */
	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, 0, nr_bytes);
}

static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies - tio->duration_jiffies;
		dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
				    tio->n_sectors, true, tio->duration_jiffies,
				    &tio->stats_aux);
	}
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (run_queue) {
		if (md->queue->mq_ops)
			blk_mq_run_hw_queues(md->queue, true);
		else
			blk_run_queue_async(md->queue);
	}

	/*
	 * dm_put() must be at the end of this function. See the comment above
	 */
	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;

	blk_rq_unprep_clone(clone);

	if (md->type == DM_TYPE_MQ_REQUEST_BASED)
		/* stacked on blk-mq queue(s) */
		tio->ti->type->release_clone_rq(clone);
	else if (!md->queue->mq_ops)
		/* request_fn queue stacked on request_fn queue(s) */
		free_clone_request(md, clone);
	/*
	 * NOTE: for the blk-mq queue stacked on request_fn queue(s) case:
	 * no need to call free_clone_request() because we leverage blk-mq by
	 * allocating the clone at the end of the blk-mq pdu (see: clone_rq)
	 */

	if (!md->queue->mq_ops)
		free_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	rq_end_stats(md, rq);
	if (!rq->q->mq_ops)
		blk_end_request_all(rq, error);
	else
		blk_mq_end_request(rq, error);
	rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!rq->q->mq_ops) {
		rq->special = NULL;
		rq->cmd_flags &= ~REQ_DONTPREP;
	}

	if (clone)
		free_rq_clone(clone);
}

/*
 * Requeue the original request of a clone.
 */
static void old_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	blk_run_queue_async(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_requeue_original_request(struct mapped_device *md,
					struct request *rq)
{
	int rw = rq_data_dir(rq);

	dm_unprep_request(rq);

	rq_end_stats(md, rq);
	if (!rq->q->mq_ops)
		old_requeue_request(rq);
	else {
		blk_mq_requeue_request(rq);
		blk_mq_kick_requeue_list(rq->q);
	}

	rq_completed(md, rw, false);
}

static void old_stop_queue(struct request_queue *q)
{
	unsigned long flags;

	if (blk_queue_stopped(q))
		return;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void stop_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		old_stop_queue(q);
	else
		blk_mq_stop_hw_queues(q);
}

static void old_start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void start_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		old_start_queue(q);
	else
		blk_mq_start_stopped_hw_queues(q, true);
}

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
		     !clone->q->limits.max_write_same_sectors))
		disable_write_same(tio->md);

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(tio->md, tio->orig);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;
	int rw;

	if (!clone) {
		rq_end_stats(tio->md, rq);
		rw = rq_data_dir(rq);
		if (!rq->q->mq_ops) {
			blk_end_request_all(rq, tio->error);
			rq_completed(tio->md, rw, false);
			free_rq_tio(tio);
		} else {
			blk_mq_end_request(rq, tio->error);
			rq_completed(tio->md, rw, false);
		}
		return;
	}

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	blk_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (for non-blk-mq)
 */
static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		/*
		 * For just cleaning up the information of the queue in which
		 * the clone was dispatched.
		 * The clone is *NOT* actually freed here because it was
		 * allocated from dm's own mempool (REQ_ALLOCED isn't set).
		 */
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock.  Otherwise, deadlock could occur because:
	 *     - another request may be submitted by the upper level driver
	 *       of the stacking during the completion
	 *     - the submission which requires queue lock may be done
	 *       against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}

static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

/*
 * A target may call dm_accept_partial_bio only from the map routine.  It is
 * allowed for all bio types except REQ_FLUSH.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the data should be
 * sent in a next bio.
 *
 * A diagram that explains the arithmetics:
 * +--------------------+---------------+-------+
 * |         1          |       2       |   3   |
 * +--------------------+---------------+-------+
 *
 * <-------------- *tio->len_ptr --------------->
 *                      <------- bi_size ------->
 *                      <-- n_sectors -->
 *
 * Region 1 was already iterated over with bio_advance or similar function.
 *	(it may be empty if the target doesn't use bio_advance)
 * Region 2 is the remaining bio size that the target wants to process.
 *	(it may be empty if region 1 is non-empty, although there is no reason
 *	 to make it empty)
 * The target requires that region 3 is to be sent in the next bio.
 *
 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
 * the partially processed part (the sum of regions 1+2) must be the same for all
 * copies of the bio.
 */
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
	BUG_ON(bio->bi_rw & REQ_FLUSH);
	BUG_ON(bi_size > *tio->len_ptr);
	BUG_ON(n_sectors > bi_size);
	*tio->len_ptr -= bi_size - n_sectors;
	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
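/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * bio-based target whose ->map hook only wants to handle a bounded number of
 * sectors per bio.  It trims its share with dm_accept_partial_bio() above and
 * the core resubmits the remainder (region 3 in the diagram) as a new bio.
 * All names are hypothetical and the block is not compiled.
 */
#if 0
#define EXAMPLE_CHUNK_SECTORS 8

static int example_target_map(struct dm_target *ti, struct bio *bio)
{
	if (bio_sectors(bio) > EXAMPLE_CHUNK_SECTORS)
		dm_accept_partial_bio(bio, EXAMPLE_CHUNK_SECTORS);

	/* ... remap bio->bi_bdev and bio->bi_iter.bi_sector for this chunk ... */

	return DM_MAPIO_REMAPPED;
}
#endif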
static void __map_bio(struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;
	struct bio *clone = &tio->clone;
	struct dm_target *ti = tio->ti;

	clone->bi_end_io = clone_endio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_iter.bi_sector;
	r = ti->type->map(ti, clone);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
				      tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		free_tio(md, tio);
	} else if (r != DM_MAPIO_SUBMITTED) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned sector_count;
};

static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
{
	bio->bi_iter.bi_sector = sector;
	bio->bi_iter.bi_size = to_bytes(len);
}

/*
 * Creates a bio that consists of range of complete bvecs.
 */
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
		      sector_t sector, unsigned len)
{
	struct bio *clone = &tio->clone;

	__bio_clone_fast(clone, bio);

	if (bio_integrity(bio))
		bio_integrity_clone(clone, bio, GFP_NOIO);

	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
	clone->bi_iter.bi_size = to_bytes(len);

	if (bio_integrity(bio))
		bio_integrity_trim(clone, 0, len);
}

static struct dm_target_io *alloc_tio(struct clone_info *ci,
				      struct dm_target *ti,
				      unsigned target_bio_nr)
{
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, 0, ci->md->bs);
	tio = container_of(clone, struct dm_target_io, clone);

	tio->io = ci->io;
	tio->ti = ti;
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void __clone_and_map_simple_bio(struct clone_info *ci,
				       struct dm_target *ti,
				       unsigned target_bio_nr, unsigned *len)
{
	struct dm_target_io *tio = alloc_tio(ci, ti, target_bio_nr);
	struct bio *clone = &tio->clone;

	tio->len_ptr = len;

	__bio_clone_fast(clone, ci->bio);
	if (len)
		bio_setup_sector(clone, ci->sector, *len);

	__map_bio(tio);
}

static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
				  unsigned num_bios, unsigned *len)
{
	unsigned target_bio_nr;

	for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++)
		__clone_and_map_simple_bio(ci, ti, target_bio_nr, len);
}

static int __send_empty_flush(struct clone_info *ci)
{
	unsigned target_nr = 0;
	struct dm_target *ti;

	BUG_ON(bio_has_data(ci->bio));
	while ((ti = dm_table_get_target(ci->map, target_nr++)))
		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);

	return 0;
}
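/*
 * Illustrative sketch, not part of the original file: the per-target knobs
 * consumed by __send_empty_flush() above and by the discard/write-same paths
 * below are set in a target's constructor.  A hypothetical target would opt
 * in like this; the block is not compiled.
 */
#if 0
static int example_target_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	ti->num_flush_bios = 1;		/* receive one clone of each empty flush */
	ti->num_discard_bios = 1;	/* receive REQ_DISCARD bios */
	ti->num_write_same_bios = 1;	/* receive REQ_WRITE_SAME bios */

	return 0;
}
#endif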
static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
				     sector_t sector, unsigned *len)
{
	struct bio *bio = ci->bio;
	struct dm_target_io *tio;
	unsigned target_bio_nr;
	unsigned num_target_bios = 1;

	/*
	 * Does the target want to receive duplicate copies of the bio?
	 */
	if (bio_data_dir(bio) == WRITE && ti->num_write_bios)
		num_target_bios = ti->num_write_bios(ti, bio);

	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
		tio = alloc_tio(ci, ti, target_bio_nr);
		tio->len_ptr = len;
		clone_bio(tio, bio, sector, *len);
		__map_bio(tio);
	}
}

typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);

static unsigned get_num_discard_bios(struct dm_target *ti)
{
	return ti->num_discard_bios;
}

static unsigned get_num_write_same_bios(struct dm_target *ti)
{
	return ti->num_write_same_bios;
}

typedef bool (*is_split_required_fn)(struct dm_target *ti);

static bool is_split_required_for_discard(struct dm_target *ti)
{
	return ti->split_discard_bios;
}

static int __send_changing_extent_only(struct clone_info *ci,
				       get_num_bios_fn get_num_bios,
				       is_split_required_fn is_split_required)
{
	struct dm_target *ti;
	unsigned len;
	unsigned num_bios;

	do {
		ti = dm_table_find_target(ci->map, ci->sector);
		if (!dm_target_is_valid(ti))
			return -EIO;

		/*
		 * Even though the device advertised support for this type of
		 * request, that does not mean every target supports it, and
		 * reconfiguration might also have changed that since the
		 * check was performed.
		 */
		num_bios = get_num_bios ? get_num_bios(ti) : 0;
		if (!num_bios)
			return -EOPNOTSUPP;

		if (is_split_required && !is_split_required(ti))
			len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
		else
			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));

		__send_duplicate_bios(ci, ti, num_bios, &len);

		ci->sector += len;
	} while (ci->sector_count -= len);

	return 0;
}

static int __send_discard(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_discard_bios,
					   is_split_required_for_discard);
}

static int __send_write_same(struct clone_info *ci)
{
	return __send_changing_extent_only(ci, get_num_write_same_bios, NULL);
}

/*
 * Select the correct strategy for processing a non-flush bio.
 */
static int __split_and_process_non_flush(struct clone_info *ci)
{
	struct bio *bio = ci->bio;
	struct dm_target *ti;
	unsigned len;

	if (unlikely(bio->bi_rw & REQ_DISCARD))
		return __send_discard(ci);
	else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
		return __send_write_same(ci);

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);

	__clone_and_map_data_bio(ci, ti, ci->sector, &len);

	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}

/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
static void __split_and_process_bio(struct mapped_device *md,
				    struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	if (unlikely(!map)) {
		bio_io_error(bio);
		return;
	}

	ci.map = map;
	ci.md = md;
	ci.io = alloc_io(md);
	ci.io->error = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->bio = bio;
	ci.io->md = md;
	spin_lock_init(&ci.io->endio_lock);
	ci.sector = bio->bi_iter.bi_sector;

	start_io_acct(ci.io);

	if (bio->bi_rw & REQ_FLUSH) {
		ci.bio = &ci.md->flush_bio;
		ci.sector_count = 0;
		error = __send_empty_flush(&ci);
		/* dec_pending submits any data associated with flush */
	} else {
		ci.bio = bio;
		ci.sector_count = bio_sectors(bio);
		while (ci.sector_count && !error)
			error = __split_and_process_non_flush(&ci);
	}

	/* drop the extra reference count */
	dec_pending(ci.io, error);
}
/*-----------------------------------------------------------------
 * CRUD END
 *---------------------------------------------------------------*/

/*
 * The request function that just remaps the bio built up by
 * dm_merge_bvec.
 */
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);

	/* if we're suspended, we have to queue this io for later */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
		dm_put_live_table(md, srcu_idx);

		if (bio_rw(bio) != READA)
			queue_io(md, bio);
		else
			bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	__split_and_process_bio(md, map, bio);
	dm_put_live_table(md, srcu_idx);
	return BLK_QC_T_NONE;
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_rq(struct request *rq, struct mapped_device *md,
				struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	/*
	 * Do not allocate a clone if tio->clone was already set
	 * (see: dm_mq_queue_rq).
	 */
	bool alloc_clone = !tio->clone;
	struct request *clone;

	if (alloc_clone) {
		clone = alloc_clone_request(md, gfp_mask);
		if (!clone)
			return NULL;
	} else
		clone = tio->clone;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		if (alloc_clone)
			free_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		init_kthread_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *prep_tio(struct request *rq,
					struct mapped_device *md, gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	if (!dm_table_mq_request_based(table)) {
		if (!clone_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
		       struct mapped_device *md)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r != DM_MAPIO_REMAPPED)
			return r;
		if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(md, tio->orig);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
		return r;
	}

	return 0;
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
	struct request *rq = tio->orig;
	struct mapped_device *md = tio->md;

	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
		dm_requeue_original_request(md, rq);
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
				    tio->n_sectors, false, 0, &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (!dm_request_based(md) || md->use_blk_mq)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for request-based dm.
 * Called with the queue lock held.
 */
static void dm_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
	struct dm_target *ti;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos;

	/*
	 * For suspend, check blk_queue_stopped() and increment
	 * ->pending within a single queue_lock so as not to increment the
	 * number of in-flight I/Os after the queue is stopped in
	 * dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			goto out;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (!(rq->cmd_flags & REQ_FLUSH))
			pos = blk_rq_pos(rq);

		ti = dm_table_find_target(map, pos);
		if (!dm_target_is_valid(ti)) {
			/*
			 * Must perform the setup that rq_completed() requires
			 * before calling dm_kill_unmapped_request().
			 */
			DMERR_LIMIT("request attempted access beyond the end of device");
			dm_start_request(md, rq);
			dm_kill_unmapped_request(rq, -EIO);
			continue;
		}

		if (dm_request_peeked_before_merge_deadline(md) &&
		    md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
		    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
			goto delay_and_out;

		if (ti->type->busy && ti->type->busy(ti))
			goto delay_and_out;

		dm_start_request(md, rq);

		tio = tio_from_request(rq);
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}

	goto out;

delay_and_out:
	blk_delay_queue(q, HZ / 100);
out:
	dm_put_live_table(md, srcu_idx);
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		map = dm_get_live_table_fast(md);
		if (map) {
			/*
			 * Request-based dm only cares about its own queue when
			 * queried for the congestion status of the request_queue.
			 */
			if (dm_request_based(md))
				r = md->queue->backing_dev_info.wb.state &
				    bdi_bits;
			else
				r = dm_table_any_congested(map, bdi_bits);
		}
		dm_put_live_table_fast(md);
	}

	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r == -ENOSPC ? -EBUSY : r;
	return 0;
}

static int next_free_minor(int *minor)
{
	int r;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r;
	*minor = r;
	return 0;
}

static const struct block_device_operations dm_blk_dops;

static void dm_wq_work(struct work_struct *work);

static void dm_init_md_queue(struct mapped_device *md)
{
	/*
	 * Request-based dm devices cannot be stacked on top of bio-based dm
	 * devices.  The type of this dm device may not have been decided yet.
	 * The type is decided at the first table loading time.
	 * To prevent problematic device stacking, clear the queue flag
	 * for request stacking support until then.
	 *
	 * This queue is new, so no concurrency on the queue_flags.
	 */
	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);

	/*
	 * Initialize data that will only be used by a non-blk-mq DM queue
	 * - must do so here (in alloc_dev callchain) before queue is used
	 */
	md->queue->queuedata = md;
	md->queue->backing_dev_info.congested_data = md;
}

static void dm_init_old_md_queue(struct mapped_device *md)
{
	md->use_blk_mq = false;
	dm_init_md_queue(md);

	/*
	 * Initialize aspects of queue that aren't relevant for blk-mq
	 */
	md->queue->backing_dev_info.congested_fn = dm_any_congested;
	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
}

static void cleanup_mapped_device(struct mapped_device *md)
{
	if (md->wq)
		destroy_workqueue(md->wq);
	if (md->kworker_task)
		kthread_stop(md->kworker_task);
	mempool_destroy(md->io_pool);
	mempool_destroy(md->rq_pool);
	if (md->bs)
		bioset_free(md->bs);

	cleanup_srcu_struct(&md->io_barrier);

	if (md->disk) {
		spin_lock(&_minor_lock);
		md->disk->private_data = NULL;
		spin_unlock(&_minor_lock);
		del_gendisk(md->disk);
		put_disk(md->disk);
	}

	if (md->queue)
		blk_cleanup_queue(md->queue);

	if (md->bdev) {
		bdput(md->bdev);
		md->bdev = NULL;
	}
}

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r;
	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
	void *old_md;

	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->use_blk_mq = use_blk_mq;
	md->type = DM_TYPE_NONE;
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	mutex_init(&md->table_devices_lock);
	spin_lock_init(&md->deferred_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	INIT_LIST_HEAD(&md->table_devices);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue(GFP_KERNEL);
	if (!md->queue)
		goto bad;

	dm_init_md_queue(md);

	md->disk = alloc_disk(1);
	if (!md->disk)
		goto bad;

	atomic_set(&md->pending[0], 0);
	atomic_set(&md->pending[1], 0);
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_holder.completion);
	md->kworker_task = NULL;

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);
	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));
		      minor));

	md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
	if (!md->wq)
		goto bad;

	md->bdev = bdget_disk(md->disk, 0);
	if (!md->bdev)
		goto bad;

	bio_init(&md->flush_bio);
	md->flush_bio.bi_bdev = md->bdev;
	md->flush_bio.bi_rw = WRITE_FLUSH;

	dm_stats_init(&md->stats);

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad:
	cleanup_mapped_device(md);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kfree(md);
	return NULL;
}

static void unlock_fs(struct mapped_device *md);

static void free_dev(struct mapped_device *md)
{
	int minor = MINOR(disk_devt(md->disk));

	unlock_fs(md);

	cleanup_mapped_device(md);
	if (md->use_blk_mq)
		blk_mq_free_tag_set(&md->tag_set);

	free_table_devices(&md->table_devices);
	dm_stats_cleanup(&md->stats);
	free_minor(minor);

	module_put(THIS_MODULE);
	kfree(md);
}

static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
{
	struct dm_md_mempools *p = dm_table_get_md_mempools(t);

	if (md->bs) {
		/* The md already has the necessary mempools. */
		if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
			/*
			 * Reload the bioset because its front_pad may have
			 * changed with the newly loaded table.
			 */
			bioset_free(md->bs);
			md->bs = p->bs;
			p->bs = NULL;
		}
		/*
		 * There's no need to reload for request-based dm because its
		 * front_pad size never changes.
		 * Note for the future: if the bioset ever does need reloading,
		 * prepped requests in the queue may still refer to bios from
		 * the old bioset, so the queue would have to be walked to
		 * unprep them first.
		 */
		goto out;
	}

	BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);

	md->io_pool = p->io_pool;
	p->io_pool = NULL;
	md->rq_pool = p->rq_pool;
	p->rq_pool = NULL;
	md->bs = p->bs;
	p->bs = NULL;

out:
	/* mempool bind completed, no longer need any mempools in the table */
	dm_table_free_md_mempools(t);
}

/*
 * Bind a table to the device: event and size helpers first, then __bind()
 * itself.
 */
static void event_callback(void *context)
{
	unsigned long flags;
	LIST_HEAD(uevents);
	struct mapped_device *md = (struct mapped_device *) context;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &uevents);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
}

/*
 * Protected by md->suspend_lock obtained by dm_swap_table().
 */
static void __set_size(struct mapped_device *md, sector_t size)
{
	set_capacity(md->disk, size);

	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
}

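/*
 * md->map is the live table. It is swapped under md->suspend_lock and
 * published via RCU; readers pin it with SRCU, for example (illustrative,
 * this is the pattern used by dm_request_fn() and dm_mq_queue_rq()):
 *
 *	int srcu_idx;
 *	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 *
 *	if (map)
 *		... look up targets and map I/O ...
 *	dm_put_live_table(md, srcu_idx);
 *
 * __bind()/__unbind() below publish the new table with rcu_assign_pointer()
 * or RCU_INIT_POINTER() and wait out such readers with dm_sync_table().
 */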
/*
 * Returns the old map, which the caller must destroy.
 */
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
			       struct queue_limits *limits)
{
	struct dm_table *old_map;
	struct request_queue *q = md->queue;
	sector_t size;

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != dm_get_size(md))
		memset(&md->geometry, 0, sizeof(md->geometry));

	__set_size(md, size);

	dm_table_event_callback(t, event_callback, md);

	/*
	 * If the old table wasn't request-based, the queue has not been
	 * stopped during suspension, so stop it now to prevent I/O from
	 * being mapped before resume. This must be done before setting the
	 * queue restrictions, because request-based dm may start running as
	 * soon as they are set.
	 */
	if (dm_table_request_based(t))
		stop_queue(q);

	__bind_mempools(md, t);

	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	rcu_assign_pointer(md->map, t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	dm_table_set_restrictions(t, q, limits);
	if (old_map)
		dm_sync_table(md);

	return old_map;
}

/*
 * Returns the unbound table for the caller to free.
 */
static struct dm_table *__unbind(struct mapped_device *md)
{
	struct dm_table *map = rcu_dereference_protected(md->map, 1);

	if (!map)
		return NULL;

	dm_table_event_callback(map, NULL, NULL);
	RCU_INIT_POINTER(md->map, NULL);
	dm_sync_table(md);

	return map;
}

/*
 * Constructor for a new device.
 */
int dm_create(int minor, struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor);
	if (!md)
		return -ENXIO;

	dm_sysfs_init(md);

	*result = md;
	return 0;
}

/*
 * Functions to manage md->type.
 * dm_set_md_type() and dm_get_md_type() must be called with md->type_lock
 * held; use dm_lock_md_type()/dm_unlock_md_type() to take and release it.
 */
void dm_lock_md_type(struct mapped_device *md)
{
	mutex_lock(&md->type_lock);
}

void dm_unlock_md_type(struct mapped_device *md)
{
	mutex_unlock(&md->type_lock);
}

void dm_set_md_type(struct mapped_device *md, unsigned type)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	md->type = type;
}

unsigned dm_get_md_type(struct mapped_device *md)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	return md->type;
}

struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}

/*
 * The queue_limits are only valid as long as you have a reference
 * count on 'md'.
 */
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
	BUG_ON(!atomic_read(&md->holders));
	return &md->queue->limits;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);

static void init_rq_based_worker_thread(struct mapped_device *md)
{
	/* Initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
}

/*
 * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
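 * (Only used for DM_TYPE_REQUEST_BASED devices. DM_TYPE_MQ_REQUEST_BASED
 * devices are set up by dm_init_request_based_blk_mq_queue() instead; see
 * dm_setup_md_queue() and filter_md_type().)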
2604 */ 2605 static int dm_init_request_based_queue(struct mapped_device *md) 2606 { 2607 struct request_queue *q = NULL; 2608 2609 /* Fully initialize the queue */ 2610 q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); 2611 if (!q) 2612 return -EINVAL; 2613 2614 /* disable dm_request_fn's merge heuristic by default */ 2615 md->seq_rq_merge_deadline_usecs = 0; 2616 2617 md->queue = q; 2618 dm_init_old_md_queue(md); 2619 blk_queue_softirq_done(md->queue, dm_softirq_done); 2620 blk_queue_prep_rq(md->queue, dm_prep_fn); 2621 2622 init_rq_based_worker_thread(md); 2623 2624 elv_register_queue(md->queue); 2625 2626 return 0; 2627 } 2628 2629 static int dm_mq_init_request(void *data, struct request *rq, 2630 unsigned int hctx_idx, unsigned int request_idx, 2631 unsigned int numa_node) 2632 { 2633 struct mapped_device *md = data; 2634 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 2635 2636 /* 2637 * Must initialize md member of tio, otherwise it won't 2638 * be available in dm_mq_queue_rq. 2639 */ 2640 tio->md = md; 2641 2642 return 0; 2643 } 2644 2645 static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, 2646 const struct blk_mq_queue_data *bd) 2647 { 2648 struct request *rq = bd->rq; 2649 struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); 2650 struct mapped_device *md = tio->md; 2651 int srcu_idx; 2652 struct dm_table *map = dm_get_live_table(md, &srcu_idx); 2653 struct dm_target *ti; 2654 sector_t pos; 2655 2656 /* always use block 0 to find the target for flushes for now */ 2657 pos = 0; 2658 if (!(rq->cmd_flags & REQ_FLUSH)) 2659 pos = blk_rq_pos(rq); 2660 2661 ti = dm_table_find_target(map, pos); 2662 if (!dm_target_is_valid(ti)) { 2663 dm_put_live_table(md, srcu_idx); 2664 DMERR_LIMIT("request attempted access beyond the end of device"); 2665 /* 2666 * Must perform setup, that rq_completed() requires, 2667 * before returning BLK_MQ_RQ_QUEUE_ERROR 2668 */ 2669 dm_start_request(md, rq); 2670 return BLK_MQ_RQ_QUEUE_ERROR; 2671 } 2672 dm_put_live_table(md, srcu_idx); 2673 2674 if (ti->type->busy && ti->type->busy(ti)) 2675 return BLK_MQ_RQ_QUEUE_BUSY; 2676 2677 dm_start_request(md, rq); 2678 2679 /* Init tio using md established in .init_request */ 2680 init_tio(tio, rq, md); 2681 2682 /* 2683 * Establish tio->ti before queuing work (map_tio_request) 2684 * or making direct call to map_request(). 
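	 * (tio->ti is how map_request() learns which target this request was
	 * routed to, whether it is called directly below or later from
	 * map_tio_request() on md->kworker.)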
2685 */ 2686 tio->ti = ti; 2687 2688 /* Clone the request if underlying devices aren't blk-mq */ 2689 if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) { 2690 /* clone request is allocated at the end of the pdu */ 2691 tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io); 2692 (void) clone_rq(rq, md, tio, GFP_ATOMIC); 2693 queue_kthread_work(&md->kworker, &tio->work); 2694 } else { 2695 /* Direct call is fine since .queue_rq allows allocations */ 2696 if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) { 2697 /* Undo dm_start_request() before requeuing */ 2698 rq_end_stats(md, rq); 2699 rq_completed(md, rq_data_dir(rq), false); 2700 return BLK_MQ_RQ_QUEUE_BUSY; 2701 } 2702 } 2703 2704 return BLK_MQ_RQ_QUEUE_OK; 2705 } 2706 2707 static struct blk_mq_ops dm_mq_ops = { 2708 .queue_rq = dm_mq_queue_rq, 2709 .map_queue = blk_mq_map_queue, 2710 .complete = dm_softirq_done, 2711 .init_request = dm_mq_init_request, 2712 }; 2713 2714 static int dm_init_request_based_blk_mq_queue(struct mapped_device *md) 2715 { 2716 unsigned md_type = dm_get_md_type(md); 2717 struct request_queue *q; 2718 int err; 2719 2720 memset(&md->tag_set, 0, sizeof(md->tag_set)); 2721 md->tag_set.ops = &dm_mq_ops; 2722 md->tag_set.queue_depth = BLKDEV_MAX_RQ; 2723 md->tag_set.numa_node = NUMA_NO_NODE; 2724 md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; 2725 md->tag_set.nr_hw_queues = 1; 2726 if (md_type == DM_TYPE_REQUEST_BASED) { 2727 /* make the memory for non-blk-mq clone part of the pdu */ 2728 md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request); 2729 } else 2730 md->tag_set.cmd_size = sizeof(struct dm_rq_target_io); 2731 md->tag_set.driver_data = md; 2732 2733 err = blk_mq_alloc_tag_set(&md->tag_set); 2734 if (err) 2735 return err; 2736 2737 q = blk_mq_init_allocated_queue(&md->tag_set, md->queue); 2738 if (IS_ERR(q)) { 2739 err = PTR_ERR(q); 2740 goto out_tag_set; 2741 } 2742 md->queue = q; 2743 dm_init_md_queue(md); 2744 2745 /* backfill 'mq' sysfs registration normally done in blk_register_queue */ 2746 blk_mq_register_disk(md->disk); 2747 2748 if (md_type == DM_TYPE_REQUEST_BASED) 2749 init_rq_based_worker_thread(md); 2750 2751 return 0; 2752 2753 out_tag_set: 2754 blk_mq_free_tag_set(&md->tag_set); 2755 return err; 2756 } 2757 2758 static unsigned filter_md_type(unsigned type, struct mapped_device *md) 2759 { 2760 if (type == DM_TYPE_BIO_BASED) 2761 return type; 2762 2763 return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; 2764 } 2765 2766 /* 2767 * Setup the DM device's queue based on md's type 2768 */ 2769 int dm_setup_md_queue(struct mapped_device *md) 2770 { 2771 int r; 2772 unsigned md_type = filter_md_type(dm_get_md_type(md), md); 2773 2774 switch (md_type) { 2775 case DM_TYPE_REQUEST_BASED: 2776 r = dm_init_request_based_queue(md); 2777 if (r) { 2778 DMWARN("Cannot initialize queue for request-based mapped device"); 2779 return r; 2780 } 2781 break; 2782 case DM_TYPE_MQ_REQUEST_BASED: 2783 r = dm_init_request_based_blk_mq_queue(md); 2784 if (r) { 2785 DMWARN("Cannot initialize queue for request-based blk-mq mapped device"); 2786 return r; 2787 } 2788 break; 2789 case DM_TYPE_BIO_BASED: 2790 dm_init_old_md_queue(md); 2791 blk_queue_make_request(md->queue, dm_make_request); 2792 /* 2793 * DM handles splitting bios as needed. Free the bio_split bioset 2794 * since it won't be used (saves 1 process per bio-based DM device). 
2795 */ 2796 bioset_free(md->queue->bio_split); 2797 md->queue->bio_split = NULL; 2798 break; 2799 } 2800 2801 return 0; 2802 } 2803 2804 struct mapped_device *dm_get_md(dev_t dev) 2805 { 2806 struct mapped_device *md; 2807 unsigned minor = MINOR(dev); 2808 2809 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) 2810 return NULL; 2811 2812 spin_lock(&_minor_lock); 2813 2814 md = idr_find(&_minor_idr, minor); 2815 if (md) { 2816 if ((md == MINOR_ALLOCED || 2817 (MINOR(disk_devt(dm_disk(md))) != minor) || 2818 dm_deleting_md(md) || 2819 test_bit(DMF_FREEING, &md->flags))) { 2820 md = NULL; 2821 goto out; 2822 } 2823 dm_get(md); 2824 } 2825 2826 out: 2827 spin_unlock(&_minor_lock); 2828 2829 return md; 2830 } 2831 EXPORT_SYMBOL_GPL(dm_get_md); 2832 2833 void *dm_get_mdptr(struct mapped_device *md) 2834 { 2835 return md->interface_ptr; 2836 } 2837 2838 void dm_set_mdptr(struct mapped_device *md, void *ptr) 2839 { 2840 md->interface_ptr = ptr; 2841 } 2842 2843 void dm_get(struct mapped_device *md) 2844 { 2845 atomic_inc(&md->holders); 2846 BUG_ON(test_bit(DMF_FREEING, &md->flags)); 2847 } 2848 2849 int dm_hold(struct mapped_device *md) 2850 { 2851 spin_lock(&_minor_lock); 2852 if (test_bit(DMF_FREEING, &md->flags)) { 2853 spin_unlock(&_minor_lock); 2854 return -EBUSY; 2855 } 2856 dm_get(md); 2857 spin_unlock(&_minor_lock); 2858 return 0; 2859 } 2860 EXPORT_SYMBOL_GPL(dm_hold); 2861 2862 const char *dm_device_name(struct mapped_device *md) 2863 { 2864 return md->name; 2865 } 2866 EXPORT_SYMBOL_GPL(dm_device_name); 2867 2868 static void __dm_destroy(struct mapped_device *md, bool wait) 2869 { 2870 struct dm_table *map; 2871 int srcu_idx; 2872 2873 might_sleep(); 2874 2875 spin_lock(&_minor_lock); 2876 idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); 2877 set_bit(DMF_FREEING, &md->flags); 2878 spin_unlock(&_minor_lock); 2879 2880 if (dm_request_based(md) && md->kworker_task) 2881 flush_kthread_worker(&md->kworker); 2882 2883 /* 2884 * Take suspend_lock so that presuspend and postsuspend methods 2885 * do not race with internal suspend. 2886 */ 2887 mutex_lock(&md->suspend_lock); 2888 map = dm_get_live_table(md, &srcu_idx); 2889 if (!dm_suspended_md(md)) { 2890 dm_table_presuspend_targets(map); 2891 dm_table_postsuspend_targets(map); 2892 } 2893 /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ 2894 dm_put_live_table(md, srcu_idx); 2895 mutex_unlock(&md->suspend_lock); 2896 2897 /* 2898 * Rare, but there may be I/O requests still going to complete, 2899 * for example. Wait for all references to disappear. 2900 * No one should increment the reference count of the mapped_device, 2901 * after the mapped_device state becomes DMF_FREEING. 2902 */ 2903 if (wait) 2904 while (atomic_read(&md->holders)) 2905 msleep(1); 2906 else if (atomic_read(&md->holders)) 2907 DMWARN("%s: Forcibly removing mapped_device still in use! 
(%d users)", 2908 dm_device_name(md), atomic_read(&md->holders)); 2909 2910 dm_sysfs_exit(md); 2911 dm_table_destroy(__unbind(md)); 2912 free_dev(md); 2913 } 2914 2915 void dm_destroy(struct mapped_device *md) 2916 { 2917 __dm_destroy(md, true); 2918 } 2919 2920 void dm_destroy_immediate(struct mapped_device *md) 2921 { 2922 __dm_destroy(md, false); 2923 } 2924 2925 void dm_put(struct mapped_device *md) 2926 { 2927 atomic_dec(&md->holders); 2928 } 2929 EXPORT_SYMBOL_GPL(dm_put); 2930 2931 static int dm_wait_for_completion(struct mapped_device *md, int interruptible) 2932 { 2933 int r = 0; 2934 DECLARE_WAITQUEUE(wait, current); 2935 2936 add_wait_queue(&md->wait, &wait); 2937 2938 while (1) { 2939 set_current_state(interruptible); 2940 2941 if (!md_in_flight(md)) 2942 break; 2943 2944 if (interruptible == TASK_INTERRUPTIBLE && 2945 signal_pending(current)) { 2946 r = -EINTR; 2947 break; 2948 } 2949 2950 io_schedule(); 2951 } 2952 set_current_state(TASK_RUNNING); 2953 2954 remove_wait_queue(&md->wait, &wait); 2955 2956 return r; 2957 } 2958 2959 /* 2960 * Process the deferred bios 2961 */ 2962 static void dm_wq_work(struct work_struct *work) 2963 { 2964 struct mapped_device *md = container_of(work, struct mapped_device, 2965 work); 2966 struct bio *c; 2967 int srcu_idx; 2968 struct dm_table *map; 2969 2970 map = dm_get_live_table(md, &srcu_idx); 2971 2972 while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 2973 spin_lock_irq(&md->deferred_lock); 2974 c = bio_list_pop(&md->deferred); 2975 spin_unlock_irq(&md->deferred_lock); 2976 2977 if (!c) 2978 break; 2979 2980 if (dm_request_based(md)) 2981 generic_make_request(c); 2982 else 2983 __split_and_process_bio(md, map, c); 2984 } 2985 2986 dm_put_live_table(md, srcu_idx); 2987 } 2988 2989 static void dm_queue_flush(struct mapped_device *md) 2990 { 2991 clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2992 smp_mb__after_atomic(); 2993 queue_work(md->wq, &md->work); 2994 } 2995 2996 /* 2997 * Swap in a new table, returning the old one for the caller to destroy. 2998 */ 2999 struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) 3000 { 3001 struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); 3002 struct queue_limits limits; 3003 int r; 3004 3005 mutex_lock(&md->suspend_lock); 3006 3007 /* device must be suspended */ 3008 if (!dm_suspended_md(md)) 3009 goto out; 3010 3011 /* 3012 * If the new table has no data devices, retain the existing limits. 3013 * This helps multipath with queue_if_no_path if all paths disappear, 3014 * then new I/O is queued based on these limits, and then some paths 3015 * reappear. 3016 */ 3017 if (dm_table_has_no_data_devices(table)) { 3018 live_map = dm_get_live_table_fast(md); 3019 if (live_map) 3020 limits = md->queue->limits; 3021 dm_put_live_table_fast(md); 3022 } 3023 3024 if (!live_map) { 3025 r = dm_calculate_queue_limits(table, &limits); 3026 if (r) { 3027 map = ERR_PTR(r); 3028 goto out; 3029 } 3030 } 3031 3032 map = __bind(md, table, &limits); 3033 3034 out: 3035 mutex_unlock(&md->suspend_lock); 3036 return map; 3037 } 3038 3039 /* 3040 * Functions to lock and unlock any filesystem running on the 3041 * device. 
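 * (lock_fs() freezes the block device with freeze_bdev() and stashes the
 * returned superblock in md->frozen_sb; unlock_fs() thaws it again with
 * thaw_bdev().)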
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(md->frozen_sb);

	md->frozen_sb = freeze_bdev(md->bdev);
	if (IS_ERR(md->frozen_sb)) {
		r = PTR_ERR(md->frozen_sb);
		md->frozen_sb = NULL;
		return r;
	}

	set_bit(DMF_FROZEN, &md->flags);

	return 0;
}

static void unlock_fs(struct mapped_device *md)
{
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;

	thaw_bdev(md->bdev, md->frozen_sb);
	md->frozen_sb = NULL;
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * If __dm_suspend returns 0, the device is completely quiescent:
 * there is no request-processing activity and all new requests are
 * being added to the md->deferred list.
 *
 * Caller must hold md->suspend_lock.
 */
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
			unsigned suspend_flags, int interruptible)
{
	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
	int r;

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);

	/*
	 * This gets reverted if there's an error later and the targets
	 * provide the .presuspend_undo hook.
	 */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r) {
			dm_table_presuspend_undo_targets(map);
			return r;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers, i.e. no one may be executing
	 * __split_and_process_bio. This is called from dm_request and
	 * dm_wq_work.
	 *
	 * To get all processes out of __split_and_process_bio in dm_request,
	 * we wait for them with synchronize_srcu(&md->io_barrier). To prevent
	 * any process from reentering __split_and_process_bio from dm_request
	 * and to quiesce the thread (dm_wq_work), we set
	 * DMF_BLOCK_IO_FOR_SUSPEND and call flush_workqueue(md->wq).
	 */
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md)) {
		stop_queue(md->queue);
		if (md->kworker_task)
			flush_kthread_worker(&md->kworker);
	}

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, interruptible);

	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/* were we interrupted?
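	 * (A negative r means dm_wait_for_completion() returned -EINTR because
	 * a signal arrived during a TASK_INTERRUPTIBLE suspend; undo the queue
	 * stop, the fs lock and the presuspend hooks below.)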
*/ 3153 if (r < 0) { 3154 dm_queue_flush(md); 3155 3156 if (dm_request_based(md)) 3157 start_queue(md->queue); 3158 3159 unlock_fs(md); 3160 dm_table_presuspend_undo_targets(map); 3161 /* pushback list is already flushed, so skip flush */ 3162 } 3163 3164 return r; 3165 } 3166 3167 /* 3168 * We need to be able to change a mapping table under a mounted 3169 * filesystem. For example we might want to move some data in 3170 * the background. Before the table can be swapped with 3171 * dm_bind_table, dm_suspend must be called to flush any in 3172 * flight bios and ensure that any further io gets deferred. 3173 */ 3174 /* 3175 * Suspend mechanism in request-based dm. 3176 * 3177 * 1. Flush all I/Os by lock_fs() if needed. 3178 * 2. Stop dispatching any I/O by stopping the request_queue. 3179 * 3. Wait for all in-flight I/Os to be completed or requeued. 3180 * 3181 * To abort suspend, start the request_queue. 3182 */ 3183 int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 3184 { 3185 struct dm_table *map = NULL; 3186 int r = 0; 3187 3188 retry: 3189 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); 3190 3191 if (dm_suspended_md(md)) { 3192 r = -EINVAL; 3193 goto out_unlock; 3194 } 3195 3196 if (dm_suspended_internally_md(md)) { 3197 /* already internally suspended, wait for internal resume */ 3198 mutex_unlock(&md->suspend_lock); 3199 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); 3200 if (r) 3201 return r; 3202 goto retry; 3203 } 3204 3205 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 3206 3207 r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); 3208 if (r) 3209 goto out_unlock; 3210 3211 set_bit(DMF_SUSPENDED, &md->flags); 3212 3213 dm_table_postsuspend_targets(map); 3214 3215 out_unlock: 3216 mutex_unlock(&md->suspend_lock); 3217 return r; 3218 } 3219 3220 static int __dm_resume(struct mapped_device *md, struct dm_table *map) 3221 { 3222 if (map) { 3223 int r = dm_table_resume_targets(map); 3224 if (r) 3225 return r; 3226 } 3227 3228 dm_queue_flush(md); 3229 3230 /* 3231 * Flushing deferred I/Os must be done after targets are resumed 3232 * so that mapping of targets can work correctly. 3233 * Request-based dm is queueing the deferred I/Os in its request_queue. 3234 */ 3235 if (dm_request_based(md)) 3236 start_queue(md->queue); 3237 3238 unlock_fs(md); 3239 3240 return 0; 3241 } 3242 3243 int dm_resume(struct mapped_device *md) 3244 { 3245 int r = -EINVAL; 3246 struct dm_table *map = NULL; 3247 3248 retry: 3249 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); 3250 3251 if (!dm_suspended_md(md)) 3252 goto out; 3253 3254 if (dm_suspended_internally_md(md)) { 3255 /* already internally suspended, wait for internal resume */ 3256 mutex_unlock(&md->suspend_lock); 3257 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); 3258 if (r) 3259 return r; 3260 goto retry; 3261 } 3262 3263 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 3264 if (!map || !dm_table_get_size(map)) 3265 goto out; 3266 3267 r = __dm_resume(md, map); 3268 if (r) 3269 goto out; 3270 3271 clear_bit(DMF_SUSPENDED, &md->flags); 3272 3273 r = 0; 3274 out: 3275 mutex_unlock(&md->suspend_lock); 3276 3277 return r; 3278 } 3279 3280 /* 3281 * Internal suspend/resume works like userspace-driven suspend. It waits 3282 * until all bios finish and prevents issuing new bios to the target drivers. 3283 * It may be used only from the kernel. 
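 * Internal suspends nest: __dm_internal_suspend() only does the real work
 * on the first call (tracked by md->internal_suspend_count) and
 * __dm_internal_resume() only resumes on the matching final call.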
3284 */ 3285 3286 static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags) 3287 { 3288 struct dm_table *map = NULL; 3289 3290 if (md->internal_suspend_count++) 3291 return; /* nested internal suspend */ 3292 3293 if (dm_suspended_md(md)) { 3294 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 3295 return; /* nest suspend */ 3296 } 3297 3298 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 3299 3300 /* 3301 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is 3302 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend 3303 * would require changing .presuspend to return an error -- avoid this 3304 * until there is a need for more elaborate variants of internal suspend. 3305 */ 3306 (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); 3307 3308 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 3309 3310 dm_table_postsuspend_targets(map); 3311 } 3312 3313 static void __dm_internal_resume(struct mapped_device *md) 3314 { 3315 BUG_ON(!md->internal_suspend_count); 3316 3317 if (--md->internal_suspend_count) 3318 return; /* resume from nested internal suspend */ 3319 3320 if (dm_suspended_md(md)) 3321 goto done; /* resume from nested suspend */ 3322 3323 /* 3324 * NOTE: existing callers don't need to call dm_table_resume_targets 3325 * (which may fail -- so best to avoid it for now by passing NULL map) 3326 */ 3327 (void) __dm_resume(md, NULL); 3328 3329 done: 3330 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 3331 smp_mb__after_atomic(); 3332 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY); 3333 } 3334 3335 void dm_internal_suspend_noflush(struct mapped_device *md) 3336 { 3337 mutex_lock(&md->suspend_lock); 3338 __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG); 3339 mutex_unlock(&md->suspend_lock); 3340 } 3341 EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush); 3342 3343 void dm_internal_resume(struct mapped_device *md) 3344 { 3345 mutex_lock(&md->suspend_lock); 3346 __dm_internal_resume(md); 3347 mutex_unlock(&md->suspend_lock); 3348 } 3349 EXPORT_SYMBOL_GPL(dm_internal_resume); 3350 3351 /* 3352 * Fast variants of internal suspend/resume hold md->suspend_lock, 3353 * which prevents interaction with userspace-driven suspend. 3354 */ 3355 3356 void dm_internal_suspend_fast(struct mapped_device *md) 3357 { 3358 mutex_lock(&md->suspend_lock); 3359 if (dm_suspended_md(md) || dm_suspended_internally_md(md)) 3360 return; 3361 3362 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 3363 synchronize_srcu(&md->io_barrier); 3364 flush_workqueue(md->wq); 3365 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 3366 } 3367 EXPORT_SYMBOL_GPL(dm_internal_suspend_fast); 3368 3369 void dm_internal_resume_fast(struct mapped_device *md) 3370 { 3371 if (dm_suspended_md(md) || dm_suspended_internally_md(md)) 3372 goto done; 3373 3374 dm_queue_flush(md); 3375 3376 done: 3377 mutex_unlock(&md->suspend_lock); 3378 } 3379 EXPORT_SYMBOL_GPL(dm_internal_resume_fast); 3380 3381 /*----------------------------------------------------------------- 3382 * Event notification. 
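 * Targets queue events on md->uevent_list with dm_uevent_add(); they are
 * sent to udev from event_callback(). Userspace waits for md->event_nr to
 * change via the DM_DEV_WAIT ioctl, which ends up in dm_wait_event().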
3383 *---------------------------------------------------------------*/ 3384 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, 3385 unsigned cookie) 3386 { 3387 char udev_cookie[DM_COOKIE_LENGTH]; 3388 char *envp[] = { udev_cookie, NULL }; 3389 3390 if (!cookie) 3391 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); 3392 else { 3393 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", 3394 DM_COOKIE_ENV_VAR_NAME, cookie); 3395 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, 3396 action, envp); 3397 } 3398 } 3399 3400 uint32_t dm_next_uevent_seq(struct mapped_device *md) 3401 { 3402 return atomic_add_return(1, &md->uevent_seq); 3403 } 3404 3405 uint32_t dm_get_event_nr(struct mapped_device *md) 3406 { 3407 return atomic_read(&md->event_nr); 3408 } 3409 3410 int dm_wait_event(struct mapped_device *md, int event_nr) 3411 { 3412 return wait_event_interruptible(md->eventq, 3413 (event_nr != atomic_read(&md->event_nr))); 3414 } 3415 3416 void dm_uevent_add(struct mapped_device *md, struct list_head *elist) 3417 { 3418 unsigned long flags; 3419 3420 spin_lock_irqsave(&md->uevent_lock, flags); 3421 list_add(elist, &md->uevent_list); 3422 spin_unlock_irqrestore(&md->uevent_lock, flags); 3423 } 3424 3425 /* 3426 * The gendisk is only valid as long as you have a reference 3427 * count on 'md'. 3428 */ 3429 struct gendisk *dm_disk(struct mapped_device *md) 3430 { 3431 return md->disk; 3432 } 3433 EXPORT_SYMBOL_GPL(dm_disk); 3434 3435 struct kobject *dm_kobject(struct mapped_device *md) 3436 { 3437 return &md->kobj_holder.kobj; 3438 } 3439 3440 struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 3441 { 3442 struct mapped_device *md; 3443 3444 md = container_of(kobj, struct mapped_device, kobj_holder.kobj); 3445 3446 if (test_bit(DMF_FREEING, &md->flags) || 3447 dm_deleting_md(md)) 3448 return NULL; 3449 3450 dm_get(md); 3451 return md; 3452 } 3453 3454 int dm_suspended_md(struct mapped_device *md) 3455 { 3456 return test_bit(DMF_SUSPENDED, &md->flags); 3457 } 3458 3459 int dm_suspended_internally_md(struct mapped_device *md) 3460 { 3461 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 3462 } 3463 3464 int dm_test_deferred_remove_flag(struct mapped_device *md) 3465 { 3466 return test_bit(DMF_DEFERRED_REMOVE, &md->flags); 3467 } 3468 3469 int dm_suspended(struct dm_target *ti) 3470 { 3471 return dm_suspended_md(dm_table_get_md(ti->table)); 3472 } 3473 EXPORT_SYMBOL_GPL(dm_suspended); 3474 3475 int dm_noflush_suspending(struct dm_target *ti) 3476 { 3477 return __noflush_suspending(dm_table_get_md(ti->table)); 3478 } 3479 EXPORT_SYMBOL_GPL(dm_noflush_suspending); 3480 3481 struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type, 3482 unsigned integrity, unsigned per_bio_data_size) 3483 { 3484 struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); 3485 struct kmem_cache *cachep = NULL; 3486 unsigned int pool_size = 0; 3487 unsigned int front_pad; 3488 3489 if (!pools) 3490 return NULL; 3491 3492 type = filter_md_type(type, md); 3493 3494 switch (type) { 3495 case DM_TYPE_BIO_BASED: 3496 cachep = _io_cache; 3497 pool_size = dm_get_reserved_bio_based_ios(); 3498 front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 3499 break; 3500 case DM_TYPE_REQUEST_BASED: 3501 cachep = _rq_tio_cache; 3502 pool_size = dm_get_reserved_rq_based_ios(); 3503 pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); 3504 if (!pools->rq_pool) 3505 goto out; 3506 /* 
fall through to setup remaining rq-based pools */ 3507 case DM_TYPE_MQ_REQUEST_BASED: 3508 if (!pool_size) 3509 pool_size = dm_get_reserved_rq_based_ios(); 3510 front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 3511 /* per_bio_data_size is not used. See __bind_mempools(). */ 3512 WARN_ON(per_bio_data_size != 0); 3513 break; 3514 default: 3515 BUG(); 3516 } 3517 3518 if (cachep) { 3519 pools->io_pool = mempool_create_slab_pool(pool_size, cachep); 3520 if (!pools->io_pool) 3521 goto out; 3522 } 3523 3524 pools->bs = bioset_create_nobvec(pool_size, front_pad); 3525 if (!pools->bs) 3526 goto out; 3527 3528 if (integrity && bioset_integrity_create(pools->bs, pool_size)) 3529 goto out; 3530 3531 return pools; 3532 3533 out: 3534 dm_free_md_mempools(pools); 3535 3536 return NULL; 3537 } 3538 3539 void dm_free_md_mempools(struct dm_md_mempools *pools) 3540 { 3541 if (!pools) 3542 return; 3543 3544 mempool_destroy(pools->io_pool); 3545 mempool_destroy(pools->rq_pool); 3546 3547 if (pools->bs) 3548 bioset_free(pools->bs); 3549 3550 kfree(pools); 3551 } 3552 3553 static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, 3554 u32 flags) 3555 { 3556 struct mapped_device *md = bdev->bd_disk->private_data; 3557 const struct pr_ops *ops; 3558 struct dm_target *tgt; 3559 fmode_t mode; 3560 int srcu_idx, r; 3561 3562 r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); 3563 if (r < 0) 3564 return r; 3565 3566 ops = bdev->bd_disk->fops->pr_ops; 3567 if (ops && ops->pr_register) 3568 r = ops->pr_register(bdev, old_key, new_key, flags); 3569 else 3570 r = -EOPNOTSUPP; 3571 3572 dm_put_live_table(md, srcu_idx); 3573 return r; 3574 } 3575 3576 static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, 3577 u32 flags) 3578 { 3579 struct mapped_device *md = bdev->bd_disk->private_data; 3580 const struct pr_ops *ops; 3581 struct dm_target *tgt; 3582 fmode_t mode; 3583 int srcu_idx, r; 3584 3585 r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); 3586 if (r < 0) 3587 return r; 3588 3589 ops = bdev->bd_disk->fops->pr_ops; 3590 if (ops && ops->pr_reserve) 3591 r = ops->pr_reserve(bdev, key, type, flags); 3592 else 3593 r = -EOPNOTSUPP; 3594 3595 dm_put_live_table(md, srcu_idx); 3596 return r; 3597 } 3598 3599 static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type) 3600 { 3601 struct mapped_device *md = bdev->bd_disk->private_data; 3602 const struct pr_ops *ops; 3603 struct dm_target *tgt; 3604 fmode_t mode; 3605 int srcu_idx, r; 3606 3607 r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); 3608 if (r < 0) 3609 return r; 3610 3611 ops = bdev->bd_disk->fops->pr_ops; 3612 if (ops && ops->pr_release) 3613 r = ops->pr_release(bdev, key, type); 3614 else 3615 r = -EOPNOTSUPP; 3616 3617 dm_put_live_table(md, srcu_idx); 3618 return r; 3619 } 3620 3621 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, 3622 enum pr_type type, bool abort) 3623 { 3624 struct mapped_device *md = bdev->bd_disk->private_data; 3625 const struct pr_ops *ops; 3626 struct dm_target *tgt; 3627 fmode_t mode; 3628 int srcu_idx, r; 3629 3630 r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); 3631 if (r < 0) 3632 return r; 3633 3634 ops = bdev->bd_disk->fops->pr_ops; 3635 if (ops && ops->pr_preempt) 3636 r = ops->pr_preempt(bdev, old_key, new_key, type, abort); 3637 else 3638 r = -EOPNOTSUPP; 3639 3640 dm_put_live_table(md, srcu_idx); 3641 return r; 3642 } 3643 3644 static int dm_pr_clear(struct 
block_device *bdev, u64 key) 3645 { 3646 struct mapped_device *md = bdev->bd_disk->private_data; 3647 const struct pr_ops *ops; 3648 struct dm_target *tgt; 3649 fmode_t mode; 3650 int srcu_idx, r; 3651 3652 r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx); 3653 if (r < 0) 3654 return r; 3655 3656 ops = bdev->bd_disk->fops->pr_ops; 3657 if (ops && ops->pr_clear) 3658 r = ops->pr_clear(bdev, key); 3659 else 3660 r = -EOPNOTSUPP; 3661 3662 dm_put_live_table(md, srcu_idx); 3663 return r; 3664 } 3665 3666 static const struct pr_ops dm_pr_ops = { 3667 .pr_register = dm_pr_register, 3668 .pr_reserve = dm_pr_reserve, 3669 .pr_release = dm_pr_release, 3670 .pr_preempt = dm_pr_preempt, 3671 .pr_clear = dm_pr_clear, 3672 }; 3673 3674 static const struct block_device_operations dm_blk_dops = { 3675 .open = dm_blk_open, 3676 .release = dm_blk_close, 3677 .ioctl = dm_blk_ioctl, 3678 .getgeo = dm_blk_getgeo, 3679 .pr_ops = &dm_pr_ops, 3680 .owner = THIS_MODULE 3681 }; 3682 3683 /* 3684 * module hooks 3685 */ 3686 module_init(dm_init); 3687 module_exit(dm_exit); 3688 3689 module_param(major, uint, 0); 3690 MODULE_PARM_DESC(major, "The major number of the device mapper"); 3691 3692 module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); 3693 MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 3694 3695 module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); 3696 MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); 3697 3698 module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); 3699 MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); 3700 3701 MODULE_DESCRIPTION(DM_NAME " driver"); 3702 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 3703 MODULE_LICENSE("GPL"); 3704