/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched/signal.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/uio.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/pr.h>
#include <linux/refcount.h>

#define DM_MSG_PREFIX "core"

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

atomic_t dm_global_event_nr = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);

void dm_issue_global_event(void)
{
	atomic_inc(&dm_global_event_nr);
	wake_up(&dm_global_eventq);
}

/*
 * One of these is allocated per original bio.
 */
struct dm_io {
	struct mapped_device *md;
	blk_status_t status;
	atomic_t io_count;
	struct bio *orig_bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
};

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7

#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;

/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	mempool_t *io_pool;
	struct bio_set *bs;
};

struct table_device {
	struct list_head list;
	refcount_t count;
	struct dm_dev dm_dev;
};

static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static struct kmem_cache *_rq_cache;

/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_BIO_BASED_IOS		16
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;

static int __dm_get_module_param_int(int *module_param, int min, int max)
{
	int param = READ_ONCE(*module_param);
	int modified_param = 0;
	bool modified = true;

	if (param < min)
		modified_param = min;
	else if (param > max)
		modified_param = max;
	else
		modified = false;

	if (modified) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

unsigned __dm_get_module_param(unsigned *module_param,
			       unsigned def, unsigned max)
{
	unsigned param = READ_ONCE(*module_param);
	unsigned modified_param = 0;

	if (!param)
		modified_param = def;
	else if (param > max)
		modified_param = max;

	if (modified_param) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

unsigned dm_get_reserved_bio_based_ios(void)
{
	return __dm_get_module_param(&reserved_bio_based_ios,
				     RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);

static unsigned dm_get_numa_node(void)
{
	return __dm_get_module_param_int(&dm_numa_node,
					 DM_NUMA_NODE, num_online_nodes() - 1);
}
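
/*
 * Illustrative note (annotation, not part of the original source): the two
 * helpers above clamp module parameters on every read, so an out-of-range
 * value is silently corrected and written back with cmpxchg(). Assuming
 * reserved_bio_based_ios is exposed via module_param() in the usual way,
 * a sketch of the observable behaviour is:
 *
 *	echo 0 > /sys/module/dm_mod/parameters/reserved_bio_based_ios
 *	// dm_get_reserved_bio_based_ios() now returns the default
 *	// RESERVED_BIO_BASED_IOS (16) rather than 0, and values above
 *	// DM_RESERVED_MAX_IOS are clamped down to that maximum.
 */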
static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_io_cache;

	_rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
				      __alignof__(struct request), 0, NULL);
	if (!_rq_cache)
		goto out_free_rq_tio_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_cache;

	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
	if (!deferred_remove_workqueue) {
		r = -ENOMEM;
		goto out_uevent_exit;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_free_workqueue;

	if (!_major)
		_major = r;

	return 0;

out_free_workqueue:
	destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
	dm_uevent_exit();
out_free_rq_cache:
	kmem_cache_destroy(_rq_cache);
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

static void local_exit(void)
{
	flush_scheduled_work();
	destroy_workqueue(deferred_remove_workqueue);

	kmem_cache_destroy(_rq_cache);
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	/*
	 * Should be empty by this point.
	 */
	idr_destroy(&_minor_idr);
}

/*
 * Block device functions
 */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct request_queue *dm_get_md_queue(struct mapped_device *md)
{
	return md->queue;
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
				  struct block_device **bdev,
				  fmode_t *mode)
{
	struct dm_target *tgt;
	struct dm_table *map;
	int srcu_idx, r;

retry:
	r = -ENOTTY;
	map = dm_get_live_table(md, &srcu_idx);
	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);
	if (!tgt->type->prepare_ioctl)
		goto out;

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}
	r = tgt->type->prepare_ioctl(tgt, bdev, mode);
	if (r < 0)
		goto out;

	bdgrab(*bdev);
	dm_put_live_table(md, srcu_idx);
	return r;

out:
	dm_put_live_table(md, srcu_idx);
	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
		msleep(10);
		goto retry;
	}
	return r;
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int r;

	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
	if (r < 0)
		return r;

	if (r > 0) {
		/*
		 * Target determined this ioctl is being issued against a
		 * subset of the parent bdev; require extra privileges.
		 */
		if (!capable(CAP_SYS_RAWIO)) {
			DMWARN_LIMIT(
				"%s: sending ioctl %x to DM device without required privilege.",
				current->comm, cmd);
			r = -ENOIOCTLCMD;
			goto out;
		}
	}

	r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
	bdput(bdev);
	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct dm_target_io *tio)
{
	bio_put(&tio->clone);
}

int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->orig_bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(md->queue, cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio_data_dir(bio),
				    bio->bi_iter.bi_sector, bio_sectors(bio),
				    false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->orig_bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int rw = bio_data_dir(bio);

	generic_end_io_acct(md->queue, rw, &dm_disk(md)->part0, io->start_time);

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio_data_dir(bio),
				    bio->bi_iter.bi_sector, bio_sectors(bio),
				    true, duration, &io->stats_aux);

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a flush.
	 */
	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

/*
 * Everyone (including functions in this file), should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

/*
 * Open a table device so we can use it as a map destination.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *bdev;

	int r;

	BUG_ON(td->dm_dev.bdev);

	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	r = bd_link_disk_holder(bdev, dm_disk(md));
	if (r) {
		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
		return r;
	}

	td->dm_dev.bdev = bdev;
	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
	return 0;
}

/*
 * Close a table device that we've been using.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (!td->dm_dev.bdev)
		return;

	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
	put_dax(td->dm_dev.dax_dev);
	td->dm_dev.bdev = NULL;
	td->dm_dev.dax_dev = NULL;
}

static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode)
{
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result)
{
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
		if (!td) {
			mutex_unlock(&md->table_devices_lock);
			return -ENOMEM;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		if ((r = open_table_device(td, dev, md))) {
			mutex_unlock(&md->table_devices_lock);
			kfree(td);
			return r;
		}

		format_dev_t(td->dm_dev.name, dev);

		refcount_set(&td->count, 1);
		list_add(&td->list, &md->table_devices);
	} else {
		refcount_inc(&td->count);
	}
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);
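
/*
 * Illustrative sketch (annotation, not in the original source):
 * dm_get_table_device() and dm_put_table_device() form a reference-counted
 * pair. Assuming a hypothetical caller that already holds a mapped_device
 * and a dev_t, usage looks roughly like:
 *
 *	struct dm_dev *d;
 *	int r = dm_get_table_device(md, dev, FMODE_READ | FMODE_WRITE, &d);
 *	if (r)
 *		return r;
 *	// ... use d->bdev / d->dax_dev ...
 *	dm_put_table_device(md, d);
 *
 * Repeated gets of the same (dev, mode) pair return the same entry with its
 * refcount bumped; the underlying block device is only closed when the last
 * reference is dropped in dm_put_table_device() below.
 */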
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (refcount_dec_and_test(&td->count)) {
		close_table_device(td, md);
		list_del(&td->list);
		kfree(td);
	}
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);

static void free_table_devices(struct list_head *devices)
{
	struct list_head *tmp, *next;

	list_for_each_safe(tmp, next, devices) {
		struct table_device *td = list_entry(tmp, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       td->dm_dev.name, refcount_read(&td->count));
		kfree(td);
	}
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, blk_status_t error)
{
	unsigned long flags;
	blk_status_t io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (unlikely(error)) {
		spin_lock_irqsave(&io->endio_lock, flags);
		if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(md)))
			io->status = error;
		spin_unlock_irqrestore(&io->endio_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->status == BLK_STS_DM_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
				bio_list_add_head(&md->deferred, io->orig_bio);
			else
				/* noflush suspend was interrupted. */
				io->status = BLK_STS_IOERR;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->status;
		bio = io->orig_bio;
		end_io_acct(io);
		free_io(md, io);

		if (io_error == BLK_STS_DM_REQUEUE)
			return;

		if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
			/*
			 * Preflush done for flush with data, reissue
			 * without REQ_PREFLUSH.
			 */
			bio->bi_opf &= ~REQ_PREFLUSH;
			queue_io(md, bio);
		} else {
			/* done with normal IO or empty flush */
			bio->bi_status = io_error;
			bio_endio(bio);
		}
	}
}

void disable_write_same(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE SAME, disable it */
	limits->max_write_same_sectors = 0;
}

void disable_write_zeroes(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE ZEROES, disable it */
	limits->max_write_zeroes_sectors = 0;
}

static void clone_endio(struct bio *bio)
{
	blk_status_t error = bio->bi_status;
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (unlikely(error == BLK_STS_TARGET)) {
		if (bio_op(bio) == REQ_OP_WRITE_SAME &&
		    !bio->bi_disk->queue->limits.max_write_same_sectors)
			disable_write_same(md);
		if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
		    !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
			disable_write_zeroes(md);
	}

	if (endio) {
		int r = endio(tio->ti, bio, &error);
		switch (r) {
		case DM_ENDIO_REQUEUE:
			error = BLK_STS_DM_REQUEUE;
			/*FALLTHRU*/
		case DM_ENDIO_DONE:
			break;
		case DM_ENDIO_INCOMPLETE:
			/* The target will handle the io */
			return;
		default:
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	free_tio(tio);
	dec_pending(io, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}
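
/*
 * Worked example (annotation, not from the original source): assume a
 * hypothetical target mapped at ti->begin = 1000 with ti->len = 500
 * sectors. For a bio starting at device sector 1200, dm_target_offset()
 * yields 200, so max_io_len_target_boundary() returns 500 - 200 = 300:
 * at most 300 sectors can be issued before crossing the end of that
 * target. max_io_len() below may shrink this further if the target has
 * set ti->max_io_len.
 */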
static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
						sector_t sector, int *srcu_idx)
{
	struct dm_table *map;
	struct dm_target *ti;

	map = dm_get_live_table(md, srcu_idx);
	if (!map)
		return NULL;

	ti = dm_table_find_target(map, sector);
	if (!dm_target_is_valid(ti))
		return NULL;

	return ti;
}

static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
				 long nr_pages, void **kaddr, pfn_t *pfn)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long len, ret = -EIO;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (!ti->type->direct_access)
		goto out;
	len = max_io_len(sector, ti) / PAGE_SECTORS;
	if (len < 1)
		goto out;
	nr_pages = min(len, nr_pages);
	if (ti->type->direct_access)
		ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);

out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
				    void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long ret = 0;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (!ti->type->dax_copy_from_iter) {
		ret = copy_from_iter(addr, bytes, i);
		goto out;
	}
	ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

/*
 * A target may call dm_accept_partial_bio only from the map routine. It is
 * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
 *
 * dm_accept_partial_bio informs the dm that the target only wants to process
 * additional n_sectors sectors of the bio and the rest of the data should be
 * sent in a next bio.
 *
 * A diagram that explains the arithmetic:
 * +--------------------+---------------+-------+
 * |         1          |       2       |   3   |
 * +--------------------+---------------+-------+
 *
 * <-------------- *tio->len_ptr --------------->
 *                      <------- bi_size ------->
 *                      <-- n_sectors -->
 *
 * Region 1 was already iterated over with bio_advance or similar function.
 *	(it may be empty if the target doesn't use bio_advance)
 * Region 2 is the remaining bio size that the target wants to process.
 *	(it may be empty if region 1 is non-empty, although there is no reason
 *	 to make it empty)
 * The target requires that region 3 is to be sent in the next bio.
 *
 * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
 * the partially processed part (the sum of regions 1+2) must be the same for all
 * copies of the bio.
 */
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;

	BUG_ON(bio->bi_opf & REQ_PREFLUSH);
	BUG_ON(bi_size > *tio->len_ptr);
	BUG_ON(n_sectors > bi_size);

	*tio->len_ptr -= bi_size - n_sectors;
	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
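
/*
 * Illustrative sketch (annotation, not part of the original source): a
 * target's ->map method can use dm_accept_partial_bio() to take only a
 * prefix of the bio it was handed. Assuming a hypothetical target that can
 * only handle up to "max" sectors per bio, with a made-up example_dev()
 * helper returning its underlying dm_dev:
 *
 *	static int example_map(struct dm_target *ti, struct bio *bio)
 *	{
 *		unsigned max = 8;	// sectors this target handles at once
 *
 *		if (bio_sectors(bio) > max)
 *			dm_accept_partial_bio(bio, max);
 *		bio_set_dev(bio, example_dev(ti)->bdev);
 *		return DM_MAPIO_REMAPPED;
 *	}
 *
 * DM core then resubmits the remainder (region 3 in the diagram above) as
 * a separate clone.
 */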
/*
 * The zone descriptors obtained with a zone report indicate
 * zone positions within the target device. The zone descriptors
 * must be remapped to match their position within the dm device.
 * A target may call dm_remap_zone_report after completion of a
 * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained
 * from the target device mapping to the dm device.
 */
void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
{
#ifdef CONFIG_BLK_DEV_ZONED
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
	struct bio *report_bio = tio->io->orig_bio;
	struct blk_zone_report_hdr *hdr = NULL;
	struct blk_zone *zone;
	unsigned int nr_rep = 0;
	unsigned int ofst;
	struct bio_vec bvec;
	struct bvec_iter iter;
	void *addr;

	if (bio->bi_status)
		return;

	/*
	 * Remap the start sector of the reported zones. For sequential zones,
	 * also remap the write pointer position.
	 */
	bio_for_each_segment(bvec, report_bio, iter) {
		addr = kmap_atomic(bvec.bv_page);

		/* Remember the report header in the first page */
		if (!hdr) {
			hdr = addr;
			ofst = sizeof(struct blk_zone_report_hdr);
		} else
			ofst = 0;

		/* Set zones start sector */
		while (hdr->nr_zones && ofst < bvec.bv_len) {
			zone = addr + ofst;
			if (zone->start >= start + ti->len) {
				hdr->nr_zones = 0;
				break;
			}
			zone->start = zone->start + ti->begin - start;
			if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
				if (zone->cond == BLK_ZONE_COND_FULL)
					zone->wp = zone->start + zone->len;
				else if (zone->cond == BLK_ZONE_COND_EMPTY)
					zone->wp = zone->start;
				else
					zone->wp = zone->wp + ti->begin - start;
			}
			ofst += sizeof(struct blk_zone);
			hdr->nr_zones--;
			nr_rep++;
		}

		if (addr != hdr)
			kunmap_atomic(addr);

		if (!hdr->nr_zones)
			break;
	}

	if (hdr) {
		hdr->nr_zones = nr_rep;
		kunmap_atomic(hdr);
	}

	bio_advance(report_bio, report_bio->bi_iter.bi_size);

#else /* !CONFIG_BLK_DEV_ZONED */
	bio->bi_status = BLK_STS_NOTSUPP;
#endif
}
EXPORT_SYMBOL_GPL(dm_remap_zone_report);

static void __map_bio(struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct bio *clone = &tio->clone;
	struct dm_target *ti = tio->ti;

	clone->bi_end_io = clone_endio;

	/*
	 * Map the clone. If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_iter.bi_sector;

	r = ti->type->map(ti, clone);
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		break;
	case DM_MAPIO_REMAPPED:
		/* the bio has been remapped so dispatch it */
		trace_block_bio_remap(clone->bi_disk->queue, clone,
				      bio_dev(tio->io->orig_bio), sector);
		generic_make_request(clone);
		break;
	case DM_MAPIO_KILL:
		dec_pending(tio->io, BLK_STS_IOERR);
		free_tio(tio);
		break;
	case DM_MAPIO_REQUEUE:
		dec_pending(tio->io, BLK_STS_DM_REQUEUE);
		free_tio(tio);
		break;
	default:
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned sector_count;
};

static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
{
	bio->bi_iter.bi_sector = sector;
	bio->bi_iter.bi_size = to_bytes(len);
}

/*
 * Creates a bio that consists of range of complete bvecs.
 */
static int clone_bio(struct dm_target_io *tio, struct bio *bio,
		     sector_t sector, unsigned len)
{
	struct bio *clone = &tio->clone;

	__bio_clone_fast(clone, bio);

	if (unlikely(bio_integrity(bio) != NULL)) {
		int r;

		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
			     !dm_target_passes_integrity(tio->ti->type))) {
			DMWARN("%s: the target %s doesn't support integrity data.",
				dm_device_name(tio->io->md),
				tio->ti->type->name);
			return -EIO;
		}

		r = bio_integrity_clone(clone, bio, GFP_NOIO);
		if (r < 0)
			return r;
	}

	if (bio_op(bio) != REQ_OP_ZONE_REPORT)
		bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
	clone->bi_iter.bi_size = to_bytes(len);

	if (unlikely(bio_integrity(bio) != NULL))
		bio_integrity_trim(clone);

	return 0;
}

static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
				      unsigned target_bio_nr, gfp_t gfp_mask)
{
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(gfp_mask, 0, ci->md->bs);
	if (!clone)
		return NULL;

	tio = container_of(clone, struct dm_target_io, clone);
	tio->io = ci->io;
	tio->ti = ti;
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
				struct dm_target *ti, unsigned num_bios)
{
	struct dm_target_io *tio;
	int try;

	if (!num_bios)
		return;

	if (num_bios == 1) {
		tio = alloc_tio(ci, ti, 0, GFP_NOIO);
		bio_list_add(blist, &tio->clone);
		return;
	}

	for (try = 0; try < 2; try++) {
		int bio_nr;
		struct bio *bio;

		if (try)
			mutex_lock(&ci->md->table_devices_lock);
		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
			tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
			if (!tio)
				break;

			bio_list_add(blist, &tio->clone);
		}
		if (try)
			mutex_unlock(&ci->md->table_devices_lock);
		if (bio_nr == num_bios)
			return;

		while ((bio = bio_list_pop(blist))) {
			tio = container_of(bio, struct dm_target_io, clone);
			free_tio(tio);
		}
	}
}

static void __clone_and_map_simple_bio(struct clone_info *ci,
				       struct dm_target_io *tio, unsigned *len)
{
	struct bio *clone = &tio->clone;

	tio->len_ptr = len;

	__bio_clone_fast(clone, ci->bio);
	if (len)
		bio_setup_sector(clone, ci->sector, *len);

	__map_bio(tio);
}

static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
				  unsigned num_bios, unsigned *len)
{
	struct bio_list blist = BIO_EMPTY_LIST;
	struct bio *bio;
	struct dm_target_io *tio;

	alloc_multiple_bios(&blist, ci, ti, num_bios);

	while ((bio = bio_list_pop(&blist))) {
		tio = container_of(bio, struct dm_target_io, clone);
		__clone_and_map_simple_bio(ci, tio, len);
	}
}

static int __send_empty_flush(struct clone_info *ci)
{
	unsigned target_nr = 0;
	struct dm_target *ti;

	BUG_ON(bio_has_data(ci->bio));
	while ((ti = dm_table_get_target(ci->map, target_nr++)))
		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);

	return 0;
}

static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
				    sector_t sector, unsigned *len)
{
	struct bio *bio = ci->bio;
	struct dm_target_io *tio;
	int r;

	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
	tio->len_ptr = len;
	r = clone_bio(tio, bio, sector, *len);
	if (r < 0) {
		free_tio(tio);
		return r;
	}
	__map_bio(tio);

	return 0;
}

typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);

static unsigned get_num_discard_bios(struct dm_target *ti)
{
	return ti->num_discard_bios;
}

static unsigned get_num_write_same_bios(struct dm_target *ti)
{
	return ti->num_write_same_bios;
}

static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
{
	return ti->num_write_zeroes_bios;
}

typedef bool (*is_split_required_fn)(struct dm_target *ti);

static bool is_split_required_for_discard(struct dm_target *ti)
{
	return ti->split_discard_bios;
}
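
/*
 * Note (annotation, not in the original source): the hooks above simply
 * read per-target knobs that a target's constructor is expected to set.
 * For instance, a hypothetical passthrough target that supports discards
 * one-to-one might do, in its ctr:
 *
 *	ti->num_discard_bios = 1;
 *	ti->split_discard_bios = false;
 *
 * Leaving num_discard_bios at 0 makes __send_changing_extent_only() below
 * return -EOPNOTSUPP for REQ_OP_DISCARD bios sent to that target.
 */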
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
				       get_num_bios_fn get_num_bios,
				       is_split_required_fn is_split_required)
{
	unsigned len;
	unsigned num_bios;

	/*
	 * Even though the device advertised support for this type of
	 * request, that does not mean every target supports it, and
	 * reconfiguration might also have changed that since the
	 * check was performed.
	 */
	num_bios = get_num_bios ? get_num_bios(ti) : 0;
	if (!num_bios)
		return -EOPNOTSUPP;

	if (is_split_required && !is_split_required(ti))
		len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
	else
		len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));

	__send_duplicate_bios(ci, ti, num_bios, &len);

	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}

static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{
	return __send_changing_extent_only(ci, ti, get_num_discard_bios,
					   is_split_required_for_discard);
}

static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
	return __send_changing_extent_only(ci, ti, get_num_write_same_bios, NULL);
}

static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{
	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios, NULL);
}

/*
 * Select the correct strategy for processing a non-flush bio.
 */
static int __split_and_process_non_flush(struct clone_info *ci)
{
	struct bio *bio = ci->bio;
	struct dm_target *ti;
	unsigned len;
	int r;

	ti = dm_table_find_target(ci->map, ci->sector);
	if (!dm_target_is_valid(ti))
		return -EIO;

	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
		return __send_discard(ci, ti);
	else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
		return __send_write_same(ci, ti);
	else if (unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES))
		return __send_write_zeroes(ci, ti);

	if (bio_op(bio) == REQ_OP_ZONE_REPORT)
		len = ci->sector_count;
	else
		len = min_t(sector_t, max_io_len(ci->sector, ti),
			    ci->sector_count);

	r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
	if (r < 0)
		return r;

	ci->sector += len;
	ci->sector_count -= len;

	return 0;
}

/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
static void __split_and_process_bio(struct mapped_device *md,
				    struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	int error = 0;

	if (unlikely(!map)) {
		bio_io_error(bio);
		return;
	}

	ci.map = map;
	ci.md = md;
	ci.io = alloc_io(md);
	ci.io->status = 0;
	atomic_set(&ci.io->io_count, 1);
	ci.io->orig_bio = bio;
	ci.io->md = md;
	spin_lock_init(&ci.io->endio_lock);
	ci.sector = bio->bi_iter.bi_sector;

	start_io_acct(ci.io);

	if (bio->bi_opf & REQ_PREFLUSH) {
		ci.bio = &ci.md->flush_bio;
		ci.sector_count = 0;
		error = __send_empty_flush(&ci);
		/* dec_pending submits any data associated with flush */
	} else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
		ci.bio = bio;
		ci.sector_count = 0;
		error = __split_and_process_non_flush(&ci);
	} else {
		ci.bio = bio;
		ci.sector_count = bio_sectors(bio);
		while (ci.sector_count && !error) {
			error = __split_and_process_non_flush(&ci);
			if (current->bio_list && ci.sector_count && !error) {
				/*
				 * Remainder must be passed to generic_make_request()
				 * so that it gets handled *after* bios already submitted
				 * have been completely processed.
				 * We take a clone of the original to store in
				 * ci.io->orig_bio to be used by end_io_acct() and
				 * for dec_pending to use for completion handling.
				 * As this path is not used for REQ_OP_ZONE_REPORT,
				 * the usage of io->orig_bio in dm_remap_zone_report()
				 * won't be affected by this reassignment.
				 */
				struct bio *b = bio_clone_bioset(bio, GFP_NOIO,
								 md->queue->bio_split);
				ci.io->orig_bio = b;
				bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9);
				bio_chain(b, bio);
				generic_make_request(bio);
				break;
			}
		}
	}

	/* drop the extra reference count */
	dec_pending(ci.io, errno_to_blk_status(error));
}

/*
 * The request function that remaps the bio to one target and
 * splits off any remainder.
 */
static blk_qc_t dm_make_request(struct request_queue *q, struct bio *bio)
{
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	generic_start_io_acct(q, rw, bio_sectors(bio), &dm_disk(md)->part0);

	/* if we're suspended, we have to queue this io for later */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) {
		dm_put_live_table(md, srcu_idx);

		if (!(bio->bi_opf & REQ_RAHEAD))
			queue_io(md, bio);
		else
			bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	__split_and_process_bio(md, map, bio);
	dm_put_live_table(md, srcu_idx);
	return BLK_QC_T_NONE;
}

static int dm_any_congested(void *congested_data, int bdi_bits)
{
	int r = bdi_bits;
	struct mapped_device *md = congested_data;
	struct dm_table *map;

	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		if (dm_request_based(md)) {
			/*
			 * With request-based DM we only need to check the
			 * top-level queue for congestion.
			 */
			r = md->queue->backing_dev_info->wb.state & bdi_bits;
		} else {
			map = dm_get_live_table_fast(md);
			if (map)
				r = dm_table_any_congested(map, bdi_bits);
			dm_put_live_table_fast(md);
		}
	}

	return r;
}

/*-----------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------*/
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r == -ENOSPC ? -EBUSY : r;
	return 0;
}

static int next_free_minor(int *minor)
{
	int r;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r;
	*minor = r;
	return 0;
}

static const struct block_device_operations dm_blk_dops;
static const struct dax_operations dm_dax_ops;

static void dm_wq_work(struct work_struct *work);

void dm_init_md_queue(struct mapped_device *md)
{
	/*
	 * Initialize data that will only be used by a non-blk-mq DM queue
	 * - must do so here (in alloc_dev callchain) before queue is used
	 */
	md->queue->queuedata = md;
	md->queue->backing_dev_info->congested_data = md;
}

void dm_init_normal_md_queue(struct mapped_device *md)
{
	md->use_blk_mq = false;
	dm_init_md_queue(md);

	/*
	 * Initialize aspects of queue that aren't relevant for blk-mq
	 */
	md->queue->backing_dev_info->congested_fn = dm_any_congested;
}

static void cleanup_mapped_device(struct mapped_device *md)
{
	if (md->wq)
		destroy_workqueue(md->wq);
	if (md->kworker_task)
		kthread_stop(md->kworker_task);
	mempool_destroy(md->io_pool);
	if (md->bs)
		bioset_free(md->bs);

	if (md->dax_dev) {
		kill_dax(md->dax_dev);
		put_dax(md->dax_dev);
		md->dax_dev = NULL;
	}

	if (md->disk) {
		spin_lock(&_minor_lock);
		md->disk->private_data = NULL;
		spin_unlock(&_minor_lock);
		del_gendisk(md->disk);
		put_disk(md->disk);
	}

	if (md->queue)
		blk_cleanup_queue(md->queue);

	cleanup_srcu_struct(&md->io_barrier);

	if (md->bdev) {
		bdput(md->bdev);
		md->bdev = NULL;
	}

	dm_mq_cleanup_mapped_device(md);
}

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r, numa_node_id = dm_get_numa_node();
	struct dax_device *dax_dev;
	struct mapped_device *md;
	void *old_md;

	md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
	if (!md) {
		DMWARN("unable to allocate device, out of memory.");
		return NULL;
	}

	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->numa_node_id = numa_node_id;
	md->use_blk_mq = dm_use_blk_mq_default();
	md->init_tio_pdu = false;
	md->type = DM_TYPE_NONE;
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	mutex_init(&md->table_devices_lock);
	spin_lock_init(&md->deferred_lock);
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	INIT_LIST_HEAD(&md->table_devices);
	spin_lock_init(&md->uevent_lock);

	md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
	if (!md->queue)
		goto bad;

	dm_init_md_queue(md);

	md->disk = alloc_disk_node(1, numa_node_id);
	if (!md->disk)
		goto bad;

	atomic_set(&md->pending[0], 0);
	atomic_set(&md->pending[1], 0);
	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_holder.completion);
	md->kworker_task = NULL;

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->fops = &dm_blk_dops;
	md->disk->queue = md->queue;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);

	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
	if (!dax_dev)
		goto bad;
	md->dax_dev = dax_dev;

	add_disk(md->disk);
	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
	if (!md->wq)
		goto bad;

	md->bdev = bdget_disk(md->disk, 0);
	if (!md->bdev)
		goto bad;

	bio_init(&md->flush_bio, NULL, 0);
	bio_set_dev(&md->flush_bio, md->bdev);
	md->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;

	dm_stats_init(&md->stats);

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad:
	cleanup_mapped_device(md);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kvfree(md);
	return NULL;
}

static void unlock_fs(struct mapped_device *md);

static void free_dev(struct mapped_device *md)
{
	int minor = MINOR(disk_devt(md->disk));

	unlock_fs(md);

	cleanup_mapped_device(md);

	free_table_devices(&md->table_devices);
	dm_stats_cleanup(&md->stats);
	free_minor(minor);

	module_put(THIS_MODULE);
	kvfree(md);
}

static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
{
	struct dm_md_mempools *p = dm_table_get_md_mempools(t);

	if (dm_table_bio_based(t)) {
		/* The md may already have mempools that need changing. */
		if (md->bs) {
			/*
			 * Reload bioset because front_pad may have changed
			 * because a different table was loaded.
			 */
			bioset_free(md->bs);
			md->bs = NULL;
		}
		if (md->io_pool) {
			/*
			 * Reload io_pool because pool_size may have changed
			 * because a different table was loaded.
			 */
			mempool_destroy(md->io_pool);
			md->io_pool = NULL;
		}

	} else if (md->bs) {
		/*
		 * There's no need to reload with request-based dm
		 * because the size of front_pad doesn't change.
		 * Note for future: If you are to reload bioset,
		 * prep-ed requests in the queue may refer
		 * to bio from the old bioset, so you must walk
		 * through the queue to unprep.
		 */
		goto out;
	}

	BUG_ON(!p || md->io_pool || md->bs);

	md->io_pool = p->io_pool;
	p->io_pool = NULL;
	md->bs = p->bs;
	p->bs = NULL;
out:
	/* mempool bind completed, no longer need any mempools in the table */
	dm_table_free_md_mempools(t);
}

/*
 * Bind a table to the device.
 */
static void event_callback(void *context)
{
	unsigned long flags;
	LIST_HEAD(uevents);
	struct mapped_device *md = (struct mapped_device *) context;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_splice_init(&md->uevent_list, &uevents);
	spin_unlock_irqrestore(&md->uevent_lock, flags);

	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);

	atomic_inc(&md->event_nr);
	wake_up(&md->eventq);
	dm_issue_global_event();
}

/*
 * Protected by md->suspend_lock obtained by dm_swap_table().
 */
static void __set_size(struct mapped_device *md, sector_t size)
{
	lockdep_assert_held(&md->suspend_lock);

	set_capacity(md->disk, size);

	i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
}

/*
 * Returns old map, which caller must destroy.
 */
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
			       struct queue_limits *limits)
{
	struct dm_table *old_map;
	struct request_queue *q = md->queue;
	sector_t size;

	lockdep_assert_held(&md->suspend_lock);

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != dm_get_size(md))
		memset(&md->geometry, 0, sizeof(md->geometry));

	__set_size(md, size);

	dm_table_event_callback(t, event_callback, md);

	/*
	 * The queue hasn't been stopped yet, if the old table type wasn't
	 * for request-based during suspension. So stop it to prevent
	 * I/O mapping before resume.
	 * This must be done before setting the queue restrictions,
	 * because request-based dm may be run just after the setting.
	 */
	if (dm_table_request_based(t)) {
		dm_stop_queue(q);
		/*
		 * Leverage the fact that request-based DM targets are
		 * immutable singletons and establish md->immutable_target
		 * - used to optimize both dm_request_fn and dm_mq_queue_rq
		 */
		md->immutable_target = dm_table_get_immutable_target(t);
	}

	__bind_mempools(md, t);

	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	rcu_assign_pointer(md->map, (void *)t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	dm_table_set_restrictions(t, q, limits);
	if (old_map)
		dm_sync_table(md);

	return old_map;
}

/*
 * Returns unbound table for the caller to free.
 */
static struct dm_table *__unbind(struct mapped_device *md)
{
	struct dm_table *map = rcu_dereference_protected(md->map, 1);

	if (!map)
		return NULL;

	dm_table_event_callback(map, NULL, NULL);
	RCU_INIT_POINTER(md->map, NULL);
	dm_sync_table(md);

	return map;
}

/*
 * Constructor for a new device.
 */
int dm_create(int minor, struct mapped_device **result)
{
	struct mapped_device *md;

	md = alloc_dev(minor);
	if (!md)
		return -ENXIO;

	dm_sysfs_init(md);

	*result = md;
	return 0;
}

/*
 * Functions to manage md->type.
 * All are required to hold md->type_lock.
 */
void dm_lock_md_type(struct mapped_device *md)
{
	mutex_lock(&md->type_lock);
}

void dm_unlock_md_type(struct mapped_device *md)
{
	mutex_unlock(&md->type_lock);
}

void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	md->type = type;
}

enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
{
	return md->type;
}

struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}

/*
 * The queue_limits are only valid as long as you have a reference
 * count on 'md'.
 */
struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
	BUG_ON(!atomic_read(&md->holders));
	return &md->queue->limits;
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);

/*
 * Setup the DM device's queue based on md's type
 */
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
	int r;
	enum dm_queue_mode type = dm_get_md_type(md);

	switch (type) {
	case DM_TYPE_REQUEST_BASED:
		r = dm_old_init_request_queue(md, t);
		if (r) {
			DMERR("Cannot initialize queue for request-based mapped device");
			return r;
		}
		break;
	case DM_TYPE_MQ_REQUEST_BASED:
		r = dm_mq_init_request_queue(md, t);
		if (r) {
			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
			return r;
		}
		break;
	case DM_TYPE_BIO_BASED:
	case DM_TYPE_DAX_BIO_BASED:
		dm_init_normal_md_queue(md);
		blk_queue_make_request(md->queue, dm_make_request);
		break;
	case DM_TYPE_NONE:
		WARN_ON_ONCE(true);
		break;
	}

	return 0;
}

struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
	    test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}
	dm_get(md);
out:
	spin_unlock(&_minor_lock);

	return md;
}
EXPORT_SYMBOL_GPL(dm_get_md);

void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}

void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}

void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
	BUG_ON(test_bit(DMF_FREEING, &md->flags));
}

int dm_hold(struct mapped_device *md)
{
	spin_lock(&_minor_lock);
	if (test_bit(DMF_FREEING, &md->flags)) {
		spin_unlock(&_minor_lock);
		return -EBUSY;
	}
	dm_get(md);
	spin_unlock(&_minor_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(dm_hold);

const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);

static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct request_queue *q = dm_get_md_queue(md);
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	blk_set_queue_dying(q);

	if (dm_request_based(md) && md->kworker_task)
		kthread_flush_worker(&md->kworker);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	map = dm_get_live_table(md, &srcu_idx);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		dm_table_postsuspend_targets(map);
	}
	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);
	mutex_unlock(&md->suspend_lock);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example. Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			msleep(1);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_sysfs_exit(md);
	dm_table_destroy(__unbind(md));
	free_dev(md);
}

void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}

void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}

void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);

static int dm_wait_for_completion(struct mapped_device *md, long task_state)
{
	int r = 0;
	DEFINE_WAIT(wait);

	while (1) {
		prepare_to_wait(&md->wait, &wait, task_state);

		if (!md_in_flight(md))
			break;

		if (signal_pending_state(task_state, current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	finish_wait(&md->wait, &wait);

	return r;
}

/*
 * Process the deferred bios
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *c;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		c = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!c)
			break;

		if (dm_request_based(md))
			generic_make_request(c);
		else
			__split_and_process_bio(md, map, c);
	}

	dm_put_live_table(md, srcu_idx);
}

static void dm_queue_flush(struct mapped_device *md)
{
	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	smp_mb__after_atomic();
	queue_work(md->wq, &md->work);
}

/*
 * Swap in a new table, returning the old one for the caller to destroy.
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
2239 */ 2240 if (dm_table_has_no_data_devices(table)) { 2241 live_map = dm_get_live_table_fast(md); 2242 if (live_map) 2243 limits = md->queue->limits; 2244 dm_put_live_table_fast(md); 2245 } 2246 2247 if (!live_map) { 2248 r = dm_calculate_queue_limits(table, &limits); 2249 if (r) { 2250 map = ERR_PTR(r); 2251 goto out; 2252 } 2253 } 2254 2255 map = __bind(md, table, &limits); 2256 dm_issue_global_event(); 2257 2258 out: 2259 mutex_unlock(&md->suspend_lock); 2260 return map; 2261 } 2262 2263 /* 2264 * Functions to lock and unlock any filesystem running on the 2265 * device. 2266 */ 2267 static int lock_fs(struct mapped_device *md) 2268 { 2269 int r; 2270 2271 WARN_ON(md->frozen_sb); 2272 2273 md->frozen_sb = freeze_bdev(md->bdev); 2274 if (IS_ERR(md->frozen_sb)) { 2275 r = PTR_ERR(md->frozen_sb); 2276 md->frozen_sb = NULL; 2277 return r; 2278 } 2279 2280 set_bit(DMF_FROZEN, &md->flags); 2281 2282 return 0; 2283 } 2284 2285 static void unlock_fs(struct mapped_device *md) 2286 { 2287 if (!test_bit(DMF_FROZEN, &md->flags)) 2288 return; 2289 2290 thaw_bdev(md->bdev, md->frozen_sb); 2291 md->frozen_sb = NULL; 2292 clear_bit(DMF_FROZEN, &md->flags); 2293 } 2294 2295 /* 2296 * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG 2297 * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE 2298 * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY 2299 * 2300 * If __dm_suspend returns 0, the device is completely quiescent 2301 * now. There is no request-processing activity. All new requests 2302 * are added to the md->deferred list. 2303 */ 2304 static int __dm_suspend(struct mapped_device *md, struct dm_table *map, 2305 unsigned suspend_flags, long task_state, 2306 int dmf_suspended_flag) 2307 { 2308 bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; 2309 bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; 2310 int r; 2311 2312 lockdep_assert_held(&md->suspend_lock); 2313 2314 /* 2315 * DMF_NOFLUSH_SUSPENDING must be set before presuspend. 2316 * This flag is cleared before dm_suspend returns. 2317 */ 2318 if (noflush) 2319 set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 2320 else 2321 pr_debug("%s: suspending with flush\n", dm_device_name(md)); 2322 2323 /* 2324 * This gets reverted if there's an error later and the targets 2325 * provide the .presuspend_undo hook. 2326 */ 2327 dm_table_presuspend_targets(map); 2328 2329 /* 2330 * Flush I/O to the device. 2331 * Any I/O submitted after lock_fs() may not be flushed. 2332 * noflush takes precedence over do_lockfs. 2333 * (lock_fs() flushes I/Os and waits for them to complete.) 2334 */ 2335 if (!noflush && do_lockfs) { 2336 r = lock_fs(md); 2337 if (r) { 2338 dm_table_presuspend_undo_targets(map); 2339 return r; 2340 } 2341 } 2342 2343 /* 2344 * Here we must make sure that no processes are submitting requests 2345 * to target drivers, i.e. no one may be executing 2346 * __split_and_process_bio. This is called from dm_request and 2347 * dm_wq_work. 2348 * 2349 * To get all processes out of __split_and_process_bio in dm_request, 2350 * we take the write lock. To prevent any process from reentering 2351 * __split_and_process_bio from dm_request and quiesce the thread 2352 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call 2353 * flush_workqueue(md->wq). 2354 */ 2355 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2356 if (map) 2357 synchronize_srcu(&md->io_barrier); 2358 2359 /* 2360 * Stop md->queue before flushing md->wq in case request-based 2361 * dm defers requests to md->wq from md->queue.
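 *
 * The required order, sketched (this is exactly what the code below does):
 *
 *	dm_stop_queue(md->queue);		stop new dispatch first
 *	kthread_flush_worker(&md->kworker);
 *	flush_workqueue(md->wq);		then drain deferred work
 *
 * Reversing it could let md->queue keep adding work to md->wq after
 * the flush.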
2362 */ 2363 if (dm_request_based(md)) { 2364 dm_stop_queue(md->queue); 2365 if (md->kworker_task) 2366 kthread_flush_worker(&md->kworker); 2367 } 2368 2369 flush_workqueue(md->wq); 2370 2371 /* 2372 * At this point no more requests are entering target request routines. 2373 * We call dm_wait_for_completion to wait for all existing requests 2374 * to finish. 2375 */ 2376 r = dm_wait_for_completion(md, task_state); 2377 if (!r) 2378 set_bit(dmf_suspended_flag, &md->flags); 2379 2380 if (noflush) 2381 clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 2382 if (map) 2383 synchronize_srcu(&md->io_barrier); 2384 2385 /* were we interrupted? */ 2386 if (r < 0) { 2387 dm_queue_flush(md); 2388 2389 if (dm_request_based(md)) 2390 dm_start_queue(md->queue); 2391 2392 unlock_fs(md); 2393 dm_table_presuspend_undo_targets(map); 2394 /* pushback list is already flushed, so skip flush */ 2395 } 2396 2397 return r; 2398 } 2399 2400 /* 2401 * We need to be able to change a mapping table under a mounted 2402 * filesystem. For example, we might want to move some data in 2403 * the background. Before the table can be swapped with 2404 * dm_swap_table, dm_suspend must be called to flush any 2405 * in-flight bios and ensure that any further I/O gets deferred. 2406 */ 2407 /* 2408 * Suspend mechanism in request-based dm. 2409 * 2410 * 1. Flush all I/Os by lock_fs() if needed. 2411 * 2. Stop dispatching any I/O by stopping the request_queue. 2412 * 3. Wait for all in-flight I/Os to be completed or requeued. 2413 * 2414 * To abort suspend, start the request_queue. 2415 */ 2416 int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 2417 { 2418 struct dm_table *map = NULL; 2419 int r = 0; 2420 2421 retry: 2422 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); 2423 2424 if (dm_suspended_md(md)) { 2425 r = -EINVAL; 2426 goto out_unlock; 2427 } 2428 2429 if (dm_suspended_internally_md(md)) { 2430 /* already internally suspended, wait for internal resume */ 2431 mutex_unlock(&md->suspend_lock); 2432 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); 2433 if (r) 2434 return r; 2435 goto retry; 2436 } 2437 2438 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 2439 2440 r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED); 2441 if (r) 2442 goto out_unlock; 2443 2444 dm_table_postsuspend_targets(map); 2445 2446 out_unlock: 2447 mutex_unlock(&md->suspend_lock); 2448 return r; 2449 } 2450 2451 static int __dm_resume(struct mapped_device *md, struct dm_table *map) 2452 { 2453 if (map) { 2454 int r = dm_table_resume_targets(map); 2455 if (r) 2456 return r; 2457 } 2458 2459 dm_queue_flush(md); 2460 2461 /* 2462 * Flushing deferred I/Os must be done after targets are resumed 2463 * so that mapping of targets can work correctly. 2464 * Request-based dm queues the deferred I/Os in its request_queue.
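 *
 * Overall resume order, sketched (the mirror image of __dm_suspend):
 *
 *	dm_table_resume_targets(map);
 *	dm_queue_flush(md);		clear the block flag, kick md->wq
 *	dm_start_queue(md->queue);	request-based dm only
 *	unlock_fs(md);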
2465 */ 2466 if (dm_request_based(md)) 2467 dm_start_queue(md->queue); 2468 2469 unlock_fs(md); 2470 2471 return 0; 2472 } 2473 2474 int dm_resume(struct mapped_device *md) 2475 { 2476 int r; 2477 struct dm_table *map = NULL; 2478 2479 retry: 2480 r = -EINVAL; 2481 mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); 2482 2483 if (!dm_suspended_md(md)) 2484 goto out; 2485 2486 if (dm_suspended_internally_md(md)) { 2487 /* already internally suspended, wait for internal resume */ 2488 mutex_unlock(&md->suspend_lock); 2489 r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); 2490 if (r) 2491 return r; 2492 goto retry; 2493 } 2494 2495 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 2496 if (!map || !dm_table_get_size(map)) 2497 goto out; 2498 2499 r = __dm_resume(md, map); 2500 if (r) 2501 goto out; 2502 2503 clear_bit(DMF_SUSPENDED, &md->flags); 2504 out: 2505 mutex_unlock(&md->suspend_lock); 2506 2507 return r; 2508 } 2509 2510 /* 2511 * Internal suspend/resume works like userspace-driven suspend. It waits 2512 * until all bios finish and prevents issuing new bios to the target drivers. 2513 * It may be used only from the kernel. 2514 */ 2515 2516 static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags) 2517 { 2518 struct dm_table *map = NULL; 2519 2520 lockdep_assert_held(&md->suspend_lock); 2521 2522 if (md->internal_suspend_count++) 2523 return; /* nested internal suspend */ 2524 2525 if (dm_suspended_md(md)) { 2526 set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 2527 return; /* nest suspend */ 2528 } 2529 2530 map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); 2531 2532 /* 2533 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is 2534 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend 2535 * would require changing .presuspend to return an error -- avoid this 2536 * until there is a need for more elaborate variants of internal suspend. 2537 */ 2538 (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE, 2539 DMF_SUSPENDED_INTERNALLY); 2540 2541 dm_table_postsuspend_targets(map); 2542 } 2543 2544 static void __dm_internal_resume(struct mapped_device *md) 2545 { 2546 BUG_ON(!md->internal_suspend_count); 2547 2548 if (--md->internal_suspend_count) 2549 return; /* resume from nested internal suspend */ 2550 2551 if (dm_suspended_md(md)) 2552 goto done; /* resume from nested suspend */ 2553 2554 /* 2555 * NOTE: existing callers don't need to call dm_table_resume_targets 2556 * (which may fail -- so best to avoid it for now by passing NULL map) 2557 */ 2558 (void) __dm_resume(md, NULL); 2559 2560 done: 2561 clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 2562 smp_mb__after_atomic(); 2563 wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY); 2564 } 2565 2566 void dm_internal_suspend_noflush(struct mapped_device *md) 2567 { 2568 mutex_lock(&md->suspend_lock); 2569 __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG); 2570 mutex_unlock(&md->suspend_lock); 2571 } 2572 EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush); 2573 2574 void dm_internal_resume(struct mapped_device *md) 2575 { 2576 mutex_lock(&md->suspend_lock); 2577 __dm_internal_resume(md); 2578 mutex_unlock(&md->suspend_lock); 2579 } 2580 EXPORT_SYMBOL_GPL(dm_internal_resume); 2581 2582 /* 2583 * Fast variants of internal suspend/resume hold md->suspend_lock, 2584 * which prevents interaction with userspace-driven suspend. 
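 *
 * The lock is taken in the suspend call and released in the resume call,
 * so kernel callers are expected to pair them tightly, e.g. (illustrative):
 *
 *	dm_internal_suspend_fast(md);
 *	... briefly inspect or update state that must not race with I/O ...
 *	dm_internal_resume_fast(md);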
2585 */ 2586 2587 void dm_internal_suspend_fast(struct mapped_device *md) 2588 { 2589 mutex_lock(&md->suspend_lock); 2590 if (dm_suspended_md(md) || dm_suspended_internally_md(md)) 2591 return; 2592 2593 set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2594 synchronize_srcu(&md->io_barrier); 2595 flush_workqueue(md->wq); 2596 dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2597 } 2598 EXPORT_SYMBOL_GPL(dm_internal_suspend_fast); 2599 2600 void dm_internal_resume_fast(struct mapped_device *md) 2601 { 2602 if (dm_suspended_md(md) || dm_suspended_internally_md(md)) 2603 goto done; 2604 2605 dm_queue_flush(md); 2606 2607 done: 2608 mutex_unlock(&md->suspend_lock); 2609 } 2610 EXPORT_SYMBOL_GPL(dm_internal_resume_fast); 2611 2612 /*----------------------------------------------------------------- 2613 * Event notification. 2614 *---------------------------------------------------------------*/ 2615 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, 2616 unsigned cookie) 2617 { 2618 char udev_cookie[DM_COOKIE_LENGTH]; 2619 char *envp[] = { udev_cookie, NULL }; 2620 2621 if (!cookie) 2622 return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); 2623 else { 2624 snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", 2625 DM_COOKIE_ENV_VAR_NAME, cookie); 2626 return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, 2627 action, envp); 2628 } 2629 } 2630 2631 uint32_t dm_next_uevent_seq(struct mapped_device *md) 2632 { 2633 return atomic_add_return(1, &md->uevent_seq); 2634 } 2635 2636 uint32_t dm_get_event_nr(struct mapped_device *md) 2637 { 2638 return atomic_read(&md->event_nr); 2639 } 2640 2641 int dm_wait_event(struct mapped_device *md, int event_nr) 2642 { 2643 return wait_event_interruptible(md->eventq, 2644 (event_nr != atomic_read(&md->event_nr))); 2645 } 2646 2647 void dm_uevent_add(struct mapped_device *md, struct list_head *elist) 2648 { 2649 unsigned long flags; 2650 2651 spin_lock_irqsave(&md->uevent_lock, flags); 2652 list_add(elist, &md->uevent_list); 2653 spin_unlock_irqrestore(&md->uevent_lock, flags); 2654 } 2655 2656 /* 2657 * The gendisk is only valid as long as you have a reference 2658 * count on 'md'. 
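 *
 * Illustrative (hypothetical) caller pattern:
 *
 *	struct mapped_device *md = dm_get_md(dev);
 *
 *	if (md) {
 *		struct gendisk *disk = dm_disk(md);
 *		... use disk ...
 *		dm_put(md);
 *	}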
2659 */ 2660 struct gendisk *dm_disk(struct mapped_device *md) 2661 { 2662 return md->disk; 2663 } 2664 EXPORT_SYMBOL_GPL(dm_disk); 2665 2666 struct kobject *dm_kobject(struct mapped_device *md) 2667 { 2668 return &md->kobj_holder.kobj; 2669 } 2670 2671 struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 2672 { 2673 struct mapped_device *md; 2674 2675 md = container_of(kobj, struct mapped_device, kobj_holder.kobj); 2676 2677 spin_lock(&_minor_lock); 2678 if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) { 2679 md = NULL; 2680 goto out; 2681 } 2682 dm_get(md); 2683 out: 2684 spin_unlock(&_minor_lock); 2685 2686 return md; 2687 } 2688 2689 int dm_suspended_md(struct mapped_device *md) 2690 { 2691 return test_bit(DMF_SUSPENDED, &md->flags); 2692 } 2693 2694 int dm_suspended_internally_md(struct mapped_device *md) 2695 { 2696 return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); 2697 } 2698 2699 int dm_test_deferred_remove_flag(struct mapped_device *md) 2700 { 2701 return test_bit(DMF_DEFERRED_REMOVE, &md->flags); 2702 } 2703 2704 int dm_suspended(struct dm_target *ti) 2705 { 2706 return dm_suspended_md(dm_table_get_md(ti->table)); 2707 } 2708 EXPORT_SYMBOL_GPL(dm_suspended); 2709 2710 int dm_noflush_suspending(struct dm_target *ti) 2711 { 2712 return __noflush_suspending(dm_table_get_md(ti->table)); 2713 } 2714 EXPORT_SYMBOL_GPL(dm_noflush_suspending); 2715 2716 struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type, 2717 unsigned integrity, unsigned per_io_data_size, 2718 unsigned min_pool_size) 2719 { 2720 struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id); 2721 unsigned int pool_size = 0; 2722 unsigned int front_pad; 2723 2724 if (!pools) 2725 return NULL; 2726 2727 switch (type) { 2728 case DM_TYPE_BIO_BASED: 2729 case DM_TYPE_DAX_BIO_BASED: 2730 pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size); 2731 front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 2732 pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache); 2733 if (!pools->io_pool) 2734 goto out; 2735 break; 2736 case DM_TYPE_REQUEST_BASED: 2737 case DM_TYPE_MQ_REQUEST_BASED: 2738 pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size); 2739 front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 2740 /* per_io_data_size is used for blk-mq pdu at queue allocation */ 2741 break; 2742 default: 2743 BUG(); 2744 } 2745 2746 pools->bs = bioset_create(pool_size, front_pad, 0); 2747 if (!pools->bs) 2748 goto out; 2749 2750 if (integrity && bioset_integrity_create(pools->bs, pool_size)) 2751 goto out; 2752 2753 return pools; 2754 2755 out: 2756 dm_free_md_mempools(pools); 2757 2758 return NULL; 2759 } 2760 2761 void dm_free_md_mempools(struct dm_md_mempools *pools) 2762 { 2763 if (!pools) 2764 return; 2765 2766 mempool_destroy(pools->io_pool); 2767 2768 if (pools->bs) 2769 bioset_free(pools->bs); 2770 2771 kfree(pools); 2772 } 2773 2774 struct dm_pr { 2775 u64 old_key; 2776 u64 new_key; 2777 u32 flags; 2778 bool fail_early; 2779 }; 2780 2781 static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, 2782 void *data) 2783 { 2784 struct mapped_device *md = bdev->bd_disk->private_data; 2785 struct dm_table *table; 2786 struct dm_target *ti; 2787 int ret = -ENOTTY, srcu_idx; 2788 2789 table = dm_get_live_table(md, &srcu_idx); 2790 if (!table || !dm_table_get_size(table)) 2791 goto out; 2792 2793 /* We only support devices that have a 
single target */ 2794 if (dm_table_get_num_targets(table) != 1) 2795 goto out; 2796 ti = dm_table_get_target(table, 0); 2797 2798 ret = -EINVAL; 2799 if (!ti->type->iterate_devices) 2800 goto out; 2801 2802 ret = ti->type->iterate_devices(ti, fn, data); 2803 out: 2804 dm_put_live_table(md, srcu_idx); 2805 return ret; 2806 } 2807 2808 /* 2809 * For register / unregister we need to manually call out to every path. 2810 */ 2811 static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev, 2812 sector_t start, sector_t len, void *data) 2813 { 2814 struct dm_pr *pr = data; 2815 const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; 2816 2817 if (!ops || !ops->pr_register) 2818 return -EOPNOTSUPP; 2819 return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags); 2820 } 2821 2822 static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key, 2823 u32 flags) 2824 { 2825 struct dm_pr pr = { 2826 .old_key = old_key, 2827 .new_key = new_key, 2828 .flags = flags, 2829 .fail_early = true, 2830 }; 2831 int ret; 2832 2833 ret = dm_call_pr(bdev, __dm_pr_register, &pr); 2834 if (ret && new_key) { 2835 /* unregister all paths if we failed to register any path */ 2836 pr.old_key = new_key; 2837 pr.new_key = 0; 2838 pr.flags = 0; 2839 pr.fail_early = false; 2840 dm_call_pr(bdev, __dm_pr_register, &pr); 2841 } 2842 2843 return ret; 2844 } 2845 2846 static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type, 2847 u32 flags) 2848 { 2849 struct mapped_device *md = bdev->bd_disk->private_data; 2850 const struct pr_ops *ops; 2851 fmode_t mode; 2852 int r; 2853 2854 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode); 2855 if (r < 0) 2856 return r; 2857 2858 ops = bdev->bd_disk->fops->pr_ops; 2859 if (ops && ops->pr_reserve) 2860 r = ops->pr_reserve(bdev, key, type, flags); 2861 else 2862 r = -EOPNOTSUPP; 2863 2864 bdput(bdev); 2865 return r; 2866 } 2867 2868 static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type) 2869 { 2870 struct mapped_device *md = bdev->bd_disk->private_data; 2871 const struct pr_ops *ops; 2872 fmode_t mode; 2873 int r; 2874 2875 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode); 2876 if (r < 0) 2877 return r; 2878 2879 ops = bdev->bd_disk->fops->pr_ops; 2880 if (ops && ops->pr_release) 2881 r = ops->pr_release(bdev, key, type); 2882 else 2883 r = -EOPNOTSUPP; 2884 2885 bdput(bdev); 2886 return r; 2887 } 2888 2889 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key, 2890 enum pr_type type, bool abort) 2891 { 2892 struct mapped_device *md = bdev->bd_disk->private_data; 2893 const struct pr_ops *ops; 2894 fmode_t mode; 2895 int r; 2896 2897 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode); 2898 if (r < 0) 2899 return r; 2900 2901 ops = bdev->bd_disk->fops->pr_ops; 2902 if (ops && ops->pr_preempt) 2903 r = ops->pr_preempt(bdev, old_key, new_key, type, abort); 2904 else 2905 r = -EOPNOTSUPP; 2906 2907 bdput(bdev); 2908 return r; 2909 } 2910 2911 static int dm_pr_clear(struct block_device *bdev, u64 key) 2912 { 2913 struct mapped_device *md = bdev->bd_disk->private_data; 2914 const struct pr_ops *ops; 2915 fmode_t mode; 2916 int r; 2917 2918 r = dm_grab_bdev_for_ioctl(md, &bdev, &mode); 2919 if (r < 0) 2920 return r; 2921 2922 ops = bdev->bd_disk->fops->pr_ops; 2923 if (ops && ops->pr_clear) 2924 r = ops->pr_clear(bdev, key); 2925 else 2926 r = -EOPNOTSUPP; 2927 2928 bdput(bdev); 2929 return r; 2930 } 2931 2932 static const struct pr_ops dm_pr_ops = { 2933 .pr_register = dm_pr_register, 2934 .pr_reserve = 
dm_pr_reserve, 2935 .pr_release = dm_pr_release, 2936 .pr_preempt = dm_pr_preempt, 2937 .pr_clear = dm_pr_clear, 2938 }; 2939 2940 static const struct block_device_operations dm_blk_dops = { 2941 .open = dm_blk_open, 2942 .release = dm_blk_close, 2943 .ioctl = dm_blk_ioctl, 2944 .getgeo = dm_blk_getgeo, 2945 .pr_ops = &dm_pr_ops, 2946 .owner = THIS_MODULE 2947 }; 2948 2949 static const struct dax_operations dm_dax_ops = { 2950 .direct_access = dm_dax_direct_access, 2951 .copy_from_iter = dm_dax_copy_from_iter, 2952 }; 2953 2954 /* 2955 * module hooks 2956 */ 2957 module_init(dm_init); 2958 module_exit(dm_exit); 2959 2960 module_param(major, uint, 0); 2961 MODULE_PARM_DESC(major, "The major number of the device mapper"); 2962 2963 module_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR); 2964 MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 2965 2966 module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); 2967 MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); 2968 2969 MODULE_DESCRIPTION(DM_NAME " driver"); 2970 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 2971 MODULE_LICENSE("GPL"); 2972
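
/*
 * Illustrative only: user space reaches dm_pr_ops above through the
 * generic persistent-reservation ioctls on the mapped device node.
 * A hypothetical sketch (the device path is made up):
 *
 *	struct pr_registration reg = { .new_key = 0x123abc, .flags = 0 };
 *	int fd = open("/dev/dm-0", O_RDWR);
 *
 *	if (fd >= 0)
 *		ioctl(fd, IOC_PR_REGISTER, &reg);
 */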