/*
 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-uevent.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

#ifdef CONFIG_PRINTK
/*
 * ratelimit state to be used in DMXXX_LIMIT().
 */
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
		       DEFAULT_RATELIMIT_INTERVAL,
		       DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

static const char *_name = DM_NAME;

static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

static DEFINE_SPINLOCK(_minor_lock);
/*
 * For bio-based dm.
 * One of these is allocated per bio.
 */
struct dm_io {
	struct mapped_device *md;
	int error;
	atomic_t io_count;
	struct bio *bio;
	unsigned long start_time;
	spinlock_t endio_lock;
	struct dm_stats_aux stats_aux;
};

/*
 * For request-based dm.
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
	struct mapped_device *md;
	struct dm_target *ti;
	struct request *orig, clone;
	int error;
	union map_info info;
};

/*
 * For request-based dm - the bio clones we allocate are embedded in these
 * structs.
 *
 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
 * the bioset is created - this means the bio has to come at the end of the
 * struct.
 */
struct dm_rq_clone_bio_info {
	struct bio *orig;
	struct dm_rq_target_io *tio;
	struct bio clone;
};

union map_info *dm_get_mapinfo(struct bio *bio)
{
	if (bio && bio->bi_private)
		return &((struct dm_target_io *)bio->bi_private)->info;
	return NULL;
}

union map_info *dm_get_rq_mapinfo(struct request *rq)
{
	if (rq && rq->end_io_data)
		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
	return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);

#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6

/*
 * A dummy definition to make RCU happy.
 * struct dm_table should never be dereferenced in this file.
 */
struct dm_table {
	int undefined__;
};

/*
 * Work processed by per-device workqueue.
 */
struct mapped_device {
	struct srcu_struct io_barrier;
	struct mutex suspend_lock;
	atomic_t holders;
	atomic_t open_count;

	/*
	 * The current mapping.
	 * Use dm_get_live_table{_fast} or take suspend_lock for
	 * dereference.
	 */
	struct dm_table *map;

	unsigned long flags;

	struct request_queue *queue;
	unsigned type;
	/* Protect queue and type against concurrent access. */
	struct mutex type_lock;

	struct target_type *immutable_target_type;

	struct gendisk *disk;
	char name[16];

	void *interface_ptr;

	/*
	 * A list of ios that arrived while we were suspended.
	 */
	atomic_t pending[2];
	wait_queue_head_t wait;
	struct work_struct work;
	struct bio_list deferred;
	spinlock_t deferred_lock;

	/*
	 * Processing queue (flush)
	 */
	struct workqueue_struct *wq;

	/*
	 * io objects are allocated from here.
	 */
	mempool_t *io_pool;

	struct bio_set *bs;

	/*
	 * Event handling.
	 */
	atomic_t event_nr;
	wait_queue_head_t eventq;
	atomic_t uevent_seq;
	struct list_head uevent_list;
	spinlock_t uevent_lock; /* Protect access to uevent_list */

	/*
	 * freeze/thaw support require holding onto a super block
	 */
	struct super_block *frozen_sb;
	struct block_device *bdev;

	/* forced geometry settings */
	struct hd_geometry geometry;

	/* sysfs handle */
	struct kobject kobj;

	/* zero-length flush that will be cloned and submitted to targets */
	struct bio flush_bio;

	struct dm_stats stats;
};

/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	mempool_t *io_pool;
	struct bio_set *bs;
};

#define RESERVED_BIO_BASED_IOS		16
#define RESERVED_REQUEST_BASED_IOS	256
#define RESERVED_MAX_IOS		1024
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
				      unsigned def, unsigned max)
{
	unsigned ios = ACCESS_ONCE(*reserved_ios);
	unsigned modified_ios = 0;

	if (!ios)
		modified_ios = def;
	else if (ios > max)
		modified_ios = max;

	if (modified_ios) {
		(void)cmpxchg(reserved_ios, ios, modified_ios);
		ios = modified_ios;
	}

	return ios;
}

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_reserved_ios(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static int __init local_init(void)
{
	int r = -ENOMEM;

	/* allocate a slab for the dm_ios */
	_io_cache = KMEM_CACHE(dm_io, 0);
	if (!_io_cache)
		return r;

	_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
	if (!_rq_tio_cache)
		goto out_free_io_cache;

	r = dm_uevent_init();
	if (r)
		goto out_free_rq_tio_cache;

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_uevent_exit;

	if (!_major)
		_major = r;

	return 0;

out_uevent_exit:
	dm_uevent_exit();
out_free_rq_tio_cache:
	kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
	kmem_cache_destroy(_io_cache);

	return r;
}

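/* Undo everything local_init() set up. */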
static void local_exit(void)
{
	kmem_cache_destroy(_rq_tio_cache);
	kmem_cache_destroy(_io_cache);
	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);

	int r, i;

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;

bad:
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	/*
	 * Should be empty by this point.
	 */
	idr_destroy(&_minor_idr);
}

/*
 * Block device functions
 */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = bdev->bd_disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);

out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
	struct mapped_device *md = disk->private_data;

	spin_lock(&_minor_lock);

	atomic_dec(&md->open_count);
	dm_put(md);

	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md))
		r = -EBUSY;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

sector_t dm_get_size(struct mapped_device *md)
{
	return get_capacity(md->disk);
}

struct dm_stats *dm_get_stats(struct mapped_device *md)
{
	return &md->stats;
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
			unsigned int cmd, unsigned long arg)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	int srcu_idx;
	struct dm_table *map;
	struct dm_target *tgt;
	int r = -ENOTTY;

retry:
	map = dm_get_live_table(md, &srcu_idx);

	if (!map || !dm_table_get_size(map))
		goto out;

	/* We only support devices that have a single target */
	if (dm_table_get_num_targets(map) != 1)
		goto out;

	tgt = dm_table_get_target(map, 0);

	if (dm_suspended_md(md)) {
		r = -EAGAIN;
		goto out;
	}

	if (tgt->type->ioctl)
		r = tgt->type->ioctl(tgt, cmd, arg);

out:
	dm_put_live_table(md, srcu_idx);

	if (r == -ENOTCONN) {
		msleep(10);
		goto retry;
	}

	return r;
}

static struct dm_io *alloc_io(struct mapped_device *md)
{
	return mempool_alloc(md->io_pool, GFP_NOIO);
}

static void free_io(struct mapped_device *md, struct dm_io *io)
{
	mempool_free(io, md->io_pool);
}

static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
	bio_put(&tio->clone);
}

static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
					    gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static int md_in_flight(struct mapped_device *md)
{
	return atomic_read(&md->pending[READ]) +
	       atomic_read(&md->pending[WRITE]);
}

static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	int cpu;
	int rw = bio_data_dir(bio);

	io->start_time = jiffies;

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_unlock();
	atomic_set(&dm_disk(md)->part0.in_flight[rw],
		   atomic_inc_return(&md->pending[rw]));

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
				    bio_sectors(bio), false, 0, &io->stats_aux);
}

static void end_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending, cpu;
	int rw = bio_data_dir(bio);

	cpu = part_stat_lock();
	part_round_stats(cpu, &dm_disk(md)->part0);
	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
	part_stat_unlock();

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
				    bio_sectors(bio), true, duration, &io->stats_aux);

	/*
	 * After this is decremented the bio must not be touched if it is
	 * a flush.
	 */
	pending = atomic_dec_return(&md->pending[rw]);
	atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
	pending += atomic_read(&md->pending[rw^0x1]);

	/* nudge anyone waiting on suspend queue */
	if (!pending)
		wake_up(&md->wait);
}

/*
 * Add the bio to the list of deferred io.
 */
static void queue_io(struct mapped_device *md, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&md->deferred_lock, flags);
	bio_list_add(&md->deferred, bio);
	spin_unlock_irqrestore(&md->deferred_lock, flags);
	queue_work(md->wq, &md->work);
}

/*
 * Everyone (including functions in this file) should use this
 * function to access the md->map field, and make sure they call
 * dm_put_live_table() when finished.
 */
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
	*srcu_idx = srcu_read_lock(&md->io_barrier);

	return srcu_dereference(md->map, &md->io_barrier);
}

void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
	srcu_read_unlock(&md->io_barrier, srcu_idx);
}

void dm_sync_table(struct mapped_device *md)
{
	synchronize_srcu(&md->io_barrier);
	synchronize_rcu_expedited();
}

/*
 * A fast alternative to dm_get_live_table/dm_put_live_table.
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	if (geo->start > sz) {
		DMWARN("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

/*-----------------------------------------------------------------
 * CRUD START:
 *   A more elegant solution is in the works that uses the queue
 *   merge fn, unfortunately there are a couple of changes to
 *   the block layer that I want to make for this.  So in the
 *   interests of getting something for people to use I give
 *   you this clearly demarcated crap.
 *---------------------------------------------------------------*/

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necessary.
 */
static void dec_pending(struct dm_io *io, int error)
{
	unsigned long flags;
	int io_error;
	struct bio *bio;
	struct mapped_device *md = io->md;

	/* Push-back supersedes any I/O errors */
	if (unlikely(error)) {
		spin_lock_irqsave(&io->endio_lock, flags);
		if (!(io->error > 0 && __noflush_suspending(md)))
			io->error = error;
		spin_unlock_irqrestore(&io->endio_lock, flags);
	}

	if (atomic_dec_and_test(&io->io_count)) {
		if (io->error == DM_ENDIO_REQUEUE) {
			/*
			 * Target requested pushing back the I/O.
			 */
			spin_lock_irqsave(&md->deferred_lock, flags);
			if (__noflush_suspending(md))
				bio_list_add_head(&md->deferred, io->bio);
			else
				/* noflush suspend was interrupted. */
				io->error = -EIO;
			spin_unlock_irqrestore(&md->deferred_lock, flags);
		}

		io_error = io->error;
		bio = io->bio;
		end_io_acct(io);
		free_io(md, io);

		if (io_error == DM_ENDIO_REQUEUE)
			return;

		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
			/*
			 * Preflush done for flush with data, reissue
			 * without REQ_FLUSH.
			 */
			bio->bi_rw &= ~REQ_FLUSH;
			queue_io(md, bio);
		} else {
			/* done with normal IO or empty flush */
			trace_block_bio_complete(md->queue, bio, io_error);
			bio_endio(bio, io_error);
		}
	}
}

static void clone_endio(struct bio *bio, int error)
{
	int r = 0;
	struct dm_target_io *tio = bio->bi_private;
	struct dm_io *io = tio->io;
	struct mapped_device *md = tio->io->md;
	dm_endio_fn endio = tio->ti->type->end_io;

	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
		error = -EIO;

	if (endio) {
		r = endio(tio->ti, bio, error);
		if (r < 0 || r == DM_ENDIO_REQUEUE)
			/*
			 * error and requeue request are handled
			 * in dec_pending().
			 */
			error = r;
		else if (r == DM_ENDIO_INCOMPLETE)
			/* The target will handle the io */
			return;
		else if (r) {
			DMWARN("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	free_tio(md, tio);
	dec_pending(io, error);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone, int error)
{
	struct dm_rq_clone_bio_info *info = clone->bi_private;
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_size;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io() handle
		 * the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't report the error to the upper layer yet.
		 * The error handling decision is made by the target driver
		 * when the request is completed.
		 */
		tio->error = error;
		return;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Report the data completion to the upper layer.
	 */

	/*
	 * bios are processed from the head of the list.
	 * So the completing bio should always be rq->bio.
	 * If it's not, something wrong is happening.
	 */
	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, 0, nr_bytes);
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, int run_queue)
{
	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (run_queue)
		blk_run_queue_async(md->queue);

	/*
	 * dm_put() must be at the end of this function. See the comment above
	 */
	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	blk_rq_unprep_clone(clone);
	free_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without queue lock.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	blk_end_request_all(rq, error);
	rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
	struct request *clone = rq->special;

	rq->special = NULL;
	rq->cmd_flags &= ~REQ_DONTPREP;

	free_rq_clone(clone);
}

/*
 * Requeue the original request of a clone.
 */
void dm_requeue_unmapped_request(struct request *clone)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	struct request_queue *q = rq->q;
	unsigned long flags;

	dm_unprep_request(rq);

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	spin_unlock_irqrestore(q->queue_lock, flags);

	rq_completed(md, rw, 0);
}
EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);

static void __stop_queue(struct request_queue *q)
{
	blk_stop_queue(q);
}

static void stop_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void __start_queue(struct request_queue *q)
{
	if (blk_queue_stopped(q))
		blk_start_queue(q);
}

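/* start_queue() takes q->queue_lock itself; __start_queue() above must be called with it held. */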
static void start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	__start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_unmapped_request(clone);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct request *clone = rq->completion_data;
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct request *rq = tio->orig;

	tio->error = error;
	rq->completion_data = clone;
	blk_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() function fails.
 */
void dm_kill_unmapped_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct request *rq = tio->orig;

	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(clone, error);
}
EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);

/*
 * Called with the queue lock held
 */
static void end_clone_request(struct request *clone, int error)
{
	/*
	 * This is just for cleaning up the information of the queue in which
	 * the clone was dispatched.
	 * The clone is *NOT* actually freed here because it is allocated from
	 * dm's own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
	 */
	__blk_put_request(clone->q, clone);

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the queue lock.  Otherwise, deadlock could occur because:
	 *     - another request may be submitted by the upper level driver
	 *       of the stacking during the completion
	 *     - the submission which requires queue lock may be done
	 *       against this queue
	 */
	dm_complete_request(clone, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
	sector_t target_offset = dm_target_offset(ti, sector);

	return ti->len - target_offset;
}

static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
	sector_t len = max_io_len_target_boundary(sector, ti);
	sector_t offset, max_len;

	/*
	 * Does the target need to split even further?
	 */
	if (ti->max_io_len) {
		offset = dm_target_offset(ti, sector);
		if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
			max_len = sector_div(offset, ti->max_io_len);
		else
			max_len = offset & (ti->max_io_len - 1);
		max_len = ti->max_io_len - max_len;

		if (len > max_len)
			len = max_len;
	}

	return len;
}

int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

static void __map_bio(struct dm_target_io *tio)
{
	int r;
	sector_t sector;
	struct mapped_device *md;
	struct bio *clone = &tio->clone;
	struct dm_target *ti = tio->ti;

	clone->bi_end_io = clone_endio;
	clone->bi_private = tio;

	/*
	 * Map the clone.  If r == 0 we don't need to do
	 * anything, the target has assumed ownership of
	 * this io.
	 */
	atomic_inc(&tio->io->io_count);
	sector = clone->bi_sector;
	r = ti->type->map(ti, clone);
	if (r == DM_MAPIO_REMAPPED) {
		/* the bio has been remapped so dispatch it */

		trace_block_bio_remap(bdev_get_queue(clone->bi_bdev), clone,
				      tio->io->bio->bi_bdev->bd_dev, sector);

		generic_make_request(clone);
	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
		/* error the io and bail out, or requeue it if needed */
		md = tio->io->md;
		dec_pending(tio->io, r);
		free_tio(md, tio);
	} else if (r) {
		DMWARN("unimplemented target map return value: %d", r);
		BUG();
	}
}

struct clone_info {
	struct mapped_device *md;
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	sector_t sector_count;
	unsigned short idx;
};

static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
{
	bio->bi_sector = sector;
	bio->bi_size = to_bytes(len);
}

static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count)
{
	bio->bi_idx = idx;
	bio->bi_vcnt = idx + bv_count;
	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
}

static void clone_bio_integrity(struct bio *bio, struct bio *clone,
				unsigned short idx, unsigned len, unsigned offset,
				unsigned trim)
{
	if (!bio_integrity(bio))
		return;

	bio_integrity_clone(clone, bio, GFP_NOIO);

	if (trim)
		bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len);
}

/*
 * Creates a little bio that just does part of a bvec.
 */
static void clone_split_bio(struct dm_target_io *tio, struct bio *bio,
			    sector_t sector, unsigned short idx,
			    unsigned offset, unsigned len)
{
	struct bio *clone = &tio->clone;
	struct bio_vec *bv = bio->bi_io_vec + idx;

	*clone->bi_io_vec = *bv;

	bio_setup_sector(clone, sector, len);

	clone->bi_bdev = bio->bi_bdev;
	clone->bi_rw = bio->bi_rw;
	clone->bi_vcnt = 1;
	clone->bi_io_vec->bv_offset = offset;
	clone->bi_io_vec->bv_len = clone->bi_size;
	clone->bi_flags |= 1 << BIO_CLONED;

	clone_bio_integrity(bio, clone, idx, len, offset, 1);
}

/*
 * Creates a bio that consists of a range of complete bvecs.
 */
static void clone_bio(struct dm_target_io *tio, struct bio *bio,
		      sector_t sector, unsigned short idx,
		      unsigned short bv_count, unsigned len)
{
	struct bio *clone = &tio->clone;
	unsigned trim = 0;

	__bio_clone(clone, bio);
	bio_setup_sector(clone, sector, len);
	bio_setup_bv(clone, idx, bv_count);

	if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
		trim = 1;
	clone_bio_integrity(bio, clone, idx, len, 0, trim);
}

static struct dm_target_io *alloc_tio(struct clone_info *ci,
				      struct dm_target *ti, int nr_iovecs,
				      unsigned target_bio_nr)
{
	struct dm_target_io *tio;
	struct bio *clone;

	clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, ci->md->bs);
	tio = container_of(clone, struct dm_target_io, clone);

	tio->io = ci->io;
	tio->ti = ti;
	memset(&tio->info, 0, sizeof(tio->info));
	tio->target_bio_nr = target_bio_nr;

	return tio;
}

static void __clone_and_map_simple_bio(struct clone_info *ci,
				       struct dm_target *ti,
				       unsigned target_bio_nr, sector_t len)
{
	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
	struct bio *clone = &tio->clone;

	/*
	 * Discard requests require the bio's inline iovecs be initialized.
	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
	 * and discard, so no need for concern about wasted bvec allocations.
121106a426ceSMike Snitzer */ 1212dba14160SMikulas Patocka __bio_clone(clone, ci->bio); 1213bd2a49b8SAlasdair G Kergon if (len) 1214bd2a49b8SAlasdair G Kergon bio_setup_sector(clone, ci->sector, len); 1215f9ab94ceSMikulas Patocka 1216bd2a49b8SAlasdair G Kergon __map_bio(tio); 1217f9ab94ceSMikulas Patocka } 1218f9ab94ceSMikulas Patocka 121914fe594dSAlasdair G Kergon static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, 122055a62eefSAlasdair G Kergon unsigned num_bios, sector_t len) 122106a426ceSMike Snitzer { 122255a62eefSAlasdair G Kergon unsigned target_bio_nr; 122306a426ceSMike Snitzer 122455a62eefSAlasdair G Kergon for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) 122514fe594dSAlasdair G Kergon __clone_and_map_simple_bio(ci, ti, target_bio_nr, len); 122606a426ceSMike Snitzer } 122706a426ceSMike Snitzer 122814fe594dSAlasdair G Kergon static int __send_empty_flush(struct clone_info *ci) 1229f9ab94ceSMikulas Patocka { 123006a426ceSMike Snitzer unsigned target_nr = 0; 1231f9ab94ceSMikulas Patocka struct dm_target *ti; 1232f9ab94ceSMikulas Patocka 1233b372d360SMike Snitzer BUG_ON(bio_has_data(ci->bio)); 1234f9ab94ceSMikulas Patocka while ((ti = dm_table_get_target(ci->map, target_nr++))) 123514fe594dSAlasdair G Kergon __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); 1236f9ab94ceSMikulas Patocka 1237f9ab94ceSMikulas Patocka return 0; 1238f9ab94ceSMikulas Patocka } 1239f9ab94ceSMikulas Patocka 1240e4c93811SAlasdair G Kergon static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, 1241e4c93811SAlasdair G Kergon sector_t sector, int nr_iovecs, 1242e4c93811SAlasdair G Kergon unsigned short idx, unsigned short bv_count, 1243e4c93811SAlasdair G Kergon unsigned offset, unsigned len, 1244e4c93811SAlasdair G Kergon unsigned split_bvec) 12455ae89a87SMike Snitzer { 1246dba14160SMikulas Patocka struct bio *bio = ci->bio; 12475ae89a87SMike Snitzer struct dm_target_io *tio; 1248b0d8ed4dSAlasdair G Kergon unsigned target_bio_nr; 1249b0d8ed4dSAlasdair G Kergon unsigned num_target_bios = 1; 12505ae89a87SMike Snitzer 1251b0d8ed4dSAlasdair G Kergon /* 1252b0d8ed4dSAlasdair G Kergon * Does the target want to receive duplicate copies of the bio? 
1253b0d8ed4dSAlasdair G Kergon */ 1254b0d8ed4dSAlasdair G Kergon if (bio_data_dir(bio) == WRITE && ti->num_write_bios) 1255b0d8ed4dSAlasdair G Kergon num_target_bios = ti->num_write_bios(ti, bio); 1256e4c93811SAlasdair G Kergon 1257b0d8ed4dSAlasdair G Kergon for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { 1258b0d8ed4dSAlasdair G Kergon tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); 1259e4c93811SAlasdair G Kergon if (split_bvec) 1260e4c93811SAlasdair G Kergon clone_split_bio(tio, bio, sector, idx, offset, len); 1261e4c93811SAlasdair G Kergon else 1262e4c93811SAlasdair G Kergon clone_bio(tio, bio, sector, idx, bv_count, len); 1263bd2a49b8SAlasdair G Kergon __map_bio(tio); 12645ae89a87SMike Snitzer } 1265b0d8ed4dSAlasdair G Kergon } 12665ae89a87SMike Snitzer 126755a62eefSAlasdair G Kergon typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); 126823508a96SMike Snitzer 126955a62eefSAlasdair G Kergon static unsigned get_num_discard_bios(struct dm_target *ti) 127023508a96SMike Snitzer { 127155a62eefSAlasdair G Kergon return ti->num_discard_bios; 127223508a96SMike Snitzer } 127323508a96SMike Snitzer 127455a62eefSAlasdair G Kergon static unsigned get_num_write_same_bios(struct dm_target *ti) 127523508a96SMike Snitzer { 127655a62eefSAlasdair G Kergon return ti->num_write_same_bios; 127723508a96SMike Snitzer } 127823508a96SMike Snitzer 127923508a96SMike Snitzer typedef bool (*is_split_required_fn)(struct dm_target *ti); 128023508a96SMike Snitzer 128123508a96SMike Snitzer static bool is_split_required_for_discard(struct dm_target *ti) 128223508a96SMike Snitzer { 128355a62eefSAlasdair G Kergon return ti->split_discard_bios; 128423508a96SMike Snitzer } 128523508a96SMike Snitzer 128614fe594dSAlasdair G Kergon static int __send_changing_extent_only(struct clone_info *ci, 128755a62eefSAlasdair G Kergon get_num_bios_fn get_num_bios, 128823508a96SMike Snitzer is_split_required_fn is_split_required) 12895ae89a87SMike Snitzer { 12905ae89a87SMike Snitzer struct dm_target *ti; 1291a79245b3SMike Snitzer sector_t len; 129255a62eefSAlasdair G Kergon unsigned num_bios; 12935ae89a87SMike Snitzer 1294a79245b3SMike Snitzer do { 12955ae89a87SMike Snitzer ti = dm_table_find_target(ci->map, ci->sector); 12965ae89a87SMike Snitzer if (!dm_target_is_valid(ti)) 12975ae89a87SMike Snitzer return -EIO; 12985ae89a87SMike Snitzer 12995ae89a87SMike Snitzer /* 130023508a96SMike Snitzer * Even though the device advertised support for this type of 130123508a96SMike Snitzer * request, that does not mean every target supports it, and 1302936688d7SMike Snitzer * reconfiguration might also have changed that since the 13035ae89a87SMike Snitzer * check was performed. 13045ae89a87SMike Snitzer */ 130555a62eefSAlasdair G Kergon num_bios = get_num_bios ? 
get_num_bios(ti) : 0; 130655a62eefSAlasdair G Kergon if (!num_bios) 13075ae89a87SMike Snitzer return -EOPNOTSUPP; 13085ae89a87SMike Snitzer 130923508a96SMike Snitzer if (is_split_required && !is_split_required(ti)) 1310a79245b3SMike Snitzer len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); 13117acf0277SMikulas Patocka else 13127acf0277SMikulas Patocka len = min(ci->sector_count, max_io_len(ci->sector, ti)); 13135ae89a87SMike Snitzer 131414fe594dSAlasdair G Kergon __send_duplicate_bios(ci, ti, num_bios, len); 13155ae89a87SMike Snitzer 1316a79245b3SMike Snitzer ci->sector += len; 1317a79245b3SMike Snitzer } while (ci->sector_count -= len); 13185ae89a87SMike Snitzer 13195ae89a87SMike Snitzer return 0; 13205ae89a87SMike Snitzer } 13215ae89a87SMike Snitzer 132214fe594dSAlasdair G Kergon static int __send_discard(struct clone_info *ci) 132323508a96SMike Snitzer { 132414fe594dSAlasdair G Kergon return __send_changing_extent_only(ci, get_num_discard_bios, 132523508a96SMike Snitzer is_split_required_for_discard); 132623508a96SMike Snitzer } 132723508a96SMike Snitzer 132814fe594dSAlasdair G Kergon static int __send_write_same(struct clone_info *ci) 132923508a96SMike Snitzer { 133014fe594dSAlasdair G Kergon return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); 133123508a96SMike Snitzer } 133223508a96SMike Snitzer 1333e4c93811SAlasdair G Kergon /* 1334e4c93811SAlasdair G Kergon * Find maximum number of sectors / bvecs we can process with a single bio. 1335e4c93811SAlasdair G Kergon */ 1336e4c93811SAlasdair G Kergon static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx) 13371da177e4SLinus Torvalds { 1338dba14160SMikulas Patocka struct bio *bio = ci->bio; 1339e4c93811SAlasdair G Kergon sector_t bv_len, total_len = 0; 13401da177e4SLinus Torvalds 1341e4c93811SAlasdair G Kergon for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) { 1342e4c93811SAlasdair G Kergon bv_len = to_sector(bio->bi_io_vec[*idx].bv_len); 13435ae89a87SMike Snitzer 1344e4c93811SAlasdair G Kergon if (bv_len > max) 13451da177e4SLinus Torvalds break; 13461da177e4SLinus Torvalds 1347e4c93811SAlasdair G Kergon max -= bv_len; 1348e4c93811SAlasdair G Kergon total_len += bv_len; 13491da177e4SLinus Torvalds } 13501da177e4SLinus Torvalds 1351e4c93811SAlasdair G Kergon return total_len; 1352e4c93811SAlasdair G Kergon } 13531da177e4SLinus Torvalds 1354e4c93811SAlasdair G Kergon static int __split_bvec_across_targets(struct clone_info *ci, 1355e4c93811SAlasdair G Kergon struct dm_target *ti, sector_t max) 1356e4c93811SAlasdair G Kergon { 1357e4c93811SAlasdair G Kergon struct bio *bio = ci->bio; 13581da177e4SLinus Torvalds struct bio_vec *bv = bio->bi_io_vec + ci->idx; 1359d2044a94SAlasdair G Kergon sector_t remaining = to_sector(bv->bv_len); 1360e4c93811SAlasdair G Kergon unsigned offset = 0; 1361e4c93811SAlasdair G Kergon sector_t len; 13621da177e4SLinus Torvalds 1363d2044a94SAlasdair G Kergon do { 1364d2044a94SAlasdair G Kergon if (offset) { 13651da177e4SLinus Torvalds ti = dm_table_find_target(ci->map, ci->sector); 1366512875bdSJun'ichi Nomura if (!dm_target_is_valid(ti)) 1367512875bdSJun'ichi Nomura return -EIO; 1368512875bdSJun'ichi Nomura 136956a67df7SMike Snitzer max = max_io_len(ci->sector, ti); 1370d2044a94SAlasdair G Kergon } 1371d2044a94SAlasdair G Kergon 1372d2044a94SAlasdair G Kergon len = min(remaining, max); 1373d2044a94SAlasdair G Kergon 1374e4c93811SAlasdair G Kergon __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0, 1375e4c93811SAlasdair G Kergon 
bv->bv_offset + offset, len, 1); 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds ci->sector += len; 13781da177e4SLinus Torvalds ci->sector_count -= len; 1379d2044a94SAlasdair G Kergon offset += to_bytes(len); 1380d2044a94SAlasdair G Kergon } while (remaining -= len); 1381d2044a94SAlasdair G Kergon 13821da177e4SLinus Torvalds ci->idx++; 1383512875bdSJun'ichi Nomura 1384512875bdSJun'ichi Nomura return 0; 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds /* 1388e4c93811SAlasdair G Kergon * Select the correct strategy for processing a non-flush bio. 1389e4c93811SAlasdair G Kergon */ 1390e4c93811SAlasdair G Kergon static int __split_and_process_non_flush(struct clone_info *ci) 1391e4c93811SAlasdair G Kergon { 1392e4c93811SAlasdair G Kergon struct bio *bio = ci->bio; 1393e4c93811SAlasdair G Kergon struct dm_target *ti; 1394e4c93811SAlasdair G Kergon sector_t len, max; 1395e4c93811SAlasdair G Kergon int idx; 1396e4c93811SAlasdair G Kergon 1397e4c93811SAlasdair G Kergon if (unlikely(bio->bi_rw & REQ_DISCARD)) 1398e4c93811SAlasdair G Kergon return __send_discard(ci); 1399e4c93811SAlasdair G Kergon else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) 1400e4c93811SAlasdair G Kergon return __send_write_same(ci); 1401e4c93811SAlasdair G Kergon 1402e4c93811SAlasdair G Kergon ti = dm_table_find_target(ci->map, ci->sector); 1403e4c93811SAlasdair G Kergon if (!dm_target_is_valid(ti)) 1404e4c93811SAlasdair G Kergon return -EIO; 1405e4c93811SAlasdair G Kergon 1406e4c93811SAlasdair G Kergon max = max_io_len(ci->sector, ti); 1407e4c93811SAlasdair G Kergon 1408e4c93811SAlasdair G Kergon /* 1409e4c93811SAlasdair G Kergon * Optimise for the simple case where we can do all of 1410e4c93811SAlasdair G Kergon * the remaining io with a single clone. 1411e4c93811SAlasdair G Kergon */ 1412e4c93811SAlasdair G Kergon if (ci->sector_count <= max) { 1413e4c93811SAlasdair G Kergon __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, 1414e4c93811SAlasdair G Kergon ci->idx, bio->bi_vcnt - ci->idx, 0, 1415e4c93811SAlasdair G Kergon ci->sector_count, 0); 1416e4c93811SAlasdair G Kergon ci->sector_count = 0; 1417e4c93811SAlasdair G Kergon return 0; 1418e4c93811SAlasdair G Kergon } 1419e4c93811SAlasdair G Kergon 1420e4c93811SAlasdair G Kergon /* 1421e4c93811SAlasdair G Kergon * There are some bvecs that don't span targets. 1422e4c93811SAlasdair G Kergon * Do as many of these as possible. 1423e4c93811SAlasdair G Kergon */ 1424e4c93811SAlasdair G Kergon if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { 1425e4c93811SAlasdair G Kergon len = __len_within_target(ci, max, &idx); 1426e4c93811SAlasdair G Kergon 1427e4c93811SAlasdair G Kergon __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, 1428e4c93811SAlasdair G Kergon ci->idx, idx - ci->idx, 0, len, 0); 1429e4c93811SAlasdair G Kergon 1430e4c93811SAlasdair G Kergon ci->sector += len; 1431e4c93811SAlasdair G Kergon ci->sector_count -= len; 1432e4c93811SAlasdair G Kergon ci->idx = idx; 1433e4c93811SAlasdair G Kergon 1434e4c93811SAlasdair G Kergon return 0; 1435e4c93811SAlasdair G Kergon } 1436e4c93811SAlasdair G Kergon 1437e4c93811SAlasdair G Kergon /* 1438e4c93811SAlasdair G Kergon * Handle a bvec that must be split between two or more targets. 
1439e4c93811SAlasdair G Kergon */ 1440e4c93811SAlasdair G Kergon return __split_bvec_across_targets(ci, ti, max); 1441e4c93811SAlasdair G Kergon } 1442e4c93811SAlasdair G Kergon 1443e4c93811SAlasdair G Kergon /* 144414fe594dSAlasdair G Kergon * Entry point to split a bio into clones and submit them to the targets. 14451da177e4SLinus Torvalds */ 144683d5e5b0SMikulas Patocka static void __split_and_process_bio(struct mapped_device *md, 144783d5e5b0SMikulas Patocka struct dm_table *map, struct bio *bio) 14481da177e4SLinus Torvalds { 14491da177e4SLinus Torvalds struct clone_info ci; 1450512875bdSJun'ichi Nomura int error = 0; 14511da177e4SLinus Torvalds 145283d5e5b0SMikulas Patocka if (unlikely(!map)) { 1453f0b9a450SMikulas Patocka bio_io_error(bio); 1454f0b9a450SMikulas Patocka return; 1455f0b9a450SMikulas Patocka } 1456692d0eb9SMikulas Patocka 145783d5e5b0SMikulas Patocka ci.map = map; 14581da177e4SLinus Torvalds ci.md = md; 14591da177e4SLinus Torvalds ci.io = alloc_io(md); 14601da177e4SLinus Torvalds ci.io->error = 0; 14611da177e4SLinus Torvalds atomic_set(&ci.io->io_count, 1); 14621da177e4SLinus Torvalds ci.io->bio = bio; 14631da177e4SLinus Torvalds ci.io->md = md; 1464f88fb981SKiyoshi Ueda spin_lock_init(&ci.io->endio_lock); 14651da177e4SLinus Torvalds ci.sector = bio->bi_sector; 14661da177e4SLinus Torvalds ci.idx = bio->bi_idx; 14671da177e4SLinus Torvalds 14683eaf840eSJun'ichi "Nick" Nomura start_io_acct(ci.io); 1469bd2a49b8SAlasdair G Kergon 1470b372d360SMike Snitzer if (bio->bi_rw & REQ_FLUSH) { 1471b372d360SMike Snitzer ci.bio = &ci.md->flush_bio; 1472b372d360SMike Snitzer ci.sector_count = 0; 147314fe594dSAlasdair G Kergon error = __send_empty_flush(&ci); 1474b372d360SMike Snitzer /* dec_pending submits any data associated with flush */ 1475b372d360SMike Snitzer } else { 14766a8736d1STejun Heo ci.bio = bio; 1477f6fccb12SMilan Broz ci.sector_count = bio_sectors(bio); 1478512875bdSJun'ichi Nomura while (ci.sector_count && !error) 147914fe594dSAlasdair G Kergon error = __split_and_process_non_flush(&ci); 1480d87f4c14STejun Heo } 14811da177e4SLinus Torvalds 14821da177e4SLinus Torvalds /* drop the extra reference count */ 1483512875bdSJun'ichi Nomura dec_pending(ci.io, error); 14849e4e5f87SMilan Broz } 14859e4e5f87SMilan Broz /*----------------------------------------------------------------- 14861da177e4SLinus Torvalds * CRUD END 14871da177e4SLinus Torvalds *---------------------------------------------------------------*/ 14881da177e4SLinus Torvalds 14891da177e4SLinus Torvalds static int dm_merge_bvec(struct request_queue *q, 14901da177e4SLinus Torvalds struct bvec_merge_data *bvm, 1491f6fccb12SMilan Broz struct bio_vec *biovec) 1492f6fccb12SMilan Broz { 1493f6fccb12SMilan Broz struct mapped_device *md = q->queuedata; 149483d5e5b0SMikulas Patocka struct dm_table *map = dm_get_live_table_fast(md); 1495f6fccb12SMilan Broz struct dm_target *ti; 1496f6fccb12SMilan Broz sector_t max_sectors; 1497f6fccb12SMilan Broz int max_size = 0; 1498f6fccb12SMilan Broz 1499f6fccb12SMilan Broz if (unlikely(!map)) 1500f6fccb12SMilan Broz goto out; 1501f6fccb12SMilan Broz 1502f6fccb12SMilan Broz ti = dm_table_find_target(map, bvm->bi_sector); 1503f6fccb12SMilan Broz if (!dm_target_is_valid(ti)) 150483d5e5b0SMikulas Patocka goto out; 1505f6fccb12SMilan Broz 1506f6fccb12SMilan Broz /* 1507f6fccb12SMilan Broz * Find maximum amount of I/O that won't need splitting 1508f6fccb12SMilan Broz */ 150956a67df7SMike Snitzer max_sectors = min(max_io_len(bvm->bi_sector, ti), 1510f6fccb12SMilan Broz (sector_t) 
BIO_MAX_SECTORS); 1511f6fccb12SMilan Broz max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; 1512f6fccb12SMilan Broz if (max_size < 0) 1513f6fccb12SMilan Broz max_size = 0; 1514f6fccb12SMilan Broz 1515f6fccb12SMilan Broz /* 1516f6fccb12SMilan Broz * merge_bvec_fn() returns number of bytes 1517f6fccb12SMilan Broz * it can accept at this offset 1518f6fccb12SMilan Broz * max is precomputed maximal io size 1519f6fccb12SMilan Broz */ 1520f6fccb12SMilan Broz if (max_size && ti->type->merge) 1521f6fccb12SMilan Broz max_size = ti->type->merge(ti, bvm, biovec, max_size); 15228cbeb67aSMikulas Patocka /* 15238cbeb67aSMikulas Patocka * If the target doesn't support merge method and some of the devices 15248cbeb67aSMikulas Patocka * provided their merge_bvec method (we know this by looking at 15258cbeb67aSMikulas Patocka * queue_max_hw_sectors), then we can't allow bios with multiple vector 15268cbeb67aSMikulas Patocka * entries. So always set max_size to 0, and the code below allows 15278cbeb67aSMikulas Patocka * just one page. 15288cbeb67aSMikulas Patocka */ 15298cbeb67aSMikulas Patocka else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) 15308cbeb67aSMikulas Patocka 15318cbeb67aSMikulas Patocka max_size = 0; 1532f6fccb12SMilan Broz 15335037108aSMikulas Patocka out: 153483d5e5b0SMikulas Patocka dm_put_live_table_fast(md); 1535f6fccb12SMilan Broz /* 1536f6fccb12SMilan Broz * Always allow an entire first page 1537f6fccb12SMilan Broz */ 1538f6fccb12SMilan Broz if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT)) 1539f6fccb12SMilan Broz max_size = biovec->bv_len; 1540f6fccb12SMilan Broz 1541f6fccb12SMilan Broz return max_size; 1542f6fccb12SMilan Broz } 1543f6fccb12SMilan Broz 15441da177e4SLinus Torvalds /* 15451da177e4SLinus Torvalds * The request function that just remaps the bio built up by 15461da177e4SLinus Torvalds * dm_merge_bvec. 
15471da177e4SLinus Torvalds */ 15485a7bbad2SChristoph Hellwig static void _dm_request(struct request_queue *q, struct bio *bio) 15491da177e4SLinus Torvalds { 155012f03a49SKevin Corry int rw = bio_data_dir(bio); 15511da177e4SLinus Torvalds struct mapped_device *md = q->queuedata; 1552c9959059STejun Heo int cpu; 155383d5e5b0SMikulas Patocka int srcu_idx; 155483d5e5b0SMikulas Patocka struct dm_table *map; 15551da177e4SLinus Torvalds 155683d5e5b0SMikulas Patocka map = dm_get_live_table(md, &srcu_idx); 15571da177e4SLinus Torvalds 1558074a7acaSTejun Heo cpu = part_stat_lock(); 1559074a7acaSTejun Heo part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); 1560074a7acaSTejun Heo part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); 1561074a7acaSTejun Heo part_stat_unlock(); 156212f03a49SKevin Corry 15636a8736d1STejun Heo /* if we're suspended, we have to queue this io for later */ 15646a8736d1STejun Heo if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))) { 156583d5e5b0SMikulas Patocka dm_put_live_table(md, srcu_idx); 15661da177e4SLinus Torvalds 15676a8736d1STejun Heo if (bio_rw(bio) != READA) 156892c63902SMikulas Patocka queue_io(md, bio); 15696a8736d1STejun Heo else 15706a8736d1STejun Heo bio_io_error(bio); 15715a7bbad2SChristoph Hellwig return; 15721da177e4SLinus Torvalds } 15731da177e4SLinus Torvalds 157483d5e5b0SMikulas Patocka __split_and_process_bio(md, map, bio); 157583d5e5b0SMikulas Patocka dm_put_live_table(md, srcu_idx); 15765a7bbad2SChristoph Hellwig return; 1577cec47e3dSKiyoshi Ueda } 1578cec47e3dSKiyoshi Ueda 1579fd2ed4d2SMikulas Patocka int dm_request_based(struct mapped_device *md) 1580cec47e3dSKiyoshi Ueda { 1581cec47e3dSKiyoshi Ueda return blk_queue_stackable(md->queue); 1582cec47e3dSKiyoshi Ueda } 1583cec47e3dSKiyoshi Ueda 15845a7bbad2SChristoph Hellwig static void dm_request(struct request_queue *q, struct bio *bio) 1585cec47e3dSKiyoshi Ueda { 1586cec47e3dSKiyoshi Ueda struct mapped_device *md = q->queuedata; 1587cec47e3dSKiyoshi Ueda 1588cec47e3dSKiyoshi Ueda if (dm_request_based(md)) 15895a7bbad2SChristoph Hellwig blk_queue_bio(q, bio); 15905a7bbad2SChristoph Hellwig else 15915a7bbad2SChristoph Hellwig _dm_request(q, bio); 1592cec47e3dSKiyoshi Ueda } 1593cec47e3dSKiyoshi Ueda 1594cec47e3dSKiyoshi Ueda void dm_dispatch_request(struct request *rq) 1595cec47e3dSKiyoshi Ueda { 1596cec47e3dSKiyoshi Ueda int r; 1597cec47e3dSKiyoshi Ueda 1598cec47e3dSKiyoshi Ueda if (blk_queue_io_stat(rq->q)) 1599cec47e3dSKiyoshi Ueda rq->cmd_flags |= REQ_IO_STAT; 1600cec47e3dSKiyoshi Ueda 1601cec47e3dSKiyoshi Ueda rq->start_time = jiffies; 1602cec47e3dSKiyoshi Ueda r = blk_insert_cloned_request(rq->q, rq); 1603cec47e3dSKiyoshi Ueda if (r) 1604cec47e3dSKiyoshi Ueda dm_complete_request(rq, r); 1605cec47e3dSKiyoshi Ueda } 1606cec47e3dSKiyoshi Ueda EXPORT_SYMBOL_GPL(dm_dispatch_request); 1607cec47e3dSKiyoshi Ueda 1608cec47e3dSKiyoshi Ueda static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1609cec47e3dSKiyoshi Ueda void *data) 1610cec47e3dSKiyoshi Ueda { 1611cec47e3dSKiyoshi Ueda struct dm_rq_target_io *tio = data; 161294818742SKent Overstreet struct dm_rq_clone_bio_info *info = 161394818742SKent Overstreet container_of(bio, struct dm_rq_clone_bio_info, clone); 1614cec47e3dSKiyoshi Ueda 1615cec47e3dSKiyoshi Ueda info->orig = bio_orig; 1616cec47e3dSKiyoshi Ueda info->tio = tio; 1617cec47e3dSKiyoshi Ueda bio->bi_end_io = end_clone_bio; 1618cec47e3dSKiyoshi Ueda bio->bi_private = info; 1619cec47e3dSKiyoshi Ueda 1620cec47e3dSKiyoshi Ueda return 0; 
1621cec47e3dSKiyoshi Ueda } 1622cec47e3dSKiyoshi Ueda 1623cec47e3dSKiyoshi Ueda static int setup_clone(struct request *clone, struct request *rq, 1624cec47e3dSKiyoshi Ueda struct dm_rq_target_io *tio) 1625cec47e3dSKiyoshi Ueda { 1626d0bcb878SKiyoshi Ueda int r; 1627cec47e3dSKiyoshi Ueda 1628d0bcb878SKiyoshi Ueda r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, 1629d0bcb878SKiyoshi Ueda dm_rq_bio_constructor, tio); 1630cec47e3dSKiyoshi Ueda if (r) 1631cec47e3dSKiyoshi Ueda return r; 1632cec47e3dSKiyoshi Ueda 1633cec47e3dSKiyoshi Ueda clone->cmd = rq->cmd; 1634cec47e3dSKiyoshi Ueda clone->cmd_len = rq->cmd_len; 1635cec47e3dSKiyoshi Ueda clone->sense = rq->sense; 1636cec47e3dSKiyoshi Ueda clone->buffer = rq->buffer; 1637cec47e3dSKiyoshi Ueda clone->end_io = end_clone_request; 1638cec47e3dSKiyoshi Ueda clone->end_io_data = tio; 1639cec47e3dSKiyoshi Ueda 1640cec47e3dSKiyoshi Ueda return 0; 1641cec47e3dSKiyoshi Ueda } 1642cec47e3dSKiyoshi Ueda 16436facdaffSKiyoshi Ueda static struct request *clone_rq(struct request *rq, struct mapped_device *md, 16446facdaffSKiyoshi Ueda gfp_t gfp_mask) 16456facdaffSKiyoshi Ueda { 16466facdaffSKiyoshi Ueda struct request *clone; 16476facdaffSKiyoshi Ueda struct dm_rq_target_io *tio; 16486facdaffSKiyoshi Ueda 16496facdaffSKiyoshi Ueda tio = alloc_rq_tio(md, gfp_mask); 16506facdaffSKiyoshi Ueda if (!tio) 16516facdaffSKiyoshi Ueda return NULL; 16526facdaffSKiyoshi Ueda 16536facdaffSKiyoshi Ueda tio->md = md; 16546facdaffSKiyoshi Ueda tio->ti = NULL; 16556facdaffSKiyoshi Ueda tio->orig = rq; 16566facdaffSKiyoshi Ueda tio->error = 0; 16576facdaffSKiyoshi Ueda memset(&tio->info, 0, sizeof(tio->info)); 16586facdaffSKiyoshi Ueda 16596facdaffSKiyoshi Ueda clone = &tio->clone; 16606facdaffSKiyoshi Ueda if (setup_clone(clone, rq, tio)) { 16616facdaffSKiyoshi Ueda /* -ENOMEM */ 16626facdaffSKiyoshi Ueda free_rq_tio(tio); 16636facdaffSKiyoshi Ueda return NULL; 16646facdaffSKiyoshi Ueda } 16656facdaffSKiyoshi Ueda 16666facdaffSKiyoshi Ueda return clone; 16676facdaffSKiyoshi Ueda } 16686facdaffSKiyoshi Ueda 1669cec47e3dSKiyoshi Ueda /* 1670cec47e3dSKiyoshi Ueda * Called with the queue lock held. 
1671cec47e3dSKiyoshi Ueda */ 1672cec47e3dSKiyoshi Ueda static int dm_prep_fn(struct request_queue *q, struct request *rq) 1673cec47e3dSKiyoshi Ueda { 1674cec47e3dSKiyoshi Ueda struct mapped_device *md = q->queuedata; 1675cec47e3dSKiyoshi Ueda struct request *clone; 1676cec47e3dSKiyoshi Ueda 1677cec47e3dSKiyoshi Ueda if (unlikely(rq->special)) { 1678cec47e3dSKiyoshi Ueda DMWARN("Already has something in rq->special."); 1679cec47e3dSKiyoshi Ueda return BLKPREP_KILL; 1680cec47e3dSKiyoshi Ueda } 1681cec47e3dSKiyoshi Ueda 16826facdaffSKiyoshi Ueda clone = clone_rq(rq, md, GFP_ATOMIC); 16836facdaffSKiyoshi Ueda if (!clone) 1684cec47e3dSKiyoshi Ueda return BLKPREP_DEFER; 1685cec47e3dSKiyoshi Ueda 1686cec47e3dSKiyoshi Ueda rq->special = clone; 1687cec47e3dSKiyoshi Ueda rq->cmd_flags |= REQ_DONTPREP; 1688cec47e3dSKiyoshi Ueda 1689cec47e3dSKiyoshi Ueda return BLKPREP_OK; 1690cec47e3dSKiyoshi Ueda } 1691cec47e3dSKiyoshi Ueda 16929eef87daSKiyoshi Ueda /* 16939eef87daSKiyoshi Ueda * Returns: 16949eef87daSKiyoshi Ueda * 0 : the request has been processed (not requeued) 16959eef87daSKiyoshi Ueda * !0 : the request has been requeued 16969eef87daSKiyoshi Ueda */ 16979eef87daSKiyoshi Ueda static int map_request(struct dm_target *ti, struct request *clone, 1698cec47e3dSKiyoshi Ueda struct mapped_device *md) 1699cec47e3dSKiyoshi Ueda { 17009eef87daSKiyoshi Ueda int r, requeued = 0; 1701cec47e3dSKiyoshi Ueda struct dm_rq_target_io *tio = clone->end_io_data; 1702cec47e3dSKiyoshi Ueda 1703cec47e3dSKiyoshi Ueda tio->ti = ti; 1704cec47e3dSKiyoshi Ueda r = ti->type->map_rq(ti, clone, &tio->info); 1705cec47e3dSKiyoshi Ueda switch (r) { 1706cec47e3dSKiyoshi Ueda case DM_MAPIO_SUBMITTED: 1707cec47e3dSKiyoshi Ueda /* The target has taken the I/O to submit by itself later */ 1708cec47e3dSKiyoshi Ueda break; 1709cec47e3dSKiyoshi Ueda case DM_MAPIO_REMAPPED: 1710cec47e3dSKiyoshi Ueda /* The target has remapped the I/O so dispatch it */ 17116db4ccd6SJun'ichi Nomura trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 17126db4ccd6SJun'ichi Nomura blk_rq_pos(tio->orig)); 1713cec47e3dSKiyoshi Ueda dm_dispatch_request(clone); 1714cec47e3dSKiyoshi Ueda break; 1715cec47e3dSKiyoshi Ueda case DM_MAPIO_REQUEUE: 1716cec47e3dSKiyoshi Ueda /* The target wants to requeue the I/O */ 1717cec47e3dSKiyoshi Ueda dm_requeue_unmapped_request(clone); 17189eef87daSKiyoshi Ueda requeued = 1; 1719cec47e3dSKiyoshi Ueda break; 1720cec47e3dSKiyoshi Ueda default: 1721cec47e3dSKiyoshi Ueda if (r > 0) { 1722cec47e3dSKiyoshi Ueda DMWARN("unimplemented target map return value: %d", r); 1723cec47e3dSKiyoshi Ueda BUG(); 1724cec47e3dSKiyoshi Ueda } 1725cec47e3dSKiyoshi Ueda 1726cec47e3dSKiyoshi Ueda /* The target wants to complete the I/O */ 1727cec47e3dSKiyoshi Ueda dm_kill_unmapped_request(clone, r); 1728cec47e3dSKiyoshi Ueda break; 1729cec47e3dSKiyoshi Ueda } 17309eef87daSKiyoshi Ueda 17319eef87daSKiyoshi Ueda return requeued; 1732cec47e3dSKiyoshi Ueda } 1733cec47e3dSKiyoshi Ueda 1734ba1cbad9SMike Snitzer static struct request *dm_start_request(struct mapped_device *md, struct request *orig) 1735ba1cbad9SMike Snitzer { 1736ba1cbad9SMike Snitzer struct request *clone; 1737ba1cbad9SMike Snitzer 1738ba1cbad9SMike Snitzer blk_start_request(orig); 1739ba1cbad9SMike Snitzer clone = orig->special; 1740ba1cbad9SMike Snitzer atomic_inc(&md->pending[rq_data_dir(clone)]); 1741ba1cbad9SMike Snitzer 1742ba1cbad9SMike Snitzer /* 1743ba1cbad9SMike Snitzer * Hold the md reference here for the in-flight I/O. 
1744ba1cbad9SMike Snitzer * We can't rely on the reference count by device opener, 1745ba1cbad9SMike Snitzer * because the device may be closed during the request completion 1746ba1cbad9SMike Snitzer * when all bios are completed. 1747ba1cbad9SMike Snitzer * See the comment in rq_completed() too. 1748ba1cbad9SMike Snitzer */ 1749ba1cbad9SMike Snitzer dm_get(md); 1750ba1cbad9SMike Snitzer 1751ba1cbad9SMike Snitzer return clone; 1752ba1cbad9SMike Snitzer } 1753ba1cbad9SMike Snitzer 1754cec47e3dSKiyoshi Ueda /* 1755cec47e3dSKiyoshi Ueda * q->request_fn for request-based dm. 1756cec47e3dSKiyoshi Ueda * Called with the queue lock held. 1757cec47e3dSKiyoshi Ueda */ 1758cec47e3dSKiyoshi Ueda static void dm_request_fn(struct request_queue *q) 1759cec47e3dSKiyoshi Ueda { 1760cec47e3dSKiyoshi Ueda struct mapped_device *md = q->queuedata; 176183d5e5b0SMikulas Patocka int srcu_idx; 176283d5e5b0SMikulas Patocka struct dm_table *map = dm_get_live_table(md, &srcu_idx); 1763cec47e3dSKiyoshi Ueda struct dm_target *ti; 1764b4324feeSKiyoshi Ueda struct request *rq, *clone; 176529e4013dSTejun Heo sector_t pos; 1766cec47e3dSKiyoshi Ueda 1767cec47e3dSKiyoshi Ueda /* 1768b4324feeSKiyoshi Ueda * For suspend, check blk_queue_stopped() and increment 1769b4324feeSKiyoshi Ueda * ->pending within a single queue_lock not to increment the 1770b4324feeSKiyoshi Ueda * number of in-flight I/Os after the queue is stopped in 1771b4324feeSKiyoshi Ueda * dm_suspend(). 1772cec47e3dSKiyoshi Ueda */ 17737eaceaccSJens Axboe while (!blk_queue_stopped(q)) { 1774cec47e3dSKiyoshi Ueda rq = blk_peek_request(q); 1775cec47e3dSKiyoshi Ueda if (!rq) 17767eaceaccSJens Axboe goto delay_and_out; 1777cec47e3dSKiyoshi Ueda 177829e4013dSTejun Heo /* always use block 0 to find the target for flushes for now */ 177929e4013dSTejun Heo pos = 0; 178029e4013dSTejun Heo if (!(rq->cmd_flags & REQ_FLUSH)) 178129e4013dSTejun Heo pos = blk_rq_pos(rq); 1782d0bcb878SKiyoshi Ueda 178329e4013dSTejun Heo ti = dm_table_find_target(map, pos); 1784ba1cbad9SMike Snitzer if (!dm_target_is_valid(ti)) { 1785ba1cbad9SMike Snitzer /* 1786ba1cbad9SMike Snitzer * Must perform setup, that dm_done() requires, 1787ba1cbad9SMike Snitzer * before calling dm_kill_unmapped_request 1788ba1cbad9SMike Snitzer */ 1789ba1cbad9SMike Snitzer DMERR_LIMIT("request attempted access beyond the end of device"); 1790ba1cbad9SMike Snitzer clone = dm_start_request(md, rq); 1791ba1cbad9SMike Snitzer dm_kill_unmapped_request(clone, -EIO); 1792ba1cbad9SMike Snitzer continue; 1793ba1cbad9SMike Snitzer } 179429e4013dSTejun Heo 1795cec47e3dSKiyoshi Ueda if (ti->type->busy && ti->type->busy(ti)) 17967eaceaccSJens Axboe goto delay_and_out; 1797cec47e3dSKiyoshi Ueda 1798ba1cbad9SMike Snitzer clone = dm_start_request(md, rq); 1799b4324feeSKiyoshi Ueda 1800cec47e3dSKiyoshi Ueda spin_unlock(q->queue_lock); 18019eef87daSKiyoshi Ueda if (map_request(ti, clone, md)) 18029eef87daSKiyoshi Ueda goto requeued; 18039eef87daSKiyoshi Ueda 1804052189a2SKiyoshi Ueda BUG_ON(!irqs_disabled()); 1805052189a2SKiyoshi Ueda spin_lock(q->queue_lock); 1806cec47e3dSKiyoshi Ueda } 1807cec47e3dSKiyoshi Ueda 1808cec47e3dSKiyoshi Ueda goto out; 1809cec47e3dSKiyoshi Ueda 18109eef87daSKiyoshi Ueda requeued: 1811052189a2SKiyoshi Ueda BUG_ON(!irqs_disabled()); 1812052189a2SKiyoshi Ueda spin_lock(q->queue_lock); 18139eef87daSKiyoshi Ueda 18147eaceaccSJens Axboe delay_and_out: 18157eaceaccSJens Axboe blk_delay_queue(q, HZ / 10); 1816cec47e3dSKiyoshi Ueda out: 181783d5e5b0SMikulas Patocka dm_put_live_table(md, srcu_idx); 
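/*
 * Every path out of the dispatch loop above converges on the out label,
 * so the dm_get_live_table() reference taken at the top of this function
 * is dropped exactly once whichever way we leave.
 */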
1818cec47e3dSKiyoshi Ueda } 1819cec47e3dSKiyoshi Ueda 1820cec47e3dSKiyoshi Ueda int dm_underlying_device_busy(struct request_queue *q) 1821cec47e3dSKiyoshi Ueda { 1822cec47e3dSKiyoshi Ueda return blk_lld_busy(q); 1823cec47e3dSKiyoshi Ueda } 1824cec47e3dSKiyoshi Ueda EXPORT_SYMBOL_GPL(dm_underlying_device_busy); 1825cec47e3dSKiyoshi Ueda 1826cec47e3dSKiyoshi Ueda static int dm_lld_busy(struct request_queue *q) 1827cec47e3dSKiyoshi Ueda { 1828cec47e3dSKiyoshi Ueda int r; 1829cec47e3dSKiyoshi Ueda struct mapped_device *md = q->queuedata; 183083d5e5b0SMikulas Patocka struct dm_table *map = dm_get_live_table_fast(md); 1831cec47e3dSKiyoshi Ueda 1832cec47e3dSKiyoshi Ueda if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) 1833cec47e3dSKiyoshi Ueda r = 1; 1834cec47e3dSKiyoshi Ueda else 1835cec47e3dSKiyoshi Ueda r = dm_table_any_busy_target(map); 1836cec47e3dSKiyoshi Ueda 183783d5e5b0SMikulas Patocka dm_put_live_table_fast(md); 1838cec47e3dSKiyoshi Ueda 1839cec47e3dSKiyoshi Ueda return r; 1840cec47e3dSKiyoshi Ueda } 1841cec47e3dSKiyoshi Ueda 18421da177e4SLinus Torvalds static int dm_any_congested(void *congested_data, int bdi_bits) 18431da177e4SLinus Torvalds { 18448a57dfc6SChandra Seetharaman int r = bdi_bits; 18458a57dfc6SChandra Seetharaman struct mapped_device *md = congested_data; 18468a57dfc6SChandra Seetharaman struct dm_table *map; 18471da177e4SLinus Torvalds 18481eb787ecSAlasdair G Kergon if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 184983d5e5b0SMikulas Patocka map = dm_get_live_table_fast(md); 18508a57dfc6SChandra Seetharaman if (map) { 1851cec47e3dSKiyoshi Ueda /* 1852cec47e3dSKiyoshi Ueda * Request-based dm cares about only own queue for 1853cec47e3dSKiyoshi Ueda * the query about congestion status of request_queue 1854cec47e3dSKiyoshi Ueda */ 1855cec47e3dSKiyoshi Ueda if (dm_request_based(md)) 1856cec47e3dSKiyoshi Ueda r = md->queue->backing_dev_info.state & 1857cec47e3dSKiyoshi Ueda bdi_bits; 1858cec47e3dSKiyoshi Ueda else 18591da177e4SLinus Torvalds r = dm_table_any_congested(map, bdi_bits); 18608a57dfc6SChandra Seetharaman } 186183d5e5b0SMikulas Patocka dm_put_live_table_fast(md); 18628a57dfc6SChandra Seetharaman } 18638a57dfc6SChandra Seetharaman 18641da177e4SLinus Torvalds return r; 18651da177e4SLinus Torvalds } 18661da177e4SLinus Torvalds 18671da177e4SLinus Torvalds /*----------------------------------------------------------------- 18681da177e4SLinus Torvalds * An IDR is used to keep track of allocated minor numbers. 18691da177e4SLinus Torvalds *---------------------------------------------------------------*/ 18702b06cfffSAlasdair G Kergon static void free_minor(int minor) 18711da177e4SLinus Torvalds { 1872f32c10b0SJeff Mahoney spin_lock(&_minor_lock); 18731da177e4SLinus Torvalds idr_remove(&_minor_idr, minor); 1874f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 18751da177e4SLinus Torvalds } 18761da177e4SLinus Torvalds 18771da177e4SLinus Torvalds /* 18781da177e4SLinus Torvalds * See if the device with a specific minor # is free. 
18791da177e4SLinus Torvalds */ 1880cf13ab8eSFrederik Deweerdt static int specific_minor(int minor) 18811da177e4SLinus Torvalds { 1882c9d76be6STejun Heo int r; 18831da177e4SLinus Torvalds 18841da177e4SLinus Torvalds if (minor >= (1 << MINORBITS)) 18851da177e4SLinus Torvalds return -EINVAL; 18861da177e4SLinus Torvalds 1887c9d76be6STejun Heo idr_preload(GFP_KERNEL); 1888f32c10b0SJeff Mahoney spin_lock(&_minor_lock); 18891da177e4SLinus Torvalds 1890c9d76be6STejun Heo r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT); 18911da177e4SLinus Torvalds 1892f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 1893c9d76be6STejun Heo idr_preload_end(); 1894c9d76be6STejun Heo if (r < 0) 1895c9d76be6STejun Heo return r == -ENOSPC ? -EBUSY : r; 1896c9d76be6STejun Heo return 0; 18971da177e4SLinus Torvalds } 18981da177e4SLinus Torvalds 1899cf13ab8eSFrederik Deweerdt static int next_free_minor(int *minor) 19001da177e4SLinus Torvalds { 1901c9d76be6STejun Heo int r; 19021da177e4SLinus Torvalds 1903c9d76be6STejun Heo idr_preload(GFP_KERNEL); 1904f32c10b0SJeff Mahoney spin_lock(&_minor_lock); 19051da177e4SLinus Torvalds 1906c9d76be6STejun Heo r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT); 19071da177e4SLinus Torvalds 1908f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 1909c9d76be6STejun Heo idr_preload_end(); 1910c9d76be6STejun Heo if (r < 0) 19111da177e4SLinus Torvalds return r; 1912c9d76be6STejun Heo *minor = r; 1913c9d76be6STejun Heo return 0; 19141da177e4SLinus Torvalds } 19151da177e4SLinus Torvalds 191683d5cde4SAlexey Dobriyan static const struct block_device_operations dm_blk_dops; 19171da177e4SLinus Torvalds 191853d5914fSMikulas Patocka static void dm_wq_work(struct work_struct *work); 191953d5914fSMikulas Patocka 19204a0b4ddfSMike Snitzer static void dm_init_md_queue(struct mapped_device *md) 19214a0b4ddfSMike Snitzer { 19224a0b4ddfSMike Snitzer /* 19234a0b4ddfSMike Snitzer * Request-based dm devices cannot be stacked on top of bio-based dm 19244a0b4ddfSMike Snitzer * devices. The type of this dm device has not been decided yet. 19254a0b4ddfSMike Snitzer * The type is decided at the first table loading time. 19264a0b4ddfSMike Snitzer * To prevent problematic device stacking, clear the queue flag 19274a0b4ddfSMike Snitzer * for request stacking support until then. 19284a0b4ddfSMike Snitzer * 19294a0b4ddfSMike Snitzer * This queue is new, so no concurrency on the queue_flags. 19304a0b4ddfSMike Snitzer */ 19314a0b4ddfSMike Snitzer queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue); 19324a0b4ddfSMike Snitzer 19334a0b4ddfSMike Snitzer md->queue->queuedata = md; 19344a0b4ddfSMike Snitzer md->queue->backing_dev_info.congested_fn = dm_any_congested; 19354a0b4ddfSMike Snitzer md->queue->backing_dev_info.congested_data = md; 19364a0b4ddfSMike Snitzer blk_queue_make_request(md->queue, dm_request); 19374a0b4ddfSMike Snitzer blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 19384a0b4ddfSMike Snitzer blk_queue_merge_bvec(md->queue, dm_merge_bvec); 19394a0b4ddfSMike Snitzer } 19404a0b4ddfSMike Snitzer 19411da177e4SLinus Torvalds /* 19421da177e4SLinus Torvalds * Allocate and initialise a blank device with a given minor. 
19431da177e4SLinus Torvalds */ 19442b06cfffSAlasdair G Kergon static struct mapped_device *alloc_dev(int minor) 19451da177e4SLinus Torvalds { 19461da177e4SLinus Torvalds int r; 1947cf13ab8eSFrederik Deweerdt struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); 1948ba61fdd1SJeff Mahoney void *old_md; 19491da177e4SLinus Torvalds 19501da177e4SLinus Torvalds if (!md) { 19511da177e4SLinus Torvalds DMWARN("unable to allocate device, out of memory."); 19521da177e4SLinus Torvalds return NULL; 19531da177e4SLinus Torvalds } 19541da177e4SLinus Torvalds 195510da4f79SJeff Mahoney if (!try_module_get(THIS_MODULE)) 19566ed7ade8SMilan Broz goto bad_module_get; 195710da4f79SJeff Mahoney 19581da177e4SLinus Torvalds /* get a minor number for the dev */ 19592b06cfffSAlasdair G Kergon if (minor == DM_ANY_MINOR) 1960cf13ab8eSFrederik Deweerdt r = next_free_minor(&minor); 19612b06cfffSAlasdair G Kergon else 1962cf13ab8eSFrederik Deweerdt r = specific_minor(minor); 19631da177e4SLinus Torvalds if (r < 0) 19646ed7ade8SMilan Broz goto bad_minor; 19651da177e4SLinus Torvalds 196683d5e5b0SMikulas Patocka r = init_srcu_struct(&md->io_barrier); 196783d5e5b0SMikulas Patocka if (r < 0) 196883d5e5b0SMikulas Patocka goto bad_io_barrier; 196983d5e5b0SMikulas Patocka 1970a5664dadSMike Snitzer md->type = DM_TYPE_NONE; 1971e61290a4SDaniel Walker mutex_init(&md->suspend_lock); 1972a5664dadSMike Snitzer mutex_init(&md->type_lock); 1973022c2611SMikulas Patocka spin_lock_init(&md->deferred_lock); 19741da177e4SLinus Torvalds atomic_set(&md->holders, 1); 19755c6bd75dSAlasdair G Kergon atomic_set(&md->open_count, 0); 19761da177e4SLinus Torvalds atomic_set(&md->event_nr, 0); 19777a8c3d3bSMike Anderson atomic_set(&md->uevent_seq, 0); 19787a8c3d3bSMike Anderson INIT_LIST_HEAD(&md->uevent_list); 19797a8c3d3bSMike Anderson spin_lock_init(&md->uevent_lock); 19801da177e4SLinus Torvalds 19814a0b4ddfSMike Snitzer md->queue = blk_alloc_queue(GFP_KERNEL); 19821da177e4SLinus Torvalds if (!md->queue) 19836ed7ade8SMilan Broz goto bad_queue; 19841da177e4SLinus Torvalds 19854a0b4ddfSMike Snitzer dm_init_md_queue(md); 19869faf400fSStefan Bader 19871da177e4SLinus Torvalds md->disk = alloc_disk(1); 19881da177e4SLinus Torvalds if (!md->disk) 19896ed7ade8SMilan Broz goto bad_disk; 19901da177e4SLinus Torvalds 1991316d315bSNikanth Karthikesan atomic_set(&md->pending[0], 0); 1992316d315bSNikanth Karthikesan atomic_set(&md->pending[1], 0); 1993f0b04115SJeff Mahoney init_waitqueue_head(&md->wait); 199453d5914fSMikulas Patocka INIT_WORK(&md->work, dm_wq_work); 1995f0b04115SJeff Mahoney init_waitqueue_head(&md->eventq); 1996f0b04115SJeff Mahoney 19971da177e4SLinus Torvalds md->disk->major = _major; 19981da177e4SLinus Torvalds md->disk->first_minor = minor; 19991da177e4SLinus Torvalds md->disk->fops = &dm_blk_dops; 20001da177e4SLinus Torvalds md->disk->queue = md->queue; 20011da177e4SLinus Torvalds md->disk->private_data = md; 20021da177e4SLinus Torvalds sprintf(md->disk->disk_name, "dm-%d", minor); 20031da177e4SLinus Torvalds add_disk(md->disk); 20047e51f257SMike Anderson format_dev_t(md->name, MKDEV(_major, minor)); 20051da177e4SLinus Torvalds 2006670368a8STejun Heo md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0); 2007304f3f6aSMilan Broz if (!md->wq) 2008304f3f6aSMilan Broz goto bad_thread; 2009304f3f6aSMilan Broz 201032a926daSMikulas Patocka md->bdev = bdget_disk(md->disk, 0); 201132a926daSMikulas Patocka if (!md->bdev) 201232a926daSMikulas Patocka goto bad_bdev; 201332a926daSMikulas Patocka 20146a8736d1STejun Heo bio_init(&md->flush_bio); 
20156a8736d1STejun Heo md->flush_bio.bi_bdev = md->bdev; 20166a8736d1STejun Heo md->flush_bio.bi_rw = WRITE_FLUSH; 20176a8736d1STejun Heo 2018fd2ed4d2SMikulas Patocka dm_stats_init(&md->stats); 2019fd2ed4d2SMikulas Patocka 2020ba61fdd1SJeff Mahoney /* Populate the mapping, nobody knows we exist yet */ 2021f32c10b0SJeff Mahoney spin_lock(&_minor_lock); 2022ba61fdd1SJeff Mahoney old_md = idr_replace(&_minor_idr, md, minor); 2023f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 2024ba61fdd1SJeff Mahoney 2025ba61fdd1SJeff Mahoney BUG_ON(old_md != MINOR_ALLOCED); 2026ba61fdd1SJeff Mahoney 20271da177e4SLinus Torvalds return md; 20281da177e4SLinus Torvalds 202932a926daSMikulas Patocka bad_bdev: 203032a926daSMikulas Patocka destroy_workqueue(md->wq); 2031304f3f6aSMilan Broz bad_thread: 203203022c54SZdenek Kabelac del_gendisk(md->disk); 2033304f3f6aSMilan Broz put_disk(md->disk); 20346ed7ade8SMilan Broz bad_disk: 20351312f40eSAl Viro blk_cleanup_queue(md->queue); 20366ed7ade8SMilan Broz bad_queue: 203783d5e5b0SMikulas Patocka cleanup_srcu_struct(&md->io_barrier); 203883d5e5b0SMikulas Patocka bad_io_barrier: 20391da177e4SLinus Torvalds free_minor(minor); 20406ed7ade8SMilan Broz bad_minor: 204110da4f79SJeff Mahoney module_put(THIS_MODULE); 20426ed7ade8SMilan Broz bad_module_get: 20431da177e4SLinus Torvalds kfree(md); 20441da177e4SLinus Torvalds return NULL; 20451da177e4SLinus Torvalds } 20461da177e4SLinus Torvalds 2047ae9da83fSJun'ichi Nomura static void unlock_fs(struct mapped_device *md); 2048ae9da83fSJun'ichi Nomura 20491da177e4SLinus Torvalds static void free_dev(struct mapped_device *md) 20501da177e4SLinus Torvalds { 2051f331c029STejun Heo int minor = MINOR(disk_devt(md->disk)); 205263d94e48SJun'ichi Nomura 2053ae9da83fSJun'ichi Nomura unlock_fs(md); 2054db8fef4fSMikulas Patocka bdput(md->bdev); 2055304f3f6aSMilan Broz destroy_workqueue(md->wq); 2056e6ee8c0bSKiyoshi Ueda if (md->io_pool) 20571da177e4SLinus Torvalds mempool_destroy(md->io_pool); 2058e6ee8c0bSKiyoshi Ueda if (md->bs) 20599faf400fSStefan Bader bioset_free(md->bs); 20609c47008dSMartin K. Petersen blk_integrity_unregister(md->disk); 20611da177e4SLinus Torvalds del_gendisk(md->disk); 206283d5e5b0SMikulas Patocka cleanup_srcu_struct(&md->io_barrier); 206363d94e48SJun'ichi Nomura free_minor(minor); 2064fba9f90eSJeff Mahoney 2065fba9f90eSJeff Mahoney spin_lock(&_minor_lock); 2066fba9f90eSJeff Mahoney md->disk->private_data = NULL; 2067fba9f90eSJeff Mahoney spin_unlock(&_minor_lock); 2068fba9f90eSJeff Mahoney 20691da177e4SLinus Torvalds put_disk(md->disk); 20701312f40eSAl Viro blk_cleanup_queue(md->queue); 2071fd2ed4d2SMikulas Patocka dm_stats_cleanup(&md->stats); 207210da4f79SJeff Mahoney module_put(THIS_MODULE); 20731da177e4SLinus Torvalds kfree(md); 20741da177e4SLinus Torvalds } 20751da177e4SLinus Torvalds 2076e6ee8c0bSKiyoshi Ueda static void __bind_mempools(struct mapped_device *md, struct dm_table *t) 2077e6ee8c0bSKiyoshi Ueda { 2078c0820cf5SMikulas Patocka struct dm_md_mempools *p = dm_table_get_md_mempools(t); 2079e6ee8c0bSKiyoshi Ueda 20805f015204SJun'ichi Nomura if (md->io_pool && md->bs) { 208116245bdcSJun'ichi Nomura /* The md already has necessary mempools. */ 208216245bdcSJun'ichi Nomura if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { 2083c0820cf5SMikulas Patocka /* 208416245bdcSJun'ichi Nomura * Reload bioset because front_pad may have changed 208516245bdcSJun'ichi Nomura * because a different table was loaded. 
2086c0820cf5SMikulas Patocka */ 2087c0820cf5SMikulas Patocka bioset_free(md->bs); 2088c0820cf5SMikulas Patocka md->bs = p->bs; 2089c0820cf5SMikulas Patocka p->bs = NULL; 209016245bdcSJun'ichi Nomura } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) { 209116245bdcSJun'ichi Nomura /* 209216245bdcSJun'ichi Nomura * There's no need to reload with request-based dm 209316245bdcSJun'ichi Nomura * because the size of front_pad doesn't change. 209416245bdcSJun'ichi Nomura * Note for future: If you are to reload bioset, 209516245bdcSJun'ichi Nomura * prep-ed requests in the queue may refer 209616245bdcSJun'ichi Nomura * to bio from the old bioset, so you must walk 209716245bdcSJun'ichi Nomura * through the queue to unprep. 209816245bdcSJun'ichi Nomura */ 209916245bdcSJun'ichi Nomura } 2100e6ee8c0bSKiyoshi Ueda goto out; 2101c0820cf5SMikulas Patocka } 2102e6ee8c0bSKiyoshi Ueda 21035f015204SJun'ichi Nomura BUG_ON(!p || md->io_pool || md->bs); 2104e6ee8c0bSKiyoshi Ueda 2105e6ee8c0bSKiyoshi Ueda md->io_pool = p->io_pool; 2106e6ee8c0bSKiyoshi Ueda p->io_pool = NULL; 2107e6ee8c0bSKiyoshi Ueda md->bs = p->bs; 2108e6ee8c0bSKiyoshi Ueda p->bs = NULL; 2109e6ee8c0bSKiyoshi Ueda 2110e6ee8c0bSKiyoshi Ueda out: 2111e6ee8c0bSKiyoshi Ueda /* mempool bind completed, now no need any mempools in the table */ 2112e6ee8c0bSKiyoshi Ueda dm_table_free_md_mempools(t); 2113e6ee8c0bSKiyoshi Ueda } 2114e6ee8c0bSKiyoshi Ueda 21151da177e4SLinus Torvalds /* 21161da177e4SLinus Torvalds * Bind a table to the device. 21171da177e4SLinus Torvalds */ 21181da177e4SLinus Torvalds static void event_callback(void *context) 21191da177e4SLinus Torvalds { 21207a8c3d3bSMike Anderson unsigned long flags; 21217a8c3d3bSMike Anderson LIST_HEAD(uevents); 21221da177e4SLinus Torvalds struct mapped_device *md = (struct mapped_device *) context; 21231da177e4SLinus Torvalds 21247a8c3d3bSMike Anderson spin_lock_irqsave(&md->uevent_lock, flags); 21257a8c3d3bSMike Anderson list_splice_init(&md->uevent_list, &uevents); 21267a8c3d3bSMike Anderson spin_unlock_irqrestore(&md->uevent_lock, flags); 21277a8c3d3bSMike Anderson 2128ed9e1982STejun Heo dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); 21297a8c3d3bSMike Anderson 21301da177e4SLinus Torvalds atomic_inc(&md->event_nr); 21311da177e4SLinus Torvalds wake_up(&md->eventq); 21321da177e4SLinus Torvalds } 21331da177e4SLinus Torvalds 2134c217649bSMike Snitzer /* 2135c217649bSMike Snitzer * Protected by md->suspend_lock obtained by dm_swap_table(). 2136c217649bSMike Snitzer */ 21374e90188bSAlasdair G Kergon static void __set_size(struct mapped_device *md, sector_t size) 21381da177e4SLinus Torvalds { 21394e90188bSAlasdair G Kergon set_capacity(md->disk, size); 21401da177e4SLinus Torvalds 2141db8fef4fSMikulas Patocka i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); 21421da177e4SLinus Torvalds } 21431da177e4SLinus Torvalds 2144042d2a9bSAlasdair G Kergon /* 2145d5b9dd04SMikulas Patocka * Return 1 if the queue has a compulsory merge_bvec_fn function. 2146d5b9dd04SMikulas Patocka * 2147d5b9dd04SMikulas Patocka * If this function returns 0, then the device is either a non-dm 2148d5b9dd04SMikulas Patocka * device without a merge_bvec_fn, or it is a dm device that is 2149d5b9dd04SMikulas Patocka * able to split any bios it receives that are too big. 
2150d5b9dd04SMikulas Patocka */ 2151d5b9dd04SMikulas Patocka int dm_queue_merge_is_compulsory(struct request_queue *q) 2152d5b9dd04SMikulas Patocka { 2153d5b9dd04SMikulas Patocka struct mapped_device *dev_md; 2154d5b9dd04SMikulas Patocka 2155d5b9dd04SMikulas Patocka if (!q->merge_bvec_fn) 2156d5b9dd04SMikulas Patocka return 0; 2157d5b9dd04SMikulas Patocka 2158d5b9dd04SMikulas Patocka if (q->make_request_fn == dm_request) { 2159d5b9dd04SMikulas Patocka dev_md = q->queuedata; 2160d5b9dd04SMikulas Patocka if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags)) 2161d5b9dd04SMikulas Patocka return 0; 2162d5b9dd04SMikulas Patocka } 2163d5b9dd04SMikulas Patocka 2164d5b9dd04SMikulas Patocka return 1; 2165d5b9dd04SMikulas Patocka } 2166d5b9dd04SMikulas Patocka 2167d5b9dd04SMikulas Patocka static int dm_device_merge_is_compulsory(struct dm_target *ti, 2168d5b9dd04SMikulas Patocka struct dm_dev *dev, sector_t start, 2169d5b9dd04SMikulas Patocka sector_t len, void *data) 2170d5b9dd04SMikulas Patocka { 2171d5b9dd04SMikulas Patocka struct block_device *bdev = dev->bdev; 2172d5b9dd04SMikulas Patocka struct request_queue *q = bdev_get_queue(bdev); 2173d5b9dd04SMikulas Patocka 2174d5b9dd04SMikulas Patocka return dm_queue_merge_is_compulsory(q); 2175d5b9dd04SMikulas Patocka } 2176d5b9dd04SMikulas Patocka 2177d5b9dd04SMikulas Patocka /* 2178d5b9dd04SMikulas Patocka * Return 1 if it is acceptable to ignore merge_bvec_fn based 2179d5b9dd04SMikulas Patocka * on the properties of the underlying devices. 2180d5b9dd04SMikulas Patocka */ 2181d5b9dd04SMikulas Patocka static int dm_table_merge_is_optional(struct dm_table *table) 2182d5b9dd04SMikulas Patocka { 2183d5b9dd04SMikulas Patocka unsigned i = 0; 2184d5b9dd04SMikulas Patocka struct dm_target *ti; 2185d5b9dd04SMikulas Patocka 2186d5b9dd04SMikulas Patocka while (i < dm_table_get_num_targets(table)) { 2187d5b9dd04SMikulas Patocka ti = dm_table_get_target(table, i++); 2188d5b9dd04SMikulas Patocka 2189d5b9dd04SMikulas Patocka if (ti->type->iterate_devices && 2190d5b9dd04SMikulas Patocka ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL)) 2191d5b9dd04SMikulas Patocka return 0; 2192d5b9dd04SMikulas Patocka } 2193d5b9dd04SMikulas Patocka 2194d5b9dd04SMikulas Patocka return 1; 2195d5b9dd04SMikulas Patocka } 2196d5b9dd04SMikulas Patocka 2197d5b9dd04SMikulas Patocka /* 2198042d2a9bSAlasdair G Kergon * Returns old map, which caller must destroy. 2199042d2a9bSAlasdair G Kergon */ 2200042d2a9bSAlasdair G Kergon static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, 2201754c5fc7SMike Snitzer struct queue_limits *limits) 22021da177e4SLinus Torvalds { 2203042d2a9bSAlasdair G Kergon struct dm_table *old_map; 2204165125e1SJens Axboe struct request_queue *q = md->queue; 22051da177e4SLinus Torvalds sector_t size; 2206d5b9dd04SMikulas Patocka int merge_is_optional; 22071da177e4SLinus Torvalds 22081da177e4SLinus Torvalds size = dm_table_get_size(t); 22093ac51e74SDarrick J. Wong 22103ac51e74SDarrick J. Wong /* 22113ac51e74SDarrick J. Wong * Wipe any geometry if the size of the table changed. 22123ac51e74SDarrick J. Wong */ 2213fd2ed4d2SMikulas Patocka if (size != dm_get_size(md)) 22143ac51e74SDarrick J. Wong memset(&md->geometry, 0, sizeof(md->geometry)); 22153ac51e74SDarrick J. 
Wong 22164e90188bSAlasdair G Kergon __set_size(md, size); 22171da177e4SLinus Torvalds 2218cf222b37SAlasdair G Kergon dm_table_event_callback(t, event_callback, md); 22192ca3310eSAlasdair G Kergon 2220e6ee8c0bSKiyoshi Ueda /* 2221e6ee8c0bSKiyoshi Ueda * The queue hasn't been stopped yet, if the old table type wasn't 2222e6ee8c0bSKiyoshi Ueda * for request-based during suspension. So stop it to prevent 2223e6ee8c0bSKiyoshi Ueda * I/O mapping before resume. 2224e6ee8c0bSKiyoshi Ueda * This must be done before setting the queue restrictions, 2225e6ee8c0bSKiyoshi Ueda * because request-based dm may be run just after the setting. 2226e6ee8c0bSKiyoshi Ueda */ 2227e6ee8c0bSKiyoshi Ueda if (dm_table_request_based(t) && !blk_queue_stopped(q)) 2228e6ee8c0bSKiyoshi Ueda stop_queue(q); 2229e6ee8c0bSKiyoshi Ueda 2230e6ee8c0bSKiyoshi Ueda __bind_mempools(md, t); 2231e6ee8c0bSKiyoshi Ueda 2232d5b9dd04SMikulas Patocka merge_is_optional = dm_table_merge_is_optional(t); 2233d5b9dd04SMikulas Patocka 2234042d2a9bSAlasdair G Kergon old_map = md->map; 223583d5e5b0SMikulas Patocka rcu_assign_pointer(md->map, t); 223636a0456fSAlasdair G Kergon md->immutable_target_type = dm_table_get_immutable_target_type(t); 223736a0456fSAlasdair G Kergon 2238754c5fc7SMike Snitzer dm_table_set_restrictions(t, q, limits); 2239d5b9dd04SMikulas Patocka if (merge_is_optional) 2240d5b9dd04SMikulas Patocka set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); 2241d5b9dd04SMikulas Patocka else 2242d5b9dd04SMikulas Patocka clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); 224383d5e5b0SMikulas Patocka dm_sync_table(md); 22442ca3310eSAlasdair G Kergon 2245042d2a9bSAlasdair G Kergon return old_map; 22461da177e4SLinus Torvalds } 22471da177e4SLinus Torvalds 2248a7940155SAlasdair G Kergon /* 2249a7940155SAlasdair G Kergon * Returns unbound table for the caller to free. 2250a7940155SAlasdair G Kergon */ 2251a7940155SAlasdair G Kergon static struct dm_table *__unbind(struct mapped_device *md) 22521da177e4SLinus Torvalds { 22531da177e4SLinus Torvalds struct dm_table *map = md->map; 22541da177e4SLinus Torvalds 22551da177e4SLinus Torvalds if (!map) 2256a7940155SAlasdair G Kergon return NULL; 22571da177e4SLinus Torvalds 22581da177e4SLinus Torvalds dm_table_event_callback(map, NULL, NULL); 225983d5e5b0SMikulas Patocka rcu_assign_pointer(md->map, NULL); 226083d5e5b0SMikulas Patocka dm_sync_table(md); 2261a7940155SAlasdair G Kergon 2262a7940155SAlasdair G Kergon return map; 22631da177e4SLinus Torvalds } 22641da177e4SLinus Torvalds 22651da177e4SLinus Torvalds /* 22661da177e4SLinus Torvalds * Constructor for a new device. 22671da177e4SLinus Torvalds */ 22682b06cfffSAlasdair G Kergon int dm_create(int minor, struct mapped_device **result) 22691da177e4SLinus Torvalds { 22701da177e4SLinus Torvalds struct mapped_device *md; 22711da177e4SLinus Torvalds 22722b06cfffSAlasdair G Kergon md = alloc_dev(minor); 22731da177e4SLinus Torvalds if (!md) 22741da177e4SLinus Torvalds return -ENXIO; 22751da177e4SLinus Torvalds 2276784aae73SMilan Broz dm_sysfs_init(md); 2277784aae73SMilan Broz 22781da177e4SLinus Torvalds *result = md; 22791da177e4SLinus Torvalds return 0; 22801da177e4SLinus Torvalds } 22811da177e4SLinus Torvalds 2282a5664dadSMike Snitzer /* 2283a5664dadSMike Snitzer * Functions to manage md->type. 2284a5664dadSMike Snitzer * All are required to hold md->type_lock. 
2285a5664dadSMike Snitzer */ 2286a5664dadSMike Snitzer void dm_lock_md_type(struct mapped_device *md) 2287a5664dadSMike Snitzer { 2288a5664dadSMike Snitzer mutex_lock(&md->type_lock); 2289a5664dadSMike Snitzer } 2290a5664dadSMike Snitzer 2291a5664dadSMike Snitzer void dm_unlock_md_type(struct mapped_device *md) 2292a5664dadSMike Snitzer { 2293a5664dadSMike Snitzer mutex_unlock(&md->type_lock); 2294a5664dadSMike Snitzer } 2295a5664dadSMike Snitzer 2296a5664dadSMike Snitzer void dm_set_md_type(struct mapped_device *md, unsigned type) 2297a5664dadSMike Snitzer { 229800c4fc3bSMike Snitzer BUG_ON(!mutex_is_locked(&md->type_lock)); 2299a5664dadSMike Snitzer md->type = type; 2300a5664dadSMike Snitzer } 2301a5664dadSMike Snitzer 2302a5664dadSMike Snitzer unsigned dm_get_md_type(struct mapped_device *md) 2303a5664dadSMike Snitzer { 230400c4fc3bSMike Snitzer BUG_ON(!mutex_is_locked(&md->type_lock)); 2305a5664dadSMike Snitzer return md->type; 2306a5664dadSMike Snitzer } 2307a5664dadSMike Snitzer 230836a0456fSAlasdair G Kergon struct target_type *dm_get_immutable_target_type(struct mapped_device *md) 230936a0456fSAlasdair G Kergon { 231036a0456fSAlasdair G Kergon return md->immutable_target_type; 231136a0456fSAlasdair G Kergon } 231236a0456fSAlasdair G Kergon 23134a0b4ddfSMike Snitzer /* 2314f84cb8a4SMike Snitzer * The queue_limits are only valid as long as you have a reference 2315f84cb8a4SMike Snitzer * count on 'md'. 2316f84cb8a4SMike Snitzer */ 2317f84cb8a4SMike Snitzer struct queue_limits *dm_get_queue_limits(struct mapped_device *md) 2318f84cb8a4SMike Snitzer { 2319f84cb8a4SMike Snitzer BUG_ON(!atomic_read(&md->holders)); 2320f84cb8a4SMike Snitzer return &md->queue->limits; 2321f84cb8a4SMike Snitzer } 2322f84cb8a4SMike Snitzer EXPORT_SYMBOL_GPL(dm_get_queue_limits); 2323f84cb8a4SMike Snitzer 2324f84cb8a4SMike Snitzer /* 23254a0b4ddfSMike Snitzer * Fully initialize a request-based queue (->elevator, ->request_fn, etc). 
23264a0b4ddfSMike Snitzer */ 23274a0b4ddfSMike Snitzer static int dm_init_request_based_queue(struct mapped_device *md) 23284a0b4ddfSMike Snitzer { 23294a0b4ddfSMike Snitzer struct request_queue *q = NULL; 23304a0b4ddfSMike Snitzer 23314a0b4ddfSMike Snitzer if (md->queue->elevator) 23324a0b4ddfSMike Snitzer return 1; 23334a0b4ddfSMike Snitzer 23344a0b4ddfSMike Snitzer /* Fully initialize the queue */ 23354a0b4ddfSMike Snitzer q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL); 23364a0b4ddfSMike Snitzer if (!q) 23374a0b4ddfSMike Snitzer return 0; 23384a0b4ddfSMike Snitzer 23394a0b4ddfSMike Snitzer md->queue = q; 23404a0b4ddfSMike Snitzer dm_init_md_queue(md); 23414a0b4ddfSMike Snitzer blk_queue_softirq_done(md->queue, dm_softirq_done); 23424a0b4ddfSMike Snitzer blk_queue_prep_rq(md->queue, dm_prep_fn); 23434a0b4ddfSMike Snitzer blk_queue_lld_busy(md->queue, dm_lld_busy); 23444a0b4ddfSMike Snitzer 23454a0b4ddfSMike Snitzer elv_register_queue(md->queue); 23464a0b4ddfSMike Snitzer 23474a0b4ddfSMike Snitzer return 1; 23484a0b4ddfSMike Snitzer } 23494a0b4ddfSMike Snitzer 23504a0b4ddfSMike Snitzer /* 23514a0b4ddfSMike Snitzer * Setup the DM device's queue based on md's type 23524a0b4ddfSMike Snitzer */ 23534a0b4ddfSMike Snitzer int dm_setup_md_queue(struct mapped_device *md) 23544a0b4ddfSMike Snitzer { 23554a0b4ddfSMike Snitzer if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && 23564a0b4ddfSMike Snitzer !dm_init_request_based_queue(md)) { 23574a0b4ddfSMike Snitzer DMWARN("Cannot initialize queue for request-based mapped device"); 23584a0b4ddfSMike Snitzer return -EINVAL; 23594a0b4ddfSMike Snitzer } 23604a0b4ddfSMike Snitzer 23614a0b4ddfSMike Snitzer return 0; 23624a0b4ddfSMike Snitzer } 23634a0b4ddfSMike Snitzer 2364637842cfSDavid Teigland static struct mapped_device *dm_find_md(dev_t dev) 23651da177e4SLinus Torvalds { 23661da177e4SLinus Torvalds struct mapped_device *md; 23671da177e4SLinus Torvalds unsigned minor = MINOR(dev); 23681da177e4SLinus Torvalds 23691da177e4SLinus Torvalds if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) 23701da177e4SLinus Torvalds return NULL; 23711da177e4SLinus Torvalds 2372f32c10b0SJeff Mahoney spin_lock(&_minor_lock); 23731da177e4SLinus Torvalds 23741da177e4SLinus Torvalds md = idr_find(&_minor_idr, minor); 2375fba9f90eSJeff Mahoney if (md && (md == MINOR_ALLOCED || 2376f331c029STejun Heo (MINOR(disk_devt(dm_disk(md))) != minor) || 2377abdc568bSKiyoshi Ueda dm_deleting_md(md) || 2378fba9f90eSJeff Mahoney test_bit(DMF_FREEING, &md->flags))) { 2379637842cfSDavid Teigland md = NULL; 2380fba9f90eSJeff Mahoney goto out; 2381fba9f90eSJeff Mahoney } 23821da177e4SLinus Torvalds 2383fba9f90eSJeff Mahoney out: 2384f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 23851da177e4SLinus Torvalds 2386637842cfSDavid Teigland return md; 2387637842cfSDavid Teigland } 2388637842cfSDavid Teigland 2389d229a958SDavid Teigland struct mapped_device *dm_get_md(dev_t dev) 2390d229a958SDavid Teigland { 2391d229a958SDavid Teigland struct mapped_device *md = dm_find_md(dev); 2392d229a958SDavid Teigland 2393d229a958SDavid Teigland if (md) 2394d229a958SDavid Teigland dm_get(md); 2395d229a958SDavid Teigland 2396d229a958SDavid Teigland return md; 2397d229a958SDavid Teigland } 23983cf2e4baSAlasdair G Kergon EXPORT_SYMBOL_GPL(dm_get_md); 2399d229a958SDavid Teigland 24009ade92a9SAlasdair G Kergon void *dm_get_mdptr(struct mapped_device *md) 2401637842cfSDavid Teigland { 24029ade92a9SAlasdair G Kergon return md->interface_ptr; 24031da177e4SLinus Torvalds } 24041da177e4SLinus Torvalds 
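/*
 * Illustrative sketch: dm_get_mdptr() and dm_set_mdptr() simply stash and
 * retrieve one opaque pointer per mapped_device (dm-ioctl, for example,
 * keeps its hash cell there).  A hypothetical caller owning private
 * per-device state could pair them as below; "example_state" and
 * "example_attach_state" are made-up names used purely for illustration,
 * not part of the device-mapper API.
 */
#if 0
struct example_state {
	int opens;			/* hypothetical per-device bookkeeping */
};

static int example_attach_state(struct mapped_device *md)
{
	struct example_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return -ENOMEM;

	dm_set_mdptr(md, s);		/* retrieved later via dm_get_mdptr(md) */
	return 0;
}
#endif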
24051da177e4SLinus Torvalds void dm_set_mdptr(struct mapped_device *md, void *ptr) 24061da177e4SLinus Torvalds { 24071da177e4SLinus Torvalds md->interface_ptr = ptr; 24081da177e4SLinus Torvalds } 24091da177e4SLinus Torvalds 24101da177e4SLinus Torvalds void dm_get(struct mapped_device *md) 24111da177e4SLinus Torvalds { 24121da177e4SLinus Torvalds atomic_inc(&md->holders); 24133f77316dSKiyoshi Ueda BUG_ON(test_bit(DMF_FREEING, &md->flags)); 24141da177e4SLinus Torvalds } 24151da177e4SLinus Torvalds 241672d94861SAlasdair G Kergon const char *dm_device_name(struct mapped_device *md) 241772d94861SAlasdair G Kergon { 241872d94861SAlasdair G Kergon return md->name; 241972d94861SAlasdair G Kergon } 242072d94861SAlasdair G Kergon EXPORT_SYMBOL_GPL(dm_device_name); 242172d94861SAlasdair G Kergon 24223f77316dSKiyoshi Ueda static void __dm_destroy(struct mapped_device *md, bool wait) 24231da177e4SLinus Torvalds { 24241134e5aeSMike Anderson struct dm_table *map; 242583d5e5b0SMikulas Patocka int srcu_idx; 24261da177e4SLinus Torvalds 24273f77316dSKiyoshi Ueda might_sleep(); 2428fba9f90eSJeff Mahoney 24293f77316dSKiyoshi Ueda spin_lock(&_minor_lock); 243083d5e5b0SMikulas Patocka map = dm_get_live_table(md, &srcu_idx); 24313f77316dSKiyoshi Ueda idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md)))); 2432fba9f90eSJeff Mahoney set_bit(DMF_FREEING, &md->flags); 2433f32c10b0SJeff Mahoney spin_unlock(&_minor_lock); 24343f77316dSKiyoshi Ueda 24354f186f8bSKiyoshi Ueda if (!dm_suspended_md(md)) { 24361da177e4SLinus Torvalds dm_table_presuspend_targets(map); 24371da177e4SLinus Torvalds dm_table_postsuspend_targets(map); 24381da177e4SLinus Torvalds } 24393f77316dSKiyoshi Ueda 244083d5e5b0SMikulas Patocka /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ 244183d5e5b0SMikulas Patocka dm_put_live_table(md, srcu_idx); 244283d5e5b0SMikulas Patocka 24433f77316dSKiyoshi Ueda /* 24443f77316dSKiyoshi Ueda * Rare, but there may be I/O requests still going to complete, 24453f77316dSKiyoshi Ueda * for example. Wait for all references to disappear. 24463f77316dSKiyoshi Ueda * No one should increment the reference count of the mapped_device, 24473f77316dSKiyoshi Ueda * after the mapped_device state becomes DMF_FREEING. 24483f77316dSKiyoshi Ueda */ 24493f77316dSKiyoshi Ueda if (wait) 24503f77316dSKiyoshi Ueda while (atomic_read(&md->holders)) 24513f77316dSKiyoshi Ueda msleep(1); 24523f77316dSKiyoshi Ueda else if (atomic_read(&md->holders)) 24533f77316dSKiyoshi Ueda DMWARN("%s: Forcibly removing mapped_device still in use! 
(%d users)", 24543f77316dSKiyoshi Ueda dm_device_name(md), atomic_read(&md->holders)); 24553f77316dSKiyoshi Ueda 2456784aae73SMilan Broz dm_sysfs_exit(md); 2457a7940155SAlasdair G Kergon dm_table_destroy(__unbind(md)); 24581da177e4SLinus Torvalds free_dev(md); 24591da177e4SLinus Torvalds } 24603f77316dSKiyoshi Ueda 24613f77316dSKiyoshi Ueda void dm_destroy(struct mapped_device *md) 24623f77316dSKiyoshi Ueda { 24633f77316dSKiyoshi Ueda __dm_destroy(md, true); 24643f77316dSKiyoshi Ueda } 24653f77316dSKiyoshi Ueda 24663f77316dSKiyoshi Ueda void dm_destroy_immediate(struct mapped_device *md) 24673f77316dSKiyoshi Ueda { 24683f77316dSKiyoshi Ueda __dm_destroy(md, false); 24693f77316dSKiyoshi Ueda } 24703f77316dSKiyoshi Ueda 24713f77316dSKiyoshi Ueda void dm_put(struct mapped_device *md) 24723f77316dSKiyoshi Ueda { 24733f77316dSKiyoshi Ueda atomic_dec(&md->holders); 24741da177e4SLinus Torvalds } 247579eb885cSEdward Goggin EXPORT_SYMBOL_GPL(dm_put); 24761da177e4SLinus Torvalds 2477401600dfSMikulas Patocka static int dm_wait_for_completion(struct mapped_device *md, int interruptible) 247846125c1cSMilan Broz { 247946125c1cSMilan Broz int r = 0; 2480b44ebeb0SMikulas Patocka DECLARE_WAITQUEUE(wait, current); 2481b44ebeb0SMikulas Patocka 2482b44ebeb0SMikulas Patocka add_wait_queue(&md->wait, &wait); 248346125c1cSMilan Broz 248446125c1cSMilan Broz while (1) { 2485401600dfSMikulas Patocka set_current_state(interruptible); 248646125c1cSMilan Broz 2487b4324feeSKiyoshi Ueda if (!md_in_flight(md)) 248846125c1cSMilan Broz break; 248946125c1cSMilan Broz 2490401600dfSMikulas Patocka if (interruptible == TASK_INTERRUPTIBLE && 2491401600dfSMikulas Patocka signal_pending(current)) { 249246125c1cSMilan Broz r = -EINTR; 249346125c1cSMilan Broz break; 249446125c1cSMilan Broz } 249546125c1cSMilan Broz 249646125c1cSMilan Broz io_schedule(); 249746125c1cSMilan Broz } 249846125c1cSMilan Broz set_current_state(TASK_RUNNING); 249946125c1cSMilan Broz 2500b44ebeb0SMikulas Patocka remove_wait_queue(&md->wait, &wait); 2501b44ebeb0SMikulas Patocka 250246125c1cSMilan Broz return r; 250346125c1cSMilan Broz } 250446125c1cSMilan Broz 25051da177e4SLinus Torvalds /* 25061da177e4SLinus Torvalds * Process the deferred bios 25071da177e4SLinus Torvalds */ 2508ef208587SMikulas Patocka static void dm_wq_work(struct work_struct *work) 25091da177e4SLinus Torvalds { 2510ef208587SMikulas Patocka struct mapped_device *md = container_of(work, struct mapped_device, 2511ef208587SMikulas Patocka work); 25126d6f10dfSMilan Broz struct bio *c; 251383d5e5b0SMikulas Patocka int srcu_idx; 251483d5e5b0SMikulas Patocka struct dm_table *map; 25151da177e4SLinus Torvalds 251683d5e5b0SMikulas Patocka map = dm_get_live_table(md, &srcu_idx); 2517ef208587SMikulas Patocka 25183b00b203SMikulas Patocka while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 2519022c2611SMikulas Patocka spin_lock_irq(&md->deferred_lock); 2520022c2611SMikulas Patocka c = bio_list_pop(&md->deferred); 2521022c2611SMikulas Patocka spin_unlock_irq(&md->deferred_lock); 2522022c2611SMikulas Patocka 25236a8736d1STejun Heo if (!c) 2524df12ee99SAlasdair G Kergon break; 252573d410c0SMilan Broz 2526e6ee8c0bSKiyoshi Ueda if (dm_request_based(md)) 2527e6ee8c0bSKiyoshi Ueda generic_make_request(c); 2528af7e466aSMikulas Patocka else 252983d5e5b0SMikulas Patocka __split_and_process_bio(md, map, c); 2530e6ee8c0bSKiyoshi Ueda } 25313b00b203SMikulas Patocka 253283d5e5b0SMikulas Patocka dm_put_live_table(md, srcu_idx); 25331da177e4SLinus Torvalds } 25341da177e4SLinus Torvalds 25359a1fb464SMikulas 
Patocka static void dm_queue_flush(struct mapped_device *md) 2536304f3f6aSMilan Broz { 25373b00b203SMikulas Patocka clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 25383b00b203SMikulas Patocka smp_mb__after_clear_bit(); 253953d5914fSMikulas Patocka queue_work(md->wq, &md->work); 2540304f3f6aSMilan Broz } 2541304f3f6aSMilan Broz 25421da177e4SLinus Torvalds /* 2543042d2a9bSAlasdair G Kergon * Swap in a new table, returning the old one for the caller to destroy. 25441da177e4SLinus Torvalds */ 2545042d2a9bSAlasdair G Kergon struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) 25461da177e4SLinus Torvalds { 254787eb5b21SMike Christie struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); 2548754c5fc7SMike Snitzer struct queue_limits limits; 2549042d2a9bSAlasdair G Kergon int r; 25501da177e4SLinus Torvalds 2551e61290a4SDaniel Walker mutex_lock(&md->suspend_lock); 25521da177e4SLinus Torvalds 25531da177e4SLinus Torvalds /* device must be suspended */ 25544f186f8bSKiyoshi Ueda if (!dm_suspended_md(md)) 255593c534aeSAlasdair G Kergon goto out; 25561da177e4SLinus Torvalds 25573ae70656SMike Snitzer /* 25583ae70656SMike Snitzer * If the new table has no data devices, retain the existing limits. 25593ae70656SMike Snitzer * This helps multipath with queue_if_no_path if all paths disappear, 25603ae70656SMike Snitzer * then new I/O is queued based on these limits, and then some paths 25613ae70656SMike Snitzer * reappear. 25623ae70656SMike Snitzer */ 25633ae70656SMike Snitzer if (dm_table_has_no_data_devices(table)) { 256483d5e5b0SMikulas Patocka live_map = dm_get_live_table_fast(md); 25653ae70656SMike Snitzer if (live_map) 25663ae70656SMike Snitzer limits = md->queue->limits; 256783d5e5b0SMikulas Patocka dm_put_live_table_fast(md); 25683ae70656SMike Snitzer } 25693ae70656SMike Snitzer 257087eb5b21SMike Christie if (!live_map) { 2571754c5fc7SMike Snitzer r = dm_calculate_queue_limits(table, &limits); 2572042d2a9bSAlasdair G Kergon if (r) { 2573042d2a9bSAlasdair G Kergon map = ERR_PTR(r); 2574754c5fc7SMike Snitzer goto out; 2575042d2a9bSAlasdair G Kergon } 257687eb5b21SMike Christie } 2577754c5fc7SMike Snitzer 2578042d2a9bSAlasdair G Kergon map = __bind(md, table, &limits); 25791da177e4SLinus Torvalds 258093c534aeSAlasdair G Kergon out: 2581e61290a4SDaniel Walker mutex_unlock(&md->suspend_lock); 2582042d2a9bSAlasdair G Kergon return map; 25831da177e4SLinus Torvalds } 25841da177e4SLinus Torvalds 25851da177e4SLinus Torvalds /* 25861da177e4SLinus Torvalds * Functions to lock and unlock any filesystem running on the 25871da177e4SLinus Torvalds * device. 
25881da177e4SLinus Torvalds */ 25892ca3310eSAlasdair G Kergon static int lock_fs(struct mapped_device *md) 25901da177e4SLinus Torvalds { 2591e39e2e95SAlasdair G Kergon int r; 25921da177e4SLinus Torvalds 25931da177e4SLinus Torvalds WARN_ON(md->frozen_sb); 2594dfbe03f6SAlasdair G Kergon 2595db8fef4fSMikulas Patocka md->frozen_sb = freeze_bdev(md->bdev); 2596dfbe03f6SAlasdair G Kergon if (IS_ERR(md->frozen_sb)) { 2597cf222b37SAlasdair G Kergon r = PTR_ERR(md->frozen_sb); 2598e39e2e95SAlasdair G Kergon md->frozen_sb = NULL; 2599e39e2e95SAlasdair G Kergon return r; 2600dfbe03f6SAlasdair G Kergon } 2601dfbe03f6SAlasdair G Kergon 2602aa8d7c2fSAlasdair G Kergon set_bit(DMF_FROZEN, &md->flags); 2603aa8d7c2fSAlasdair G Kergon 26041da177e4SLinus Torvalds return 0; 26051da177e4SLinus Torvalds } 26061da177e4SLinus Torvalds 26072ca3310eSAlasdair G Kergon static void unlock_fs(struct mapped_device *md) 26081da177e4SLinus Torvalds { 2609aa8d7c2fSAlasdair G Kergon if (!test_bit(DMF_FROZEN, &md->flags)) 2610aa8d7c2fSAlasdair G Kergon return; 2611aa8d7c2fSAlasdair G Kergon 2612db8fef4fSMikulas Patocka thaw_bdev(md->bdev, md->frozen_sb); 26131da177e4SLinus Torvalds md->frozen_sb = NULL; 2614aa8d7c2fSAlasdair G Kergon clear_bit(DMF_FROZEN, &md->flags); 26151da177e4SLinus Torvalds } 26161da177e4SLinus Torvalds 26171da177e4SLinus Torvalds /* 26181da177e4SLinus Torvalds * We need to be able to change a mapping table under a mounted 26191da177e4SLinus Torvalds * filesystem. For example we might want to move some data in 26201da177e4SLinus Torvalds * the background. Before the table can be swapped with 26211da177e4SLinus Torvalds * dm_bind_table, dm_suspend must be called to flush any in 26221da177e4SLinus Torvalds * flight bios and ensure that any further io gets deferred. 26231da177e4SLinus Torvalds */ 2624cec47e3dSKiyoshi Ueda /* 2625cec47e3dSKiyoshi Ueda * Suspend mechanism in request-based dm. 2626cec47e3dSKiyoshi Ueda * 26279f518b27SKiyoshi Ueda * 1. Flush all I/Os by lock_fs() if needed. 26289f518b27SKiyoshi Ueda * 2. Stop dispatching any I/O by stopping the request_queue. 26299f518b27SKiyoshi Ueda * 3. Wait for all in-flight I/Os to be completed or requeued. 2630cec47e3dSKiyoshi Ueda * 26319f518b27SKiyoshi Ueda * To abort suspend, start the request_queue. 2632cec47e3dSKiyoshi Ueda */ 2633a3d77d35SKiyoshi Ueda int dm_suspend(struct mapped_device *md, unsigned suspend_flags) 26341da177e4SLinus Torvalds { 26352ca3310eSAlasdair G Kergon struct dm_table *map = NULL; 263646125c1cSMilan Broz int r = 0; 2637a3d77d35SKiyoshi Ueda int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; 26382e93ccc1SKiyoshi Ueda int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; 26391da177e4SLinus Torvalds 2640e61290a4SDaniel Walker mutex_lock(&md->suspend_lock); 26412ca3310eSAlasdair G Kergon 26424f186f8bSKiyoshi Ueda if (dm_suspended_md(md)) { 264373d410c0SMilan Broz r = -EINVAL; 2644d287483dSAlasdair G Kergon goto out_unlock; 264573d410c0SMilan Broz } 26461da177e4SLinus Torvalds 264783d5e5b0SMikulas Patocka map = md->map; 2648cf222b37SAlasdair G Kergon 26492e93ccc1SKiyoshi Ueda /* 26502e93ccc1SKiyoshi Ueda * DMF_NOFLUSH_SUSPENDING must be set before presuspend. 26512e93ccc1SKiyoshi Ueda * This flag is cleared before dm_suspend returns. 26522e93ccc1SKiyoshi Ueda */ 26532e93ccc1SKiyoshi Ueda if (noflush) 26542e93ccc1SKiyoshi Ueda set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 26552e93ccc1SKiyoshi Ueda 2656436d4108SAlasdair G Kergon /* This does not get reverted if there's an error later. 
*/ 26571da177e4SLinus Torvalds dm_table_presuspend_targets(map); 26581da177e4SLinus Torvalds 26592e93ccc1SKiyoshi Ueda /* 26609f518b27SKiyoshi Ueda * Flush I/O to the device. 26619f518b27SKiyoshi Ueda * Any I/O submitted after lock_fs() may not be flushed. 26629f518b27SKiyoshi Ueda * noflush takes precedence over do_lockfs. 26639f518b27SKiyoshi Ueda * (lock_fs() flushes I/Os and waits for them to complete.) 26642e93ccc1SKiyoshi Ueda */ 266532a926daSMikulas Patocka if (!noflush && do_lockfs) { 26662ca3310eSAlasdair G Kergon r = lock_fs(md); 26672ca3310eSAlasdair G Kergon if (r) 266883d5e5b0SMikulas Patocka goto out_unlock; 2669aa8d7c2fSAlasdair G Kergon } 26701da177e4SLinus Torvalds 26711da177e4SLinus Torvalds /* 26723b00b203SMikulas Patocka * Here we must make sure that no processes are submitting requests 26733b00b203SMikulas Patocka * to target drivers i.e. no one may be executing 26743b00b203SMikulas Patocka * __split_and_process_bio. This is called from dm_request and 26753b00b203SMikulas Patocka * dm_wq_work. 26763b00b203SMikulas Patocka * 26773b00b203SMikulas Patocka * To get all processes out of __split_and_process_bio in dm_request, 26783b00b203SMikulas Patocka * we take the write lock. To prevent any process from reentering 26796a8736d1STejun Heo * __split_and_process_bio from dm_request and quiesce the thread 26806a8736d1STejun Heo * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call 26816a8736d1STejun Heo * flush_workqueue(md->wq). 26821da177e4SLinus Torvalds */ 26831eb787ecSAlasdair G Kergon set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 268483d5e5b0SMikulas Patocka synchronize_srcu(&md->io_barrier); 26851da177e4SLinus Torvalds 2686d0bcb878SKiyoshi Ueda /* 268729e4013dSTejun Heo * Stop md->queue before flushing md->wq in case request-based 268829e4013dSTejun Heo * dm defers requests to md->wq from md->queue. 2689d0bcb878SKiyoshi Ueda */ 2690cec47e3dSKiyoshi Ueda if (dm_request_based(md)) 26919f518b27SKiyoshi Ueda stop_queue(md->queue); 2692cec47e3dSKiyoshi Ueda 2693d0bcb878SKiyoshi Ueda flush_workqueue(md->wq); 2694d0bcb878SKiyoshi Ueda 26951da177e4SLinus Torvalds /* 26963b00b203SMikulas Patocka * At this point no more requests are entering target request routines. 26973b00b203SMikulas Patocka * We call dm_wait_for_completion to wait for all existing requests 26983b00b203SMikulas Patocka * to finish. 26991da177e4SLinus Torvalds */ 2700401600dfSMikulas Patocka r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); 27011da177e4SLinus Torvalds 27026d6f10dfSMilan Broz if (noflush) 2703022c2611SMikulas Patocka clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); 270483d5e5b0SMikulas Patocka synchronize_srcu(&md->io_barrier); 27052e93ccc1SKiyoshi Ueda 27061da177e4SLinus Torvalds /* were we interrupted ? */ 270746125c1cSMilan Broz if (r < 0) { 27089a1fb464SMikulas Patocka dm_queue_flush(md); 270973d410c0SMilan Broz 2710cec47e3dSKiyoshi Ueda if (dm_request_based(md)) 27119f518b27SKiyoshi Ueda start_queue(md->queue); 2712cec47e3dSKiyoshi Ueda 27132ca3310eSAlasdair G Kergon unlock_fs(md); 271483d5e5b0SMikulas Patocka goto out_unlock; /* pushback list is already flushed, so skip flush */ 27152ca3310eSAlasdair G Kergon } 27162ca3310eSAlasdair G Kergon 27173b00b203SMikulas Patocka /* 27183b00b203SMikulas Patocka * If dm_wait_for_completion returned 0, the device is completely 27193b00b203SMikulas Patocka * quiescent now. There is no request-processing activity. All new 27203b00b203SMikulas Patocka * requests are being added to md->deferred list.
27213b00b203SMikulas Patocka */ 27223b00b203SMikulas Patocka 27231da177e4SLinus Torvalds set_bit(DMF_SUSPENDED, &md->flags); 27241da177e4SLinus Torvalds 27254d4471cbSKiyoshi Ueda dm_table_postsuspend_targets(map); 27264d4471cbSKiyoshi Ueda 2727d287483dSAlasdair G Kergon out_unlock: 2728e61290a4SDaniel Walker mutex_unlock(&md->suspend_lock); 2729cf222b37SAlasdair G Kergon return r; 27301da177e4SLinus Torvalds } 27311da177e4SLinus Torvalds 27321da177e4SLinus Torvalds int dm_resume(struct mapped_device *md) 27331da177e4SLinus Torvalds { 2734cf222b37SAlasdair G Kergon int r = -EINVAL; 2735cf222b37SAlasdair G Kergon struct dm_table *map = NULL; 27361da177e4SLinus Torvalds 2737e61290a4SDaniel Walker mutex_lock(&md->suspend_lock); 27384f186f8bSKiyoshi Ueda if (!dm_suspended_md(md)) 2739cf222b37SAlasdair G Kergon goto out; 2740cf222b37SAlasdair G Kergon 274183d5e5b0SMikulas Patocka map = md->map; 27422ca3310eSAlasdair G Kergon if (!map || !dm_table_get_size(map)) 2743cf222b37SAlasdair G Kergon goto out; 27441da177e4SLinus Torvalds 27458757b776SMilan Broz r = dm_table_resume_targets(map); 27468757b776SMilan Broz if (r) 27478757b776SMilan Broz goto out; 27482ca3310eSAlasdair G Kergon 27499a1fb464SMikulas Patocka dm_queue_flush(md); 27502ca3310eSAlasdair G Kergon 2751cec47e3dSKiyoshi Ueda /* 2752cec47e3dSKiyoshi Ueda * Flushing deferred I/Os must be done after targets are resumed 2753cec47e3dSKiyoshi Ueda * so that mapping of targets can work correctly. 2754cec47e3dSKiyoshi Ueda * Request-based dm is queueing the deferred I/Os in its request_queue. 2755cec47e3dSKiyoshi Ueda */ 2756cec47e3dSKiyoshi Ueda if (dm_request_based(md)) 2757cec47e3dSKiyoshi Ueda start_queue(md->queue); 2758cec47e3dSKiyoshi Ueda 27592ca3310eSAlasdair G Kergon unlock_fs(md); 27602ca3310eSAlasdair G Kergon 27612ca3310eSAlasdair G Kergon clear_bit(DMF_SUSPENDED, &md->flags); 27622ca3310eSAlasdair G Kergon 2763cf222b37SAlasdair G Kergon r = 0; 2764cf222b37SAlasdair G Kergon out: 2765e61290a4SDaniel Walker mutex_unlock(&md->suspend_lock); 27662ca3310eSAlasdair G Kergon 2767cf222b37SAlasdair G Kergon return r; 27681da177e4SLinus Torvalds } 27691da177e4SLinus Torvalds 2770fd2ed4d2SMikulas Patocka /* 2771fd2ed4d2SMikulas Patocka * Internal suspend/resume works like userspace-driven suspend. It waits 2772fd2ed4d2SMikulas Patocka * until all bios finish and prevents issuing new bios to the target drivers. 2773fd2ed4d2SMikulas Patocka * It may be used only from the kernel. 2774fd2ed4d2SMikulas Patocka * 2775fd2ed4d2SMikulas Patocka * Internal suspend holds md->suspend_lock, which prevents interaction with 2776fd2ed4d2SMikulas Patocka * userspace-driven suspend. 
2777fd2ed4d2SMikulas Patocka */ 2778fd2ed4d2SMikulas Patocka 2779fd2ed4d2SMikulas Patocka void dm_internal_suspend(struct mapped_device *md) 2780fd2ed4d2SMikulas Patocka { 2781fd2ed4d2SMikulas Patocka mutex_lock(&md->suspend_lock); 2782fd2ed4d2SMikulas Patocka if (dm_suspended_md(md)) 2783fd2ed4d2SMikulas Patocka return; 2784fd2ed4d2SMikulas Patocka 2785fd2ed4d2SMikulas Patocka set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 2786fd2ed4d2SMikulas Patocka synchronize_srcu(&md->io_barrier); 2787fd2ed4d2SMikulas Patocka flush_workqueue(md->wq); 2788fd2ed4d2SMikulas Patocka dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); 2789fd2ed4d2SMikulas Patocka } 2790fd2ed4d2SMikulas Patocka 2791fd2ed4d2SMikulas Patocka void dm_internal_resume(struct mapped_device *md) 2792fd2ed4d2SMikulas Patocka { 2793fd2ed4d2SMikulas Patocka if (dm_suspended_md(md)) 2794fd2ed4d2SMikulas Patocka goto done; 2795fd2ed4d2SMikulas Patocka 2796fd2ed4d2SMikulas Patocka dm_queue_flush(md); 2797fd2ed4d2SMikulas Patocka 2798fd2ed4d2SMikulas Patocka done: 2799fd2ed4d2SMikulas Patocka mutex_unlock(&md->suspend_lock); 2800fd2ed4d2SMikulas Patocka } 2801fd2ed4d2SMikulas Patocka 28021da177e4SLinus Torvalds /*----------------------------------------------------------------- 28031da177e4SLinus Torvalds * Event notification. 28041da177e4SLinus Torvalds *---------------------------------------------------------------*/ 28053abf85b5SPeter Rajnoha int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, 280660935eb2SMilan Broz unsigned cookie) 280769267a30SAlasdair G Kergon { 280860935eb2SMilan Broz char udev_cookie[DM_COOKIE_LENGTH]; 280960935eb2SMilan Broz char *envp[] = { udev_cookie, NULL }; 281060935eb2SMilan Broz 281160935eb2SMilan Broz if (!cookie) 28123abf85b5SPeter Rajnoha return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); 281360935eb2SMilan Broz else { 281460935eb2SMilan Broz snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", 281560935eb2SMilan Broz DM_COOKIE_ENV_VAR_NAME, cookie); 28163abf85b5SPeter Rajnoha return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, 28173abf85b5SPeter Rajnoha action, envp); 281860935eb2SMilan Broz } 281969267a30SAlasdair G Kergon } 282069267a30SAlasdair G Kergon 28217a8c3d3bSMike Anderson uint32_t dm_next_uevent_seq(struct mapped_device *md) 28227a8c3d3bSMike Anderson { 28237a8c3d3bSMike Anderson return atomic_add_return(1, &md->uevent_seq); 28247a8c3d3bSMike Anderson } 28257a8c3d3bSMike Anderson 28261da177e4SLinus Torvalds uint32_t dm_get_event_nr(struct mapped_device *md) 28271da177e4SLinus Torvalds { 28281da177e4SLinus Torvalds return atomic_read(&md->event_nr); 28291da177e4SLinus Torvalds } 28301da177e4SLinus Torvalds 28311da177e4SLinus Torvalds int dm_wait_event(struct mapped_device *md, int event_nr) 28321da177e4SLinus Torvalds { 28331da177e4SLinus Torvalds return wait_event_interruptible(md->eventq, 28341da177e4SLinus Torvalds (event_nr != atomic_read(&md->event_nr))); 28351da177e4SLinus Torvalds } 28361da177e4SLinus Torvalds 28377a8c3d3bSMike Anderson void dm_uevent_add(struct mapped_device *md, struct list_head *elist) 28387a8c3d3bSMike Anderson { 28397a8c3d3bSMike Anderson unsigned long flags; 28407a8c3d3bSMike Anderson 28417a8c3d3bSMike Anderson spin_lock_irqsave(&md->uevent_lock, flags); 28427a8c3d3bSMike Anderson list_add(elist, &md->uevent_list); 28437a8c3d3bSMike Anderson spin_unlock_irqrestore(&md->uevent_lock, flags); 28447a8c3d3bSMike Anderson } 28457a8c3d3bSMike Anderson 28461da177e4SLinus Torvalds /* 28471da177e4SLinus Torvalds * The gendisk is only 
valid as long as you have a reference 28481da177e4SLinus Torvalds * count on 'md'. 28491da177e4SLinus Torvalds */ 28501da177e4SLinus Torvalds struct gendisk *dm_disk(struct mapped_device *md) 28511da177e4SLinus Torvalds { 28521da177e4SLinus Torvalds return md->disk; 28531da177e4SLinus Torvalds } 28541da177e4SLinus Torvalds 2855784aae73SMilan Broz struct kobject *dm_kobject(struct mapped_device *md) 2856784aae73SMilan Broz { 2857784aae73SMilan Broz return &md->kobj; 2858784aae73SMilan Broz } 2859784aae73SMilan Broz 2860784aae73SMilan Broz /* 2861784aae73SMilan Broz * struct mapped_device should not be exported outside of dm.c 2862784aae73SMilan Broz * so use this check to verify that kobj is part of md structure 2863784aae73SMilan Broz */ 2864784aae73SMilan Broz struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 2865784aae73SMilan Broz { 2866784aae73SMilan Broz struct mapped_device *md; 2867784aae73SMilan Broz 2868784aae73SMilan Broz md = container_of(kobj, struct mapped_device, kobj); 2869784aae73SMilan Broz if (&md->kobj != kobj) 2870784aae73SMilan Broz return NULL; 2871784aae73SMilan Broz 28724d89b7b4SMilan Broz if (test_bit(DMF_FREEING, &md->flags) || 2873432a212cSMike Anderson dm_deleting_md(md)) 28744d89b7b4SMilan Broz return NULL; 28754d89b7b4SMilan Broz 2876784aae73SMilan Broz dm_get(md); 2877784aae73SMilan Broz return md; 2878784aae73SMilan Broz } 2879784aae73SMilan Broz 28804f186f8bSKiyoshi Ueda int dm_suspended_md(struct mapped_device *md) 28811da177e4SLinus Torvalds { 28821da177e4SLinus Torvalds return test_bit(DMF_SUSPENDED, &md->flags); 28831da177e4SLinus Torvalds } 28841da177e4SLinus Torvalds 288564dbce58SKiyoshi Ueda int dm_suspended(struct dm_target *ti) 288664dbce58SKiyoshi Ueda { 2887ecdb2e25SKiyoshi Ueda return dm_suspended_md(dm_table_get_md(ti->table)); 288864dbce58SKiyoshi Ueda } 288964dbce58SKiyoshi Ueda EXPORT_SYMBOL_GPL(dm_suspended); 289064dbce58SKiyoshi Ueda 28912e93ccc1SKiyoshi Ueda int dm_noflush_suspending(struct dm_target *ti) 28922e93ccc1SKiyoshi Ueda { 2893ecdb2e25SKiyoshi Ueda return __noflush_suspending(dm_table_get_md(ti->table)); 28942e93ccc1SKiyoshi Ueda } 28952e93ccc1SKiyoshi Ueda EXPORT_SYMBOL_GPL(dm_noflush_suspending); 28962e93ccc1SKiyoshi Ueda 2897c0820cf5SMikulas Patocka struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) 2898e6ee8c0bSKiyoshi Ueda { 28995f015204SJun'ichi Nomura struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); 29005f015204SJun'ichi Nomura struct kmem_cache *cachep; 29015f015204SJun'ichi Nomura unsigned int pool_size; 29025f015204SJun'ichi Nomura unsigned int front_pad; 2903e6ee8c0bSKiyoshi Ueda 2904e6ee8c0bSKiyoshi Ueda if (!pools) 2905e6ee8c0bSKiyoshi Ueda return NULL; 2906e6ee8c0bSKiyoshi Ueda 290723e5083bSJun'ichi Nomura if (type == DM_TYPE_BIO_BASED) { 29085f015204SJun'ichi Nomura cachep = _io_cache; 29096cfa5857SMike Snitzer pool_size = RESERVED_BIO_BASED_IOS; 29105f015204SJun'ichi Nomura front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); 29115f015204SJun'ichi Nomura } else if (type == DM_TYPE_REQUEST_BASED) { 29125f015204SJun'ichi Nomura cachep = _rq_tio_cache; 2913*f4790826SMike Snitzer pool_size = dm_get_reserved_rq_based_ios(); 29145f015204SJun'ichi Nomura front_pad = offsetof(struct dm_rq_clone_bio_info, clone); 29155f015204SJun'ichi Nomura /* per_bio_data_size is not used. See __bind_mempools(). 
*/ 29165f015204SJun'ichi Nomura WARN_ON(per_bio_data_size != 0); 29175f015204SJun'ichi Nomura } else 29185f015204SJun'ichi Nomura goto out; 29195f015204SJun'ichi Nomura 29206cfa5857SMike Snitzer pools->io_pool = mempool_create_slab_pool(pool_size, cachep); 2921e6ee8c0bSKiyoshi Ueda if (!pools->io_pool) 29225f015204SJun'ichi Nomura goto out; 2923e6ee8c0bSKiyoshi Ueda 29245f015204SJun'ichi Nomura pools->bs = bioset_create(pool_size, front_pad); 2925e6ee8c0bSKiyoshi Ueda if (!pools->bs) 29265f015204SJun'ichi Nomura goto out; 2927e6ee8c0bSKiyoshi Ueda 2928a91a2785SMartin K. Petersen if (integrity && bioset_integrity_create(pools->bs, pool_size)) 29295f015204SJun'ichi Nomura goto out; 2930a91a2785SMartin K. Petersen 2931e6ee8c0bSKiyoshi Ueda return pools; 2932e6ee8c0bSKiyoshi Ueda 29335f015204SJun'ichi Nomura out: 29345f015204SJun'ichi Nomura dm_free_md_mempools(pools); 2935e6ee8c0bSKiyoshi Ueda 2936e6ee8c0bSKiyoshi Ueda return NULL; 2937e6ee8c0bSKiyoshi Ueda } 2938e6ee8c0bSKiyoshi Ueda 2939e6ee8c0bSKiyoshi Ueda void dm_free_md_mempools(struct dm_md_mempools *pools) 2940e6ee8c0bSKiyoshi Ueda { 2941e6ee8c0bSKiyoshi Ueda if (!pools) 2942e6ee8c0bSKiyoshi Ueda return; 2943e6ee8c0bSKiyoshi Ueda 2944e6ee8c0bSKiyoshi Ueda if (pools->io_pool) 2945e6ee8c0bSKiyoshi Ueda mempool_destroy(pools->io_pool); 2946e6ee8c0bSKiyoshi Ueda 2947e6ee8c0bSKiyoshi Ueda if (pools->bs) 2948e6ee8c0bSKiyoshi Ueda bioset_free(pools->bs); 2949e6ee8c0bSKiyoshi Ueda 2950e6ee8c0bSKiyoshi Ueda kfree(pools); 2951e6ee8c0bSKiyoshi Ueda } 2952e6ee8c0bSKiyoshi Ueda 295383d5cde4SAlexey Dobriyan static const struct block_device_operations dm_blk_dops = { 29541da177e4SLinus Torvalds .open = dm_blk_open, 29551da177e4SLinus Torvalds .release = dm_blk_close, 2956aa129a22SMilan Broz .ioctl = dm_blk_ioctl, 29573ac51e74SDarrick J. Wong .getgeo = dm_blk_getgeo, 29581da177e4SLinus Torvalds .owner = THIS_MODULE 29591da177e4SLinus Torvalds }; 29601da177e4SLinus Torvalds 29611da177e4SLinus Torvalds EXPORT_SYMBOL(dm_get_mapinfo); 29621da177e4SLinus Torvalds 29631da177e4SLinus Torvalds /* 29641da177e4SLinus Torvalds * module hooks 29651da177e4SLinus Torvalds */ 29661da177e4SLinus Torvalds module_init(dm_init); 29671da177e4SLinus Torvalds module_exit(dm_exit); 29681da177e4SLinus Torvalds 29691da177e4SLinus Torvalds module_param(major, uint, 0); 29701da177e4SLinus Torvalds MODULE_PARM_DESC(major, "The major number of the device mapper"); 2971*f4790826SMike Snitzer 2972*f4790826SMike Snitzer module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); 2973*f4790826SMike Snitzer MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); 2974*f4790826SMike Snitzer 29751da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " driver"); 29761da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 29771da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 2978
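/*
 * Editor's sketch (not part of the original dm.c): the reserved_rq_based_ios
 * parameter registered above follows a common kernel pattern - a writable
 * module parameter that callers read through an accessor, so any sanity
 * checking lives in one place (the real accessor here is
 * dm_get_reserved_rq_based_ios(), defined elsewhere in dm).  The names
 * example_reserved_ios and example_get_reserved_ios() are made up purely
 * for illustration.
 */
static unsigned example_reserved_ios = 256;
module_param(example_reserved_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(example_reserved_ios, "Illustrative reserved IO count");

static unsigned __maybe_unused example_get_reserved_ios(void)
{
	/* Guard against a user writing 0 through sysfs. */
	return example_reserved_ios ? example_reserved_ios : 256;
}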