11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 16f79ae415SNikos Tsironis #include <linux/list_bl.h> 171da177e4SLinus Torvalds #include <linux/mempool.h> 181da177e4SLinus Torvalds #include <linux/module.h> 191da177e4SLinus Torvalds #include <linux/slab.h> 201da177e4SLinus Torvalds #include <linux/vmalloc.h> 216f3c3f0aSvignesh babu #include <linux/log2.h> 22a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 23721b1d98SNikos Tsironis #include <linux/semaphore.h> 241da177e4SLinus Torvalds 25b735fedeSMikulas Patocka #include "dm.h" 26b735fedeSMikulas Patocka 27aea53d92SJonathan Brassow #include "dm-exception-store.h" 281da177e4SLinus Torvalds 2972d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 3072d94861SAlasdair G Kergon 31d698aa45SMikulas Patocka static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; 32d698aa45SMikulas Patocka 33d698aa45SMikulas Patocka #define dm_target_is_snapshot_merge(ti) \ 34d698aa45SMikulas Patocka ((ti)->type->name == dm_snapshot_merge_target_name) 35d698aa45SMikulas Patocka 361da177e4SLinus Torvalds /* 37cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 
38cd45daffSMikulas Patocka */ 39cd45daffSMikulas Patocka #define MIN_IOS 256 40cd45daffSMikulas Patocka 41ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 42ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 43ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 44ccc45ea8SJonathan Brassow 45191437a5SJon Brassow struct dm_exception_table { 46ccc45ea8SJonathan Brassow uint32_t hash_mask; 47ccc45ea8SJonathan Brassow unsigned hash_shift; 48f79ae415SNikos Tsironis struct hlist_bl_head *table; 49ccc45ea8SJonathan Brassow }; 50ccc45ea8SJonathan Brassow 51ccc45ea8SJonathan Brassow struct dm_snapshot { 524ad8d880SNikos Tsironis struct rw_semaphore lock; 53ccc45ea8SJonathan Brassow 54ccc45ea8SJonathan Brassow struct dm_dev *origin; 55fc56f6fbSMike Snitzer struct dm_dev *cow; 56fc56f6fbSMike Snitzer 57fc56f6fbSMike Snitzer struct dm_target *ti; 58ccc45ea8SJonathan Brassow 59ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 60ccc45ea8SJonathan Brassow struct list_head list; 61ccc45ea8SJonathan Brassow 62d8ddb1cfSMike Snitzer /* 63d8ddb1cfSMike Snitzer * You can't use a snapshot if this is 0 (e.g. if full). 64d8ddb1cfSMike Snitzer * A snapshot-merge target never clears this. 65d8ddb1cfSMike Snitzer */ 66ccc45ea8SJonathan Brassow int valid; 67ccc45ea8SJonathan Brassow 6876c44f6dSMikulas Patocka /* 6976c44f6dSMikulas Patocka * The snapshot overflowed because of a write to the snapshot device. 7076c44f6dSMikulas Patocka * We don't have to invalidate the snapshot in this case, but we need 7176c44f6dSMikulas Patocka * to prevent further writes. 
7276c44f6dSMikulas Patocka */ 7376c44f6dSMikulas Patocka int snapshot_overflowed; 7476c44f6dSMikulas Patocka 75ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 76ccc45ea8SJonathan Brassow int active; 77ccc45ea8SJonathan Brassow 78ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 79ccc45ea8SJonathan Brassow 803f1637f2SNikos Tsironis spinlock_t pe_allocation_lock; 813f1637f2SNikos Tsironis 823f1637f2SNikos Tsironis /* Protected by "pe_allocation_lock" */ 83230c83afSMikulas Patocka sector_t exception_start_sequence; 84230c83afSMikulas Patocka 85230c83afSMikulas Patocka /* Protected by kcopyd single-threaded callback */ 86230c83afSMikulas Patocka sector_t exception_complete_sequence; 87230c83afSMikulas Patocka 88230c83afSMikulas Patocka /* 89230c83afSMikulas Patocka * A list of pending exceptions that completed out of order. 90230c83afSMikulas Patocka * Protected by kcopyd single-threaded callback. 91230c83afSMikulas Patocka */ 923db2776dSDavid Jeffery struct rb_root out_of_order_tree; 93230c83afSMikulas Patocka 946f1c819cSKent Overstreet mempool_t pending_pool; 95924e600dSMike Snitzer 96191437a5SJon Brassow struct dm_exception_table pending; 97191437a5SJon Brassow struct dm_exception_table complete; 98ccc45ea8SJonathan Brassow 99ccc45ea8SJonathan Brassow /* 100ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 101ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 
102ccc45ea8SJonathan Brassow */ 103ccc45ea8SJonathan Brassow spinlock_t pe_lock; 104ccc45ea8SJonathan Brassow 105924e600dSMike Snitzer /* Chunks with outstanding reads */ 106924e600dSMike Snitzer spinlock_t tracked_chunk_lock; 107924e600dSMike Snitzer struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 108924e600dSMike Snitzer 109ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 110ccc45ea8SJonathan Brassow struct dm_exception_store *store; 111ccc45ea8SJonathan Brassow 112721b1d98SNikos Tsironis /* Maximum number of in-flight COW jobs. */ 113721b1d98SNikos Tsironis struct semaphore cow_count; 114721b1d98SNikos Tsironis 115ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 116ccc45ea8SJonathan Brassow 117924e600dSMike Snitzer /* Wait for events based on state_bits */ 118924e600dSMike Snitzer unsigned long state_bits; 119924e600dSMike Snitzer 120924e600dSMike Snitzer /* Range of chunks currently being merged. */ 121924e600dSMike Snitzer chunk_t first_merging_chunk; 122924e600dSMike Snitzer int num_merging_chunks; 1231e03f97eSMikulas Patocka 124d8ddb1cfSMike Snitzer /* 125d8ddb1cfSMike Snitzer * The merge operation failed if this flag is set. 126d8ddb1cfSMike Snitzer * Failure modes are handled as follows: 127d8ddb1cfSMike Snitzer * - I/O error reading the header 128d8ddb1cfSMike Snitzer * => don't load the target; abort. 129d8ddb1cfSMike Snitzer * - Header does not have "valid" flag set 130d8ddb1cfSMike Snitzer * => use the origin; forget about the snapshot. 131d8ddb1cfSMike Snitzer * - I/O error when reading exceptions 132d8ddb1cfSMike Snitzer * => don't load the target; abort. 133d8ddb1cfSMike Snitzer * (We can't use the intermediate origin state.) 134d8ddb1cfSMike Snitzer * - I/O error while merging 135d8ddb1cfSMike Snitzer * => stop merging; set merge_failed; process I/O normally. 
136d8ddb1cfSMike Snitzer */ 137d8ddb1cfSMike Snitzer int merge_failed; 138d8ddb1cfSMike Snitzer 1399fe86254SMikulas Patocka /* 1409fe86254SMikulas Patocka * Incoming bios that overlap with chunks being merged must wait 1419fe86254SMikulas Patocka * for them to be committed. 1429fe86254SMikulas Patocka */ 1439fe86254SMikulas Patocka struct bio_list bios_queued_during_merge; 144ccc45ea8SJonathan Brassow }; 145ccc45ea8SJonathan Brassow 1461e03f97eSMikulas Patocka /* 1471e03f97eSMikulas Patocka * state_bits: 1481e03f97eSMikulas Patocka * RUNNING_MERGE - Merge operation is in progress. 1491e03f97eSMikulas Patocka * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; 1501e03f97eSMikulas Patocka * cleared afterwards. 1511e03f97eSMikulas Patocka */ 1521e03f97eSMikulas Patocka #define RUNNING_MERGE 0 1531e03f97eSMikulas Patocka #define SHUTDOWN_MERGE 1 1541e03f97eSMikulas Patocka 155721b1d98SNikos Tsironis /* 156721b1d98SNikos Tsironis * Maximum number of chunks being copied on write. 157721b1d98SNikos Tsironis * 158721b1d98SNikos Tsironis * The value was decided experimentally as a trade-off between memory 159721b1d98SNikos Tsironis * consumption, stalling the kernel's workqueues and maintaining a high enough 160721b1d98SNikos Tsironis * throughput. 
161721b1d98SNikos Tsironis */ 162721b1d98SNikos Tsironis #define DEFAULT_COW_THRESHOLD 2048 163721b1d98SNikos Tsironis 164721b1d98SNikos Tsironis static int cow_threshold = DEFAULT_COW_THRESHOLD; 165721b1d98SNikos Tsironis module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); 166721b1d98SNikos Tsironis MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); 167721b1d98SNikos Tsironis 168df5d2e90SMikulas Patocka DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, 169df5d2e90SMikulas Patocka "A percentage of time allocated for copy on write"); 170df5d2e90SMikulas Patocka 171c2411045SMikulas Patocka struct dm_dev *dm_snap_origin(struct dm_snapshot *s) 172c2411045SMikulas Patocka { 173c2411045SMikulas Patocka return s->origin; 174c2411045SMikulas Patocka } 175c2411045SMikulas Patocka EXPORT_SYMBOL(dm_snap_origin); 176c2411045SMikulas Patocka 177fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 178fc56f6fbSMike Snitzer { 179fc56f6fbSMike Snitzer return s->cow; 180fc56f6fbSMike Snitzer } 181fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 182fc56f6fbSMike Snitzer 183ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 184ccc45ea8SJonathan Brassow chunk_t chunk) 185ccc45ea8SJonathan Brassow { 186ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 187ccc45ea8SJonathan Brassow } 188ccc45ea8SJonathan Brassow 189ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 190ccc45ea8SJonathan Brassow { 191ccc45ea8SJonathan Brassow /* 192ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 193ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 
194ccc45ea8SJonathan Brassow */ 195ccc45ea8SJonathan Brassow return lhs == rhs; 196ccc45ea8SJonathan Brassow } 197ccc45ea8SJonathan Brassow 198028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1991d4989c8SJon Brassow struct dm_exception e; 2001da177e4SLinus Torvalds 2011da177e4SLinus Torvalds /* 2021da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 2031da177e4SLinus Torvalds * in a bio list 2041da177e4SLinus Torvalds */ 2051da177e4SLinus Torvalds struct bio_list origin_bios; 2061da177e4SLinus Torvalds struct bio_list snapshot_bios; 2071da177e4SLinus Torvalds 2081da177e4SLinus Torvalds /* Pointer back to snapshot context */ 2091da177e4SLinus Torvalds struct dm_snapshot *snap; 2101da177e4SLinus Torvalds 2111da177e4SLinus Torvalds /* 2121da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 2131da177e4SLinus Torvalds * kcopyd. 2141da177e4SLinus Torvalds */ 2151da177e4SLinus Torvalds int started; 216a6e50b40SMikulas Patocka 217230c83afSMikulas Patocka /* There was copying error. */ 218230c83afSMikulas Patocka int copy_error; 219230c83afSMikulas Patocka 220230c83afSMikulas Patocka /* A sequence number, it is used for in-order completion. */ 221230c83afSMikulas Patocka sector_t exception_sequence; 222230c83afSMikulas Patocka 2233db2776dSDavid Jeffery struct rb_node out_of_order_node; 224230c83afSMikulas Patocka 225a6e50b40SMikulas Patocka /* 226a6e50b40SMikulas Patocka * For writing a complete chunk, bypassing the copy. 
227a6e50b40SMikulas Patocka */ 228a6e50b40SMikulas Patocka struct bio *full_bio; 229a6e50b40SMikulas Patocka bio_end_io_t *full_bio_end_io; 2301da177e4SLinus Torvalds }; 2311da177e4SLinus Torvalds 2321da177e4SLinus Torvalds /* 2331da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 2341da177e4SLinus Torvalds * a lock to protect it 2351da177e4SLinus Torvalds */ 236e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 237e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 2381da177e4SLinus Torvalds 239cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 240cd45daffSMikulas Patocka struct hlist_node node; 241cd45daffSMikulas Patocka chunk_t chunk; 242cd45daffSMikulas Patocka }; 243cd45daffSMikulas Patocka 244ee18026aSMikulas Patocka static void init_tracked_chunk(struct bio *bio) 245ee18026aSMikulas Patocka { 246ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 247ee18026aSMikulas Patocka INIT_HLIST_NODE(&c->node); 248ee18026aSMikulas Patocka } 249ee18026aSMikulas Patocka 250ee18026aSMikulas Patocka static bool is_bio_tracked(struct bio *bio) 251ee18026aSMikulas Patocka { 252ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 253ee18026aSMikulas Patocka return !hlist_unhashed(&c->node); 254ee18026aSMikulas Patocka } 255ee18026aSMikulas Patocka 256ee18026aSMikulas Patocka static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) 257cd45daffSMikulas Patocka { 25842bc954fSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 259cd45daffSMikulas Patocka 260cd45daffSMikulas Patocka c->chunk = chunk; 261cd45daffSMikulas Patocka 2629aa0c0e6SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 263cd45daffSMikulas Patocka hlist_add_head(&c->node, 264cd45daffSMikulas Patocka 
&s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 2659aa0c0e6SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 266cd45daffSMikulas Patocka } 267cd45daffSMikulas Patocka 268ee18026aSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) 269cd45daffSMikulas Patocka { 270ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 271cd45daffSMikulas Patocka unsigned long flags; 272cd45daffSMikulas Patocka 273cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 274cd45daffSMikulas Patocka hlist_del(&c->node); 275cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 276cd45daffSMikulas Patocka } 277cd45daffSMikulas Patocka 278a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 279a8d41b59SMikulas Patocka { 280a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 281a8d41b59SMikulas Patocka int found = 0; 282a8d41b59SMikulas Patocka 283a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 284a8d41b59SMikulas Patocka 285b67bfe0dSSasha Levin hlist_for_each_entry(c, 286a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 287a8d41b59SMikulas Patocka if (c->chunk == chunk) { 288a8d41b59SMikulas Patocka found = 1; 289a8d41b59SMikulas Patocka break; 290a8d41b59SMikulas Patocka } 291a8d41b59SMikulas Patocka } 292a8d41b59SMikulas Patocka 293a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 294a8d41b59SMikulas Patocka 295a8d41b59SMikulas Patocka return found; 296a8d41b59SMikulas Patocka } 297a8d41b59SMikulas Patocka 2981da177e4SLinus Torvalds /* 299615d1eb9SMike Snitzer * This conflicting I/O is extremely improbable in the caller, 300615d1eb9SMike Snitzer * so msleep(1) is sufficient and there is no need for a wait queue. 
301615d1eb9SMike Snitzer */ 302615d1eb9SMike Snitzer static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) 303615d1eb9SMike Snitzer { 304615d1eb9SMike Snitzer while (__chunk_is_tracked(s, chunk)) 305615d1eb9SMike Snitzer msleep(1); 306615d1eb9SMike Snitzer } 307615d1eb9SMike Snitzer 308615d1eb9SMike Snitzer /* 3091da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 3101da177e4SLinus Torvalds */ 3111da177e4SLinus Torvalds struct origin { 3121da177e4SLinus Torvalds /* The origin device */ 3131da177e4SLinus Torvalds struct block_device *bdev; 3141da177e4SLinus Torvalds 3151da177e4SLinus Torvalds struct list_head hash_list; 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds /* List of snapshots for this origin */ 3181da177e4SLinus Torvalds struct list_head snapshots; 3191da177e4SLinus Torvalds }; 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds /* 322b735fedeSMikulas Patocka * This structure is allocated for each origin target 323b735fedeSMikulas Patocka */ 324b735fedeSMikulas Patocka struct dm_origin { 325b735fedeSMikulas Patocka struct dm_dev *dev; 326b735fedeSMikulas Patocka struct dm_target *ti; 327b735fedeSMikulas Patocka unsigned split_boundary; 328b735fedeSMikulas Patocka struct list_head hash_list; 329b735fedeSMikulas Patocka }; 330b735fedeSMikulas Patocka 331b735fedeSMikulas Patocka /* 3321da177e4SLinus Torvalds * Size of the hash table for origin volumes. 
If we make this 3331da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 3341da177e4SLinus Torvalds */ 3351da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 3361da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 3371da177e4SLinus Torvalds static struct list_head *_origins; 338b735fedeSMikulas Patocka static struct list_head *_dm_origins; 3391da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 3401da177e4SLinus Torvalds 34173dfd078SMikulas Patocka static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); 34273dfd078SMikulas Patocka static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); 34373dfd078SMikulas Patocka static uint64_t _pending_exceptions_done_count; 34473dfd078SMikulas Patocka 3451da177e4SLinus Torvalds static int init_origin_hash(void) 3461da177e4SLinus Torvalds { 3471da177e4SLinus Torvalds int i; 3481da177e4SLinus Torvalds 3496da2ec56SKees Cook _origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head), 3501da177e4SLinus Torvalds GFP_KERNEL); 3511da177e4SLinus Torvalds if (!_origins) { 352b735fedeSMikulas Patocka DMERR("unable to allocate memory for _origins"); 3531da177e4SLinus Torvalds return -ENOMEM; 3541da177e4SLinus Torvalds } 3551da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 3561da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 357b735fedeSMikulas Patocka 3586da2ec56SKees Cook _dm_origins = kmalloc_array(ORIGIN_HASH_SIZE, 3596da2ec56SKees Cook sizeof(struct list_head), 360b735fedeSMikulas Patocka GFP_KERNEL); 361b735fedeSMikulas Patocka if (!_dm_origins) { 362b735fedeSMikulas Patocka DMERR("unable to allocate memory for _dm_origins"); 363b735fedeSMikulas Patocka kfree(_origins); 364b735fedeSMikulas Patocka return -ENOMEM; 365b735fedeSMikulas Patocka } 366b735fedeSMikulas Patocka for (i = 0; i < ORIGIN_HASH_SIZE; i++) 367b735fedeSMikulas Patocka INIT_LIST_HEAD(_dm_origins + i); 368b735fedeSMikulas Patocka 3691da177e4SLinus Torvalds init_rwsem(&_origins_lock); 
3701da177e4SLinus Torvalds 3711da177e4SLinus Torvalds return 0; 3721da177e4SLinus Torvalds } 3731da177e4SLinus Torvalds 3741da177e4SLinus Torvalds static void exit_origin_hash(void) 3751da177e4SLinus Torvalds { 3761da177e4SLinus Torvalds kfree(_origins); 377b735fedeSMikulas Patocka kfree(_dm_origins); 3781da177e4SLinus Torvalds } 3791da177e4SLinus Torvalds 380028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 3811da177e4SLinus Torvalds { 3821da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 3831da177e4SLinus Torvalds } 3841da177e4SLinus Torvalds 3851da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 3861da177e4SLinus Torvalds { 3871da177e4SLinus Torvalds struct list_head *ol; 3881da177e4SLinus Torvalds struct origin *o; 3891da177e4SLinus Torvalds 3901da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 3911da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 3921da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 3931da177e4SLinus Torvalds return o; 3941da177e4SLinus Torvalds 3951da177e4SLinus Torvalds return NULL; 3961da177e4SLinus Torvalds } 3971da177e4SLinus Torvalds 3981da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 3991da177e4SLinus Torvalds { 4001da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 4011da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 4021da177e4SLinus Torvalds } 4031da177e4SLinus Torvalds 404b735fedeSMikulas Patocka static struct dm_origin *__lookup_dm_origin(struct block_device *origin) 405b735fedeSMikulas Patocka { 406b735fedeSMikulas Patocka struct list_head *ol; 407b735fedeSMikulas Patocka struct dm_origin *o; 408b735fedeSMikulas Patocka 409b735fedeSMikulas Patocka ol = &_dm_origins[origin_hash(origin)]; 410b735fedeSMikulas Patocka list_for_each_entry (o, ol, hash_list) 411b735fedeSMikulas Patocka if (bdev_equal(o->dev->bdev, origin)) 412b735fedeSMikulas Patocka return o; 
413b735fedeSMikulas Patocka 414b735fedeSMikulas Patocka return NULL; 415b735fedeSMikulas Patocka } 416b735fedeSMikulas Patocka 417b735fedeSMikulas Patocka static void __insert_dm_origin(struct dm_origin *o) 418b735fedeSMikulas Patocka { 419b735fedeSMikulas Patocka struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)]; 420b735fedeSMikulas Patocka list_add_tail(&o->hash_list, sl); 421b735fedeSMikulas Patocka } 422b735fedeSMikulas Patocka 423b735fedeSMikulas Patocka static void __remove_dm_origin(struct dm_origin *o) 424b735fedeSMikulas Patocka { 425b735fedeSMikulas Patocka list_del(&o->hash_list); 426b735fedeSMikulas Patocka } 427b735fedeSMikulas Patocka 4281da177e4SLinus Torvalds /* 429c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 430c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 431c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 432c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 4339d3b15c4SMikulas Patocka * snap_merge - an existing snapshot-merge target linked to the same origin. 4349d3b15c4SMikulas Patocka * There can be at most one snapshot-merge target. The parameter is optional. 435c1f0c183SMike Snitzer * 4369d3b15c4SMikulas Patocka * Possible return values and states of snap_src and snap_dest. 
437c1f0c183SMike Snitzer * 0: NULL, NULL - first new snapshot 438c1f0c183SMike Snitzer * 1: snap_src, NULL - normal snapshot 439c1f0c183SMike Snitzer * 2: snap_src, snap_dest - waiting for handover 440c1f0c183SMike Snitzer * 2: snap_src, NULL - handed over, waiting for old to be deleted 441c1f0c183SMike Snitzer * 1: NULL, snap_dest - source got destroyed without handover 442c1f0c183SMike Snitzer */ 443c1f0c183SMike Snitzer static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, 444c1f0c183SMike Snitzer struct dm_snapshot **snap_src, 4459d3b15c4SMikulas Patocka struct dm_snapshot **snap_dest, 4469d3b15c4SMikulas Patocka struct dm_snapshot **snap_merge) 447c1f0c183SMike Snitzer { 448c1f0c183SMike Snitzer struct dm_snapshot *s; 449c1f0c183SMike Snitzer struct origin *o; 450c1f0c183SMike Snitzer int count = 0; 451c1f0c183SMike Snitzer int active; 452c1f0c183SMike Snitzer 453c1f0c183SMike Snitzer o = __lookup_origin(snap->origin->bdev); 454c1f0c183SMike Snitzer if (!o) 455c1f0c183SMike Snitzer goto out; 456c1f0c183SMike Snitzer 457c1f0c183SMike Snitzer list_for_each_entry(s, &o->snapshots, list) { 4589d3b15c4SMikulas Patocka if (dm_target_is_snapshot_merge(s->ti) && snap_merge) 4599d3b15c4SMikulas Patocka *snap_merge = s; 460c1f0c183SMike Snitzer if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) 461c1f0c183SMike Snitzer continue; 462c1f0c183SMike Snitzer 4634ad8d880SNikos Tsironis down_read(&s->lock); 464c1f0c183SMike Snitzer active = s->active; 4654ad8d880SNikos Tsironis up_read(&s->lock); 466c1f0c183SMike Snitzer 467c1f0c183SMike Snitzer if (active) { 468c1f0c183SMike Snitzer if (snap_src) 469c1f0c183SMike Snitzer *snap_src = s; 470c1f0c183SMike Snitzer } else if (snap_dest) 471c1f0c183SMike Snitzer *snap_dest = s; 472c1f0c183SMike Snitzer 473c1f0c183SMike Snitzer count++; 474c1f0c183SMike Snitzer } 475c1f0c183SMike Snitzer 476c1f0c183SMike Snitzer out: 477c1f0c183SMike Snitzer return count; 478c1f0c183SMike Snitzer } 479c1f0c183SMike Snitzer 480c1f0c183SMike 
Snitzer /* 481c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 482c1f0c183SMike Snitzer * otherwise returns 0. 483c1f0c183SMike Snitzer */ 484c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 485c1f0c183SMike Snitzer { 486c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 4879d3b15c4SMikulas Patocka struct dm_snapshot *snap_merge = NULL; 488c1f0c183SMike Snitzer 489c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 4909d3b15c4SMikulas Patocka if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, 4919d3b15c4SMikulas Patocka &snap_merge) == 2) || 492c1f0c183SMike Snitzer snap_dest) { 493c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 494c1f0c183SMike Snitzer "table handover failed"; 495c1f0c183SMike Snitzer return -EINVAL; 496c1f0c183SMike Snitzer } 497c1f0c183SMike Snitzer 498c1f0c183SMike Snitzer /* 499c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 500c1f0c183SMike Snitzer * destination. 501c1f0c183SMike Snitzer */ 502c1f0c183SMike Snitzer if (!snap_src) 503c1f0c183SMike Snitzer return 0; 504c1f0c183SMike Snitzer 5059d3b15c4SMikulas Patocka /* 5069d3b15c4SMikulas Patocka * Non-snapshot-merge handover? 5079d3b15c4SMikulas Patocka */ 5089d3b15c4SMikulas Patocka if (!dm_target_is_snapshot_merge(snap->ti)) 5099d3b15c4SMikulas Patocka return 1; 5109d3b15c4SMikulas Patocka 5119d3b15c4SMikulas Patocka /* 5129d3b15c4SMikulas Patocka * Do not allow more than one merging snapshot. 
5139d3b15c4SMikulas Patocka */ 5149d3b15c4SMikulas Patocka if (snap_merge) { 5159d3b15c4SMikulas Patocka snap->ti->error = "A snapshot is already merging."; 5169d3b15c4SMikulas Patocka return -EINVAL; 5179d3b15c4SMikulas Patocka } 5189d3b15c4SMikulas Patocka 5191e03f97eSMikulas Patocka if (!snap_src->store->type->prepare_merge || 5201e03f97eSMikulas Patocka !snap_src->store->type->commit_merge) { 5211e03f97eSMikulas Patocka snap->ti->error = "Snapshot exception store does not " 5221e03f97eSMikulas Patocka "support snapshot-merge."; 5231e03f97eSMikulas Patocka return -EINVAL; 5241e03f97eSMikulas Patocka } 5251e03f97eSMikulas Patocka 526c1f0c183SMike Snitzer return 1; 527c1f0c183SMike Snitzer } 528c1f0c183SMike Snitzer 529c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 530c1f0c183SMike Snitzer { 531c1f0c183SMike Snitzer struct dm_snapshot *l; 532c1f0c183SMike Snitzer 533c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 534c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 535c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 536c1f0c183SMike Snitzer break; 537c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 538c1f0c183SMike Snitzer } 539c1f0c183SMike Snitzer 540c1f0c183SMike Snitzer /* 5411da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 5421da177e4SLinus Torvalds * up when the origin has a write on it. 543c1f0c183SMike Snitzer * 544c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 545c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 546c1f0c183SMike Snitzer * otherwise returns 0. 
5471da177e4SLinus Torvalds */ 5481da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 5491da177e4SLinus Torvalds { 550c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 5511da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 552c1f0c183SMike Snitzer int r = 0; 5531da177e4SLinus Torvalds 55460c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 55560c856c8SMikulas Patocka if (!new_o) 55660c856c8SMikulas Patocka return -ENOMEM; 55760c856c8SMikulas Patocka 5581da177e4SLinus Torvalds down_write(&_origins_lock); 5591da177e4SLinus Torvalds 560c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 561c1f0c183SMike Snitzer if (r < 0) { 562c1f0c183SMike Snitzer kfree(new_o); 563c1f0c183SMike Snitzer goto out; 564c1f0c183SMike Snitzer } 565c1f0c183SMike Snitzer 566c1f0c183SMike Snitzer o = __lookup_origin(bdev); 56760c856c8SMikulas Patocka if (o) 56860c856c8SMikulas Patocka kfree(new_o); 56960c856c8SMikulas Patocka else { 5701da177e4SLinus Torvalds /* New origin */ 57160c856c8SMikulas Patocka o = new_o; 5721da177e4SLinus Torvalds 5731da177e4SLinus Torvalds /* Initialise the struct */ 5741da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 5751da177e4SLinus Torvalds o->bdev = bdev; 5761da177e4SLinus Torvalds 5771da177e4SLinus Torvalds __insert_origin(o); 5781da177e4SLinus Torvalds } 5791da177e4SLinus Torvalds 580c1f0c183SMike Snitzer __insert_snapshot(o, snap); 581c1f0c183SMike Snitzer 582c1f0c183SMike Snitzer out: 583c1f0c183SMike Snitzer up_write(&_origins_lock); 584c1f0c183SMike Snitzer 585c1f0c183SMike Snitzer return r; 586c1f0c183SMike Snitzer } 587c1f0c183SMike Snitzer 588c1f0c183SMike Snitzer /* 589c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 
590c1f0c183SMike Snitzer */ 591c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 592c1f0c183SMike Snitzer { 593c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 594c1f0c183SMike Snitzer 595c1f0c183SMike Snitzer down_write(&_origins_lock); 596c1f0c183SMike Snitzer 597c1f0c183SMike Snitzer list_del(&s->list); 598c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds up_write(&_origins_lock); 6011da177e4SLinus Torvalds } 6021da177e4SLinus Torvalds 6031da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 6041da177e4SLinus Torvalds { 6051da177e4SLinus Torvalds struct origin *o; 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds down_write(&_origins_lock); 6081da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 6091da177e4SLinus Torvalds 6101da177e4SLinus Torvalds list_del(&s->list); 611c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 6121da177e4SLinus Torvalds list_del(&o->hash_list); 6131da177e4SLinus Torvalds kfree(o); 6141da177e4SLinus Torvalds } 6151da177e4SLinus Torvalds 6161da177e4SLinus Torvalds up_write(&_origins_lock); 6171da177e4SLinus Torvalds } 6181da177e4SLinus Torvalds 6191da177e4SLinus Torvalds /* 6201da177e4SLinus Torvalds * Implementation of the exception hash tables. 621d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 622d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 6231da177e4SLinus Torvalds */ 624f79ae415SNikos Tsironis static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk); 625f79ae415SNikos Tsironis 626f79ae415SNikos Tsironis /* Lock to protect access to the completed and pending exception hash tables. 
*/ 627f79ae415SNikos Tsironis struct dm_exception_table_lock { 628f79ae415SNikos Tsironis struct hlist_bl_head *complete_slot; 629f79ae415SNikos Tsironis struct hlist_bl_head *pending_slot; 630f79ae415SNikos Tsironis }; 631f79ae415SNikos Tsironis 632f79ae415SNikos Tsironis static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk, 633f79ae415SNikos Tsironis struct dm_exception_table_lock *lock) 634f79ae415SNikos Tsironis { 635f79ae415SNikos Tsironis struct dm_exception_table *complete = &s->complete; 636f79ae415SNikos Tsironis struct dm_exception_table *pending = &s->pending; 637f79ae415SNikos Tsironis 638f79ae415SNikos Tsironis lock->complete_slot = &complete->table[exception_hash(complete, chunk)]; 639f79ae415SNikos Tsironis lock->pending_slot = &pending->table[exception_hash(pending, chunk)]; 640f79ae415SNikos Tsironis } 641f79ae415SNikos Tsironis 642f79ae415SNikos Tsironis static void dm_exception_table_lock(struct dm_exception_table_lock *lock) 643f79ae415SNikos Tsironis { 644f79ae415SNikos Tsironis hlist_bl_lock(lock->complete_slot); 645f79ae415SNikos Tsironis hlist_bl_lock(lock->pending_slot); 646f79ae415SNikos Tsironis } 647f79ae415SNikos Tsironis 648f79ae415SNikos Tsironis static void dm_exception_table_unlock(struct dm_exception_table_lock *lock) 649f79ae415SNikos Tsironis { 650f79ae415SNikos Tsironis hlist_bl_unlock(lock->pending_slot); 651f79ae415SNikos Tsironis hlist_bl_unlock(lock->complete_slot); 652f79ae415SNikos Tsironis } 653f79ae415SNikos Tsironis 6543510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 6553510cb94SJon Brassow uint32_t size, unsigned hash_shift) 6561da177e4SLinus Torvalds { 6571da177e4SLinus Torvalds unsigned int i; 6581da177e4SLinus Torvalds 659d74f81f8SMilan Broz et->hash_shift = hash_shift; 6601da177e4SLinus Torvalds et->hash_mask = size - 1; 661f79ae415SNikos Tsironis et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head)); 6621da177e4SLinus Torvalds if (!et->table) 
6631da177e4SLinus Torvalds return -ENOMEM; 6641da177e4SLinus Torvalds 6651da177e4SLinus Torvalds for (i = 0; i < size; i++) 666f79ae415SNikos Tsironis INIT_HLIST_BL_HEAD(et->table + i); 6671da177e4SLinus Torvalds 6681da177e4SLinus Torvalds return 0; 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 6713510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 672191437a5SJon Brassow struct kmem_cache *mem) 6731da177e4SLinus Torvalds { 674f79ae415SNikos Tsironis struct hlist_bl_head *slot; 675f79ae415SNikos Tsironis struct dm_exception *ex; 676f79ae415SNikos Tsironis struct hlist_bl_node *pos, *n; 6771da177e4SLinus Torvalds int i, size; 6781da177e4SLinus Torvalds 6791da177e4SLinus Torvalds size = et->hash_mask + 1; 6801da177e4SLinus Torvalds for (i = 0; i < size; i++) { 6811da177e4SLinus Torvalds slot = et->table + i; 6821da177e4SLinus Torvalds 683f79ae415SNikos Tsironis hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list) 6841da177e4SLinus Torvalds kmem_cache_free(mem, ex); 6851da177e4SLinus Torvalds } 6861da177e4SLinus Torvalds 6871da177e4SLinus Torvalds vfree(et->table); 6881da177e4SLinus Torvalds } 6891da177e4SLinus Torvalds 690191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 6911da177e4SLinus Torvalds { 692d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 6931da177e4SLinus Torvalds } 6941da177e4SLinus Torvalds 6953510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 6961da177e4SLinus Torvalds { 697f79ae415SNikos Tsironis hlist_bl_del(&e->hash_list); 6981da177e4SLinus Torvalds } 6991da177e4SLinus Torvalds 7001da177e4SLinus Torvalds /* 7011da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 7021da177e4SLinus Torvalds * remapped. 
7031da177e4SLinus Torvalds */ 7043510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 7051da177e4SLinus Torvalds chunk_t chunk) 7061da177e4SLinus Torvalds { 707f79ae415SNikos Tsironis struct hlist_bl_head *slot; 708f79ae415SNikos Tsironis struct hlist_bl_node *pos; 7091d4989c8SJon Brassow struct dm_exception *e; 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 712f79ae415SNikos Tsironis hlist_bl_for_each_entry(e, pos, slot, hash_list) 713d74f81f8SMilan Broz if (chunk >= e->old_chunk && 714d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 7151da177e4SLinus Torvalds return e; 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds return NULL; 7181da177e4SLinus Torvalds } 7191da177e4SLinus Torvalds 720119bc547SMikulas Patocka static struct dm_exception *alloc_completed_exception(gfp_t gfp) 7211da177e4SLinus Torvalds { 7221d4989c8SJon Brassow struct dm_exception *e; 7231da177e4SLinus Torvalds 724119bc547SMikulas Patocka e = kmem_cache_alloc(exception_cache, gfp); 725119bc547SMikulas Patocka if (!e && gfp == GFP_NOIO) 7261da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 7271da177e4SLinus Torvalds 7281da177e4SLinus Torvalds return e; 7291da177e4SLinus Torvalds } 7301da177e4SLinus Torvalds 7313510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 7321da177e4SLinus Torvalds { 7331da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 7341da177e4SLinus Torvalds } 7351da177e4SLinus Torvalds 73692e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 7371da177e4SLinus Torvalds { 7386f1c819cSKent Overstreet struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool, 73992e86812SMikulas Patocka GFP_NOIO); 74092e86812SMikulas Patocka 741879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 74292e86812SMikulas Patocka 
pe->snap = s; 74392e86812SMikulas Patocka 74492e86812SMikulas Patocka return pe; 7451da177e4SLinus Torvalds } 7461da177e4SLinus Torvalds 747028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 7481da177e4SLinus Torvalds { 749879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 750879129d2SMikulas Patocka 7516f1c819cSKent Overstreet mempool_free(pe, &s->pending_pool); 7524e857c58SPeter Zijlstra smp_mb__before_atomic(); 753879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 7541da177e4SLinus Torvalds } 7551da177e4SLinus Torvalds 7563510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 7571d4989c8SJon Brassow struct dm_exception *new_e) 758d74f81f8SMilan Broz { 759f79ae415SNikos Tsironis struct hlist_bl_head *l; 760f79ae415SNikos Tsironis struct hlist_bl_node *pos; 7611d4989c8SJon Brassow struct dm_exception *e = NULL; 762d74f81f8SMilan Broz 763d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 764d74f81f8SMilan Broz 765d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 766d74f81f8SMilan Broz if (!eh->hash_shift) 767d74f81f8SMilan Broz goto out; 768d74f81f8SMilan Broz 769d74f81f8SMilan Broz /* List is ordered by old_chunk */ 770f79ae415SNikos Tsironis hlist_bl_for_each_entry(e, pos, l, hash_list) { 771d74f81f8SMilan Broz /* Insert after an existing chunk? */ 772d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 773d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 774d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 775d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 776d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 7773510cb94SJon Brassow free_completed_exception(new_e); 778d74f81f8SMilan Broz return; 779d74f81f8SMilan Broz } 780d74f81f8SMilan Broz 781d74f81f8SMilan Broz /* Insert before an existing chunk? 
*/ 782d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 783d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 784d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 785d74f81f8SMilan Broz e->old_chunk--; 786d74f81f8SMilan Broz e->new_chunk--; 7873510cb94SJon Brassow free_completed_exception(new_e); 788d74f81f8SMilan Broz return; 789d74f81f8SMilan Broz } 790d74f81f8SMilan Broz 791f79ae415SNikos Tsironis if (new_e->old_chunk < e->old_chunk) 792d74f81f8SMilan Broz break; 793d74f81f8SMilan Broz } 794d74f81f8SMilan Broz 795d74f81f8SMilan Broz out: 796f79ae415SNikos Tsironis if (!e) { 797f79ae415SNikos Tsironis /* 798f79ae415SNikos Tsironis * Either the table doesn't support consecutive chunks or slot 799f79ae415SNikos Tsironis * l is empty. 800f79ae415SNikos Tsironis */ 801f79ae415SNikos Tsironis hlist_bl_add_head(&new_e->hash_list, l); 802f79ae415SNikos Tsironis } else if (new_e->old_chunk < e->old_chunk) { 803f79ae415SNikos Tsironis /* Add before an existing exception */ 804f79ae415SNikos Tsironis hlist_bl_add_before(&new_e->hash_list, &e->hash_list); 805f79ae415SNikos Tsironis } else { 806f79ae415SNikos Tsironis /* Add to l's tail: e is the last exception in this slot */ 807f79ae415SNikos Tsironis hlist_bl_add_behind(&new_e->hash_list, &e->hash_list); 808f79ae415SNikos Tsironis } 809d74f81f8SMilan Broz } 810d74f81f8SMilan Broz 811a159c1acSJonathan Brassow /* 812a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 813a159c1acSJonathan Brassow * initialising. 
814a159c1acSJonathan Brassow */ 815a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 8161da177e4SLinus Torvalds { 817f79ae415SNikos Tsironis struct dm_exception_table_lock lock; 818a159c1acSJonathan Brassow struct dm_snapshot *s = context; 8191d4989c8SJon Brassow struct dm_exception *e; 8201da177e4SLinus Torvalds 821119bc547SMikulas Patocka e = alloc_completed_exception(GFP_KERNEL); 8221da177e4SLinus Torvalds if (!e) 8231da177e4SLinus Torvalds return -ENOMEM; 8241da177e4SLinus Torvalds 8251da177e4SLinus Torvalds e->old_chunk = old; 826d74f81f8SMilan Broz 827d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 8281da177e4SLinus Torvalds e->new_chunk = new; 829d74f81f8SMilan Broz 830f79ae415SNikos Tsironis /* 831f79ae415SNikos Tsironis * Although there is no need to lock access to the exception tables 832f79ae415SNikos Tsironis * here, if we don't then hlist_bl_add_head(), called by 833f79ae415SNikos Tsironis * dm_insert_exception(), will complain about accessing the 834f79ae415SNikos Tsironis * corresponding list without locking it first. 835f79ae415SNikos Tsironis */ 836f79ae415SNikos Tsironis dm_exception_table_lock_init(s, old, &lock); 837f79ae415SNikos Tsironis 838f79ae415SNikos Tsironis dm_exception_table_lock(&lock); 8393510cb94SJon Brassow dm_insert_exception(&s->complete, e); 840f79ae415SNikos Tsironis dm_exception_table_unlock(&lock); 841d74f81f8SMilan Broz 8421da177e4SLinus Torvalds return 0; 8431da177e4SLinus Torvalds } 8441da177e4SLinus Torvalds 8457e201b35SMikulas Patocka /* 8467e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 8477e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 
8487e201b35SMikulas Patocka */ 849542f9038SMike Snitzer static uint32_t __minimum_chunk_size(struct origin *o) 8507e201b35SMikulas Patocka { 8517e201b35SMikulas Patocka struct dm_snapshot *snap; 8527e201b35SMikulas Patocka unsigned chunk_size = 0; 8537e201b35SMikulas Patocka 8547e201b35SMikulas Patocka if (o) 8557e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 8567e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 8577e201b35SMikulas Patocka snap->store->chunk_size); 8587e201b35SMikulas Patocka 859542f9038SMike Snitzer return (uint32_t) chunk_size; 8607e201b35SMikulas Patocka } 8617e201b35SMikulas Patocka 8621da177e4SLinus Torvalds /* 8631da177e4SLinus Torvalds * Hard coded magic. 8641da177e4SLinus Torvalds */ 8651da177e4SLinus Torvalds static int calc_max_buckets(void) 8661da177e4SLinus Torvalds { 8671da177e4SLinus Torvalds /* use a fixed size of 2MB */ 8681da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 869f79ae415SNikos Tsironis mem /= sizeof(struct hlist_bl_head); 8701da177e4SLinus Torvalds 8711da177e4SLinus Torvalds return mem; 8721da177e4SLinus Torvalds } 8731da177e4SLinus Torvalds 8741da177e4SLinus Torvalds /* 8751da177e4SLinus Torvalds * Allocate room for a suitable hash table. 8761da177e4SLinus Torvalds */ 877fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 8781da177e4SLinus Torvalds { 87960e356f3SMikulas Patocka sector_t hash_size, cow_dev_size, max_buckets; 8801da177e4SLinus Torvalds 8811da177e4SLinus Torvalds /* 8821da177e4SLinus Torvalds * Calculate based on the size of the original volume or 8831da177e4SLinus Torvalds * the COW volume... 
8841da177e4SLinus Torvalds */ 885fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 8861da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 8871da177e4SLinus Torvalds 88860e356f3SMikulas Patocka hash_size = cow_dev_size >> s->store->chunk_shift; 8891da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 8901da177e4SLinus Torvalds 8918e87b9b8SMikulas Patocka if (hash_size < 64) 8928e87b9b8SMikulas Patocka hash_size = 64; 8938defd830SRobert P. J. Day hash_size = rounddown_pow_of_two(hash_size); 8943510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 895d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 8961da177e4SLinus Torvalds return -ENOMEM; 8971da177e4SLinus Torvalds 8981da177e4SLinus Torvalds /* 8991da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 9001da177e4SLinus Torvalds * Make this smaller than the real hash table 9011da177e4SLinus Torvalds */ 9021da177e4SLinus Torvalds hash_size >>= 3; 9031da177e4SLinus Torvalds if (hash_size < 64) 9041da177e4SLinus Torvalds hash_size = 64; 9051da177e4SLinus Torvalds 9063510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 9073510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 9081da177e4SLinus Torvalds return -ENOMEM; 9091da177e4SLinus Torvalds } 9101da177e4SLinus Torvalds 9111da177e4SLinus Torvalds return 0; 9121da177e4SLinus Torvalds } 9131da177e4SLinus Torvalds 9141e03f97eSMikulas Patocka static void merge_shutdown(struct dm_snapshot *s) 9151e03f97eSMikulas Patocka { 9161e03f97eSMikulas Patocka clear_bit_unlock(RUNNING_MERGE, &s->state_bits); 9174e857c58SPeter Zijlstra smp_mb__after_atomic(); 9181e03f97eSMikulas Patocka wake_up_bit(&s->state_bits, RUNNING_MERGE); 9191e03f97eSMikulas Patocka } 9201e03f97eSMikulas Patocka 9219fe86254SMikulas Patocka static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) 9229fe86254SMikulas Patocka { 9239fe86254SMikulas Patocka s->first_merging_chunk = 
0; 9249fe86254SMikulas Patocka s->num_merging_chunks = 0; 9259fe86254SMikulas Patocka 9269fe86254SMikulas Patocka return bio_list_get(&s->bios_queued_during_merge); 9279fe86254SMikulas Patocka } 9289fe86254SMikulas Patocka 9291e03f97eSMikulas Patocka /* 9301e03f97eSMikulas Patocka * Remove one chunk from the index of completed exceptions. 9311e03f97eSMikulas Patocka */ 9321e03f97eSMikulas Patocka static int __remove_single_exception_chunk(struct dm_snapshot *s, 9331e03f97eSMikulas Patocka chunk_t old_chunk) 9341e03f97eSMikulas Patocka { 9351e03f97eSMikulas Patocka struct dm_exception *e; 9361e03f97eSMikulas Patocka 9371e03f97eSMikulas Patocka e = dm_lookup_exception(&s->complete, old_chunk); 9381e03f97eSMikulas Patocka if (!e) { 9391e03f97eSMikulas Patocka DMERR("Corruption detected: exception for block %llu is " 9401e03f97eSMikulas Patocka "on disk but not in memory", 9411e03f97eSMikulas Patocka (unsigned long long)old_chunk); 9421e03f97eSMikulas Patocka return -EINVAL; 9431e03f97eSMikulas Patocka } 9441e03f97eSMikulas Patocka 9451e03f97eSMikulas Patocka /* 9461e03f97eSMikulas Patocka * If this is the only chunk using this exception, remove exception. 9471e03f97eSMikulas Patocka */ 9481e03f97eSMikulas Patocka if (!dm_consecutive_chunk_count(e)) { 9491e03f97eSMikulas Patocka dm_remove_exception(e); 9501e03f97eSMikulas Patocka free_completed_exception(e); 9511e03f97eSMikulas Patocka return 0; 9521e03f97eSMikulas Patocka } 9531e03f97eSMikulas Patocka 9541e03f97eSMikulas Patocka /* 9551e03f97eSMikulas Patocka * The chunk may be either at the beginning or the end of a 9561e03f97eSMikulas Patocka * group of consecutive chunks - never in the middle. We are 9571e03f97eSMikulas Patocka * removing chunks in the opposite order to that in which they 9581e03f97eSMikulas Patocka * were added, so this should always be true. 9591e03f97eSMikulas Patocka * Decrement the consecutive chunk counter and adjust the 9601e03f97eSMikulas Patocka * starting point if necessary. 
9611e03f97eSMikulas Patocka */ 9621e03f97eSMikulas Patocka if (old_chunk == e->old_chunk) { 9631e03f97eSMikulas Patocka e->old_chunk++; 9641e03f97eSMikulas Patocka e->new_chunk++; 9651e03f97eSMikulas Patocka } else if (old_chunk != e->old_chunk + 9661e03f97eSMikulas Patocka dm_consecutive_chunk_count(e)) { 9671e03f97eSMikulas Patocka DMERR("Attempt to merge block %llu from the " 9681e03f97eSMikulas Patocka "middle of a chunk range [%llu - %llu]", 9691e03f97eSMikulas Patocka (unsigned long long)old_chunk, 9701e03f97eSMikulas Patocka (unsigned long long)e->old_chunk, 9711e03f97eSMikulas Patocka (unsigned long long) 9721e03f97eSMikulas Patocka e->old_chunk + dm_consecutive_chunk_count(e)); 9731e03f97eSMikulas Patocka return -EINVAL; 9741e03f97eSMikulas Patocka } 9751e03f97eSMikulas Patocka 9761e03f97eSMikulas Patocka dm_consecutive_chunk_count_dec(e); 9771e03f97eSMikulas Patocka 9781e03f97eSMikulas Patocka return 0; 9791e03f97eSMikulas Patocka } 9801e03f97eSMikulas Patocka 9819fe86254SMikulas Patocka static void flush_bios(struct bio *bio); 9829fe86254SMikulas Patocka 9839fe86254SMikulas Patocka static int remove_single_exception_chunk(struct dm_snapshot *s) 9841e03f97eSMikulas Patocka { 9859fe86254SMikulas Patocka struct bio *b = NULL; 9869fe86254SMikulas Patocka int r; 9879fe86254SMikulas Patocka chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; 9881e03f97eSMikulas Patocka 9894ad8d880SNikos Tsironis down_write(&s->lock); 9909fe86254SMikulas Patocka 9919fe86254SMikulas Patocka /* 9929fe86254SMikulas Patocka * Process chunks (and associated exceptions) in reverse order 9939fe86254SMikulas Patocka * so that dm_consecutive_chunk_count_dec() accounting works. 
9949fe86254SMikulas Patocka */ 9959fe86254SMikulas Patocka do { 9961e03f97eSMikulas Patocka r = __remove_single_exception_chunk(s, old_chunk); 9979fe86254SMikulas Patocka if (r) 9989fe86254SMikulas Patocka goto out; 9999fe86254SMikulas Patocka } while (old_chunk-- > s->first_merging_chunk); 10009fe86254SMikulas Patocka 10019fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 10029fe86254SMikulas Patocka 10039fe86254SMikulas Patocka out: 10044ad8d880SNikos Tsironis up_write(&s->lock); 10059fe86254SMikulas Patocka if (b) 10069fe86254SMikulas Patocka flush_bios(b); 10071e03f97eSMikulas Patocka 10081e03f97eSMikulas Patocka return r; 10091e03f97eSMikulas Patocka } 10101e03f97eSMikulas Patocka 101173dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 101273dfd078SMikulas Patocka sector_t sector, unsigned chunk_size); 101373dfd078SMikulas Patocka 10141e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, 10151e03f97eSMikulas Patocka void *context); 10161e03f97eSMikulas Patocka 101773dfd078SMikulas Patocka static uint64_t read_pending_exceptions_done_count(void) 101873dfd078SMikulas Patocka { 101973dfd078SMikulas Patocka uint64_t pending_exceptions_done; 102073dfd078SMikulas Patocka 102173dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 102273dfd078SMikulas Patocka pending_exceptions_done = _pending_exceptions_done_count; 102373dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 102473dfd078SMikulas Patocka 102573dfd078SMikulas Patocka return pending_exceptions_done; 102673dfd078SMikulas Patocka } 102773dfd078SMikulas Patocka 102873dfd078SMikulas Patocka static void increment_pending_exceptions_done_count(void) 102973dfd078SMikulas Patocka { 103073dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 103173dfd078SMikulas Patocka _pending_exceptions_done_count++; 103273dfd078SMikulas Patocka 
spin_unlock(&_pending_exceptions_done_spinlock); 103373dfd078SMikulas Patocka 103473dfd078SMikulas Patocka wake_up_all(&_pending_exceptions_done); 103573dfd078SMikulas Patocka } 103673dfd078SMikulas Patocka 10371e03f97eSMikulas Patocka static void snapshot_merge_next_chunks(struct dm_snapshot *s) 10381e03f97eSMikulas Patocka { 10398a2d5286SMike Snitzer int i, linear_chunks; 10401e03f97eSMikulas Patocka chunk_t old_chunk, new_chunk; 10411e03f97eSMikulas Patocka struct dm_io_region src, dest; 10428a2d5286SMike Snitzer sector_t io_size; 104373dfd078SMikulas Patocka uint64_t previous_count; 10441e03f97eSMikulas Patocka 10451e03f97eSMikulas Patocka BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); 10461e03f97eSMikulas Patocka if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) 10471e03f97eSMikulas Patocka goto shut; 10481e03f97eSMikulas Patocka 10491e03f97eSMikulas Patocka /* 10501e03f97eSMikulas Patocka * valid flag never changes during merge, so no lock required. 10511e03f97eSMikulas Patocka */ 10521e03f97eSMikulas Patocka if (!s->valid) { 10531e03f97eSMikulas Patocka DMERR("Snapshot is invalid: can't merge"); 10541e03f97eSMikulas Patocka goto shut; 10551e03f97eSMikulas Patocka } 10561e03f97eSMikulas Patocka 10578a2d5286SMike Snitzer linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk, 10588a2d5286SMike Snitzer &new_chunk); 10598a2d5286SMike Snitzer if (linear_chunks <= 0) { 1060d8ddb1cfSMike Snitzer if (linear_chunks < 0) { 10611e03f97eSMikulas Patocka DMERR("Read error in exception store: " 10621e03f97eSMikulas Patocka "shutting down merge"); 10634ad8d880SNikos Tsironis down_write(&s->lock); 1064d8ddb1cfSMike Snitzer s->merge_failed = 1; 10654ad8d880SNikos Tsironis up_write(&s->lock); 1066d8ddb1cfSMike Snitzer } 10671e03f97eSMikulas Patocka goto shut; 10681e03f97eSMikulas Patocka } 10691e03f97eSMikulas Patocka 10708a2d5286SMike Snitzer /* Adjust old_chunk and new_chunk to reflect start of linear region */ 10718a2d5286SMike Snitzer old_chunk = 
old_chunk + 1 - linear_chunks; 10728a2d5286SMike Snitzer new_chunk = new_chunk + 1 - linear_chunks; 10738a2d5286SMike Snitzer 10748a2d5286SMike Snitzer /* 10758a2d5286SMike Snitzer * Use one (potentially large) I/O to copy all 'linear_chunks' 10768a2d5286SMike Snitzer * from the exception store to the origin 10778a2d5286SMike Snitzer */ 10788a2d5286SMike Snitzer io_size = linear_chunks * s->store->chunk_size; 10791e03f97eSMikulas Patocka 10801e03f97eSMikulas Patocka dest.bdev = s->origin->bdev; 10811e03f97eSMikulas Patocka dest.sector = chunk_to_sector(s->store, old_chunk); 10828a2d5286SMike Snitzer dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector); 10831e03f97eSMikulas Patocka 10841e03f97eSMikulas Patocka src.bdev = s->cow->bdev; 10851e03f97eSMikulas Patocka src.sector = chunk_to_sector(s->store, new_chunk); 10861e03f97eSMikulas Patocka src.count = dest.count; 10871e03f97eSMikulas Patocka 108873dfd078SMikulas Patocka /* 108973dfd078SMikulas Patocka * Reallocate any exceptions needed in other snapshots then 109073dfd078SMikulas Patocka * wait for the pending exceptions to complete. 109173dfd078SMikulas Patocka * Each time any pending exception (globally on the system) 109273dfd078SMikulas Patocka * completes we are woken and repeat the process to find out 109373dfd078SMikulas Patocka * if we can proceed. While this may not seem a particularly 109473dfd078SMikulas Patocka * efficient algorithm, it is not expected to have any 109573dfd078SMikulas Patocka * significant impact on performance. 109673dfd078SMikulas Patocka */ 109773dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 10988a2d5286SMike Snitzer while (origin_write_extent(s, dest.sector, io_size)) { 109973dfd078SMikulas Patocka wait_event(_pending_exceptions_done, 110073dfd078SMikulas Patocka (read_pending_exceptions_done_count() != 110173dfd078SMikulas Patocka previous_count)); 110273dfd078SMikulas Patocka /* Retry after the wait, until all exceptions are done. 
*/ 110373dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 110473dfd078SMikulas Patocka } 110573dfd078SMikulas Patocka 11064ad8d880SNikos Tsironis down_write(&s->lock); 11079fe86254SMikulas Patocka s->first_merging_chunk = old_chunk; 11088a2d5286SMike Snitzer s->num_merging_chunks = linear_chunks; 11094ad8d880SNikos Tsironis up_write(&s->lock); 11109fe86254SMikulas Patocka 11118a2d5286SMike Snitzer /* Wait until writes to all 'linear_chunks' drain */ 11128a2d5286SMike Snitzer for (i = 0; i < linear_chunks; i++) 11138a2d5286SMike Snitzer __check_for_conflicting_io(s, old_chunk + i); 11149fe86254SMikulas Patocka 11151e03f97eSMikulas Patocka dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); 11161e03f97eSMikulas Patocka return; 11171e03f97eSMikulas Patocka 11181e03f97eSMikulas Patocka shut: 11191e03f97eSMikulas Patocka merge_shutdown(s); 11201e03f97eSMikulas Patocka } 11211e03f97eSMikulas Patocka 11229fe86254SMikulas Patocka static void error_bios(struct bio *bio); 11239fe86254SMikulas Patocka 11241e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, void *context) 11251e03f97eSMikulas Patocka { 11261e03f97eSMikulas Patocka struct dm_snapshot *s = context; 11279fe86254SMikulas Patocka struct bio *b = NULL; 11281e03f97eSMikulas Patocka 11291e03f97eSMikulas Patocka if (read_err || write_err) { 11301e03f97eSMikulas Patocka if (read_err) 11311e03f97eSMikulas Patocka DMERR("Read error: shutting down merge."); 11321e03f97eSMikulas Patocka else 11331e03f97eSMikulas Patocka DMERR("Write error: shutting down merge."); 11341e03f97eSMikulas Patocka goto shut; 11351e03f97eSMikulas Patocka } 11361e03f97eSMikulas Patocka 11379fe86254SMikulas Patocka if (s->store->type->commit_merge(s->store, 11389fe86254SMikulas Patocka s->num_merging_chunks) < 0) { 11391e03f97eSMikulas Patocka DMERR("Write error in exception store: shutting down merge"); 11401e03f97eSMikulas Patocka goto shut; 
11411e03f97eSMikulas Patocka } 11421e03f97eSMikulas Patocka 11439fe86254SMikulas Patocka if (remove_single_exception_chunk(s) < 0) 11449fe86254SMikulas Patocka goto shut; 11459fe86254SMikulas Patocka 11461e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 11471e03f97eSMikulas Patocka 11481e03f97eSMikulas Patocka return; 11491e03f97eSMikulas Patocka 11501e03f97eSMikulas Patocka shut: 11514ad8d880SNikos Tsironis down_write(&s->lock); 1152d8ddb1cfSMike Snitzer s->merge_failed = 1; 11539fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 11544ad8d880SNikos Tsironis up_write(&s->lock); 11559fe86254SMikulas Patocka error_bios(b); 11569fe86254SMikulas Patocka 11571e03f97eSMikulas Patocka merge_shutdown(s); 11581e03f97eSMikulas Patocka } 11591e03f97eSMikulas Patocka 11601e03f97eSMikulas Patocka static void start_merge(struct dm_snapshot *s) 11611e03f97eSMikulas Patocka { 11621e03f97eSMikulas Patocka if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits)) 11631e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 11641e03f97eSMikulas Patocka } 11651e03f97eSMikulas Patocka 11661e03f97eSMikulas Patocka /* 11671e03f97eSMikulas Patocka * Stop the merging process and wait until it finishes. 
11681e03f97eSMikulas Patocka */ 11691e03f97eSMikulas Patocka static void stop_merge(struct dm_snapshot *s) 11701e03f97eSMikulas Patocka { 11711e03f97eSMikulas Patocka set_bit(SHUTDOWN_MERGE, &s->state_bits); 117274316201SNeilBrown wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE); 11731e03f97eSMikulas Patocka clear_bit(SHUTDOWN_MERGE, &s->state_bits); 11741e03f97eSMikulas Patocka } 11751e03f97eSMikulas Patocka 11761da177e4SLinus Torvalds /* 1177b0d3cc01SMike Snitzer * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size> 11781da177e4SLinus Torvalds */ 11791da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 11801da177e4SLinus Torvalds { 11811da177e4SLinus Torvalds struct dm_snapshot *s; 1182cd45daffSMikulas Patocka int i; 11831da177e4SLinus Torvalds int r = -EINVAL; 1184fc56f6fbSMike Snitzer char *origin_path, *cow_path; 11854df2bf46SDingXiang dev_t origin_dev, cow_dev; 118655a62eefSAlasdair G Kergon unsigned args_used, num_flush_bios = 1; 118710b8106aSMike Snitzer fmode_t origin_mode = FMODE_READ; 11881da177e4SLinus Torvalds 11894c7e3bf4SMark McLoughlin if (argc != 4) { 119072d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 11911da177e4SLinus Torvalds r = -EINVAL; 1192fc56f6fbSMike Snitzer goto bad; 11931da177e4SLinus Torvalds } 11941da177e4SLinus Torvalds 119510b8106aSMike Snitzer if (dm_target_is_snapshot_merge(ti)) { 119655a62eefSAlasdair G Kergon num_flush_bios = 2; 119710b8106aSMike Snitzer origin_mode = FMODE_WRITE; 119810b8106aSMike Snitzer } 119910b8106aSMike Snitzer 1200d3775354SKent Overstreet s = kzalloc(sizeof(*s), GFP_KERNEL); 1201fee1998eSJonathan Brassow if (!s) { 1202a2d2b034SJonathan Brassow ti->error = "Cannot allocate private snapshot structure"; 12031da177e4SLinus Torvalds r = -ENOMEM; 1204fc56f6fbSMike Snitzer goto bad; 12051da177e4SLinus Torvalds } 12061da177e4SLinus Torvalds 1207c2411045SMikulas Patocka origin_path = argv[0]; 
1208c2411045SMikulas Patocka argv++; 1209c2411045SMikulas Patocka argc--; 1210c2411045SMikulas Patocka 1211c2411045SMikulas Patocka r = dm_get_device(ti, origin_path, origin_mode, &s->origin); 1212c2411045SMikulas Patocka if (r) { 1213c2411045SMikulas Patocka ti->error = "Cannot get origin device"; 1214c2411045SMikulas Patocka goto bad_origin; 1215c2411045SMikulas Patocka } 12164df2bf46SDingXiang origin_dev = s->origin->bdev->bd_dev; 1217c2411045SMikulas Patocka 1218fc56f6fbSMike Snitzer cow_path = argv[0]; 1219fc56f6fbSMike Snitzer argv++; 1220fc56f6fbSMike Snitzer argc--; 1221fc56f6fbSMike Snitzer 12224df2bf46SDingXiang cow_dev = dm_get_dev_t(cow_path); 12234df2bf46SDingXiang if (cow_dev && cow_dev == origin_dev) { 12244df2bf46SDingXiang ti->error = "COW device cannot be the same as origin device"; 12254df2bf46SDingXiang r = -EINVAL; 12264df2bf46SDingXiang goto bad_cow; 12274df2bf46SDingXiang } 12284df2bf46SDingXiang 1229024d37e9SMilan Broz r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); 1230fc56f6fbSMike Snitzer if (r) { 1231fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 1232fc56f6fbSMike Snitzer goto bad_cow; 1233fc56f6fbSMike Snitzer } 1234fc56f6fbSMike Snitzer 1235fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 1236fc56f6fbSMike Snitzer if (r) { 1237fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 1238fc56f6fbSMike Snitzer r = -EINVAL; 1239fc56f6fbSMike Snitzer goto bad_store; 1240fc56f6fbSMike Snitzer } 1241fc56f6fbSMike Snitzer 1242fc56f6fbSMike Snitzer argv += args_used; 1243fc56f6fbSMike Snitzer argc -= args_used; 1244fc56f6fbSMike Snitzer 1245fc56f6fbSMike Snitzer s->ti = ti; 12461da177e4SLinus Torvalds s->valid = 1; 124776c44f6dSMikulas Patocka s->snapshot_overflowed = 0; 1248aa14edebSAlasdair G Kergon s->active = 0; 1249879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 12503f1637f2SNikos Tsironis 
spin_lock_init(&s->pe_allocation_lock); 1251230c83afSMikulas Patocka s->exception_start_sequence = 0; 1252230c83afSMikulas Patocka s->exception_complete_sequence = 0; 12533db2776dSDavid Jeffery s->out_of_order_tree = RB_ROOT; 12544ad8d880SNikos Tsironis init_rwsem(&s->lock); 1255c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 1256ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 12571e03f97eSMikulas Patocka s->state_bits = 0; 1258d8ddb1cfSMike Snitzer s->merge_failed = 0; 12599fe86254SMikulas Patocka s->first_merging_chunk = 0; 12609fe86254SMikulas Patocka s->num_merging_chunks = 0; 12619fe86254SMikulas Patocka bio_list_init(&s->bios_queued_during_merge); 12621da177e4SLinus Torvalds 12631da177e4SLinus Torvalds /* Allocate hash table for COW data */ 1264fee1998eSJonathan Brassow if (init_hash_tables(s)) { 12651da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 12661da177e4SLinus Torvalds r = -ENOMEM; 1267fee1998eSJonathan Brassow goto bad_hash_tables; 12681da177e4SLinus Torvalds } 12691da177e4SLinus Torvalds 1270721b1d98SNikos Tsironis sema_init(&s->cow_count, (cow_threshold > 0) ? 
cow_threshold : INT_MAX); 1271721b1d98SNikos Tsironis 1272df5d2e90SMikulas Patocka s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); 1273fa34ce73SMikulas Patocka if (IS_ERR(s->kcopyd_client)) { 1274fa34ce73SMikulas Patocka r = PTR_ERR(s->kcopyd_client); 12751da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 1276fee1998eSJonathan Brassow goto bad_kcopyd; 12771da177e4SLinus Torvalds } 12781da177e4SLinus Torvalds 12796f1c819cSKent Overstreet r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache); 12806f1c819cSKent Overstreet if (r) { 128192e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 1282fee1998eSJonathan Brassow goto bad_pending_pool; 128392e86812SMikulas Patocka } 128492e86812SMikulas Patocka 1285cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1286cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 1287cd45daffSMikulas Patocka 1288cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 1289cd45daffSMikulas Patocka 1290c1f0c183SMike Snitzer ti->private = s; 129155a62eefSAlasdair G Kergon ti->num_flush_bios = num_flush_bios; 129230187e1dSMike Snitzer ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk); 1293c1f0c183SMike Snitzer 1294c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 1295c1f0c183SMike Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 1296c1f0c183SMike Snitzer r = register_snapshot(s); 1297c1f0c183SMike Snitzer if (r == -ENOMEM) { 1298c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 1299c1f0c183SMike Snitzer goto bad_load_and_register; 1300c1f0c183SMike Snitzer } else if (r < 0) { 1301c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 1302c1f0c183SMike Snitzer goto bad_load_and_register; 1303c1f0c183SMike Snitzer } 1304c1f0c183SMike Snitzer 1305c1f0c183SMike Snitzer /* 1306c1f0c183SMike 
Snitzer * Metadata must only be loaded into one table at once, so skip this 1307c1f0c183SMike Snitzer * if metadata will be handed over during resume. 1308c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 1309c1f0c183SMike Snitzer * ensure it's ignored. 1310c1f0c183SMike Snitzer */ 1311c1f0c183SMike Snitzer if (r > 0) { 1312c1f0c183SMike Snitzer s->store->chunk_size = 0; 1313c1f0c183SMike Snitzer return 0; 1314c1f0c183SMike Snitzer } 1315c1f0c183SMike Snitzer 1316493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 1317493df71cSJonathan Brassow (void *)s); 13180764147bSMilan Broz if (r < 0) { 1319f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 1320c1f0c183SMike Snitzer goto bad_read_metadata; 13210764147bSMilan Broz } else if (r > 0) { 13220764147bSMilan Broz s->valid = 0; 13230764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 1324f9cea4f7SMark McLoughlin } 1325aa14edebSAlasdair G Kergon 13263f2412dcSMikulas Patocka if (!s->store->chunk_size) { 13273f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 1328c1f0c183SMike Snitzer goto bad_read_metadata; 13293f2412dcSMikulas Patocka } 1330542f9038SMike Snitzer 1331542f9038SMike Snitzer r = dm_set_target_max_io_len(ti, s->store->chunk_size); 1332542f9038SMike Snitzer if (r) 1333542f9038SMike Snitzer goto bad_read_metadata; 13341da177e4SLinus Torvalds 13351da177e4SLinus Torvalds return 0; 13361da177e4SLinus Torvalds 1337c1f0c183SMike Snitzer bad_read_metadata: 1338c1f0c183SMike Snitzer unregister_snapshot(s); 1339c1f0c183SMike Snitzer 1340cd45daffSMikulas Patocka bad_load_and_register: 13416f1c819cSKent Overstreet mempool_exit(&s->pending_pool); 134292e86812SMikulas Patocka 1343fee1998eSJonathan Brassow bad_pending_pool: 1344eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 13451da177e4SLinus Torvalds 1346fee1998eSJonathan Brassow bad_kcopyd: 13473510cb94SJon Brassow 
dm_exception_table_exit(&s->pending, pending_cache); 13483510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 13491da177e4SLinus Torvalds 1350fee1998eSJonathan Brassow bad_hash_tables: 1351fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 1352fc56f6fbSMike Snitzer 1353fc56f6fbSMike Snitzer bad_store: 1354fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1355fc56f6fbSMike Snitzer 1356fc56f6fbSMike Snitzer bad_cow: 1357c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1358c2411045SMikulas Patocka 1359c2411045SMikulas Patocka bad_origin: 13601da177e4SLinus Torvalds kfree(s); 13611da177e4SLinus Torvalds 1362fc56f6fbSMike Snitzer bad: 13631da177e4SLinus Torvalds return r; 13641da177e4SLinus Torvalds } 13651da177e4SLinus Torvalds 136631c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 136731c93a0cSMilan Broz { 1368eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 136931c93a0cSMilan Broz s->kcopyd_client = NULL; 137031c93a0cSMilan Broz 13713510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 13723510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 137331c93a0cSMilan Broz } 137431c93a0cSMilan Broz 1375c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 1376c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 1377c1f0c183SMike Snitzer { 1378c1f0c183SMike Snitzer union { 1379c1f0c183SMike Snitzer struct dm_exception_table table_swap; 1380c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 1381c1f0c183SMike Snitzer } u; 1382c1f0c183SMike Snitzer 1383c1f0c183SMike Snitzer /* 1384c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 
1385c1f0c183SMike Snitzer */ 1386c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 1387c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 1388c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 1389c1f0c183SMike Snitzer 1390c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 1391c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 1392b0d3cc01SMike Snitzer snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow; 1393c1f0c183SMike Snitzer snap_src->store = u.store_swap; 1394c1f0c183SMike Snitzer 1395c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 1396c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 1397c1f0c183SMike Snitzer 1398542f9038SMike Snitzer snap_dest->ti->max_io_len = snap_dest->store->chunk_size; 1399c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 140076c44f6dSMikulas Patocka snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed; 1401c1f0c183SMike Snitzer 1402c1f0c183SMike Snitzer /* 1403c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 
1404c1f0c183SMike Snitzer */ 1405c1f0c183SMike Snitzer snap_src->valid = 0; 1406c1f0c183SMike Snitzer } 1407c1f0c183SMike Snitzer 14081da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 14091da177e4SLinus Torvalds { 1410cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1411cd45daffSMikulas Patocka int i; 1412cd45daffSMikulas Patocka #endif 1413028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1414c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 14151da177e4SLinus Torvalds 1416c1f0c183SMike Snitzer down_read(&_origins_lock); 1417c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 14189d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1419c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 14204ad8d880SNikos Tsironis down_write(&snap_dest->lock); 1421c1f0c183SMike Snitzer snap_dest->valid = 0; 14224ad8d880SNikos Tsironis up_write(&snap_dest->lock); 1423c1f0c183SMike Snitzer DMERR("Cancelling snapshot handover."); 1424c1f0c183SMike Snitzer } 1425c1f0c183SMike Snitzer up_read(&_origins_lock); 1426c1f0c183SMike Snitzer 14271e03f97eSMikulas Patocka if (dm_target_is_snapshot_merge(ti)) 14281e03f97eSMikulas Patocka stop_merge(s); 14291e03f97eSMikulas Patocka 1430138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 1431138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 14321da177e4SLinus Torvalds unregister_snapshot(s); 14331da177e4SLinus Torvalds 1434879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 143590fa1527SMikulas Patocka msleep(1); 1436879129d2SMikulas Patocka /* 14376f1c819cSKent Overstreet * Ensure instructions in mempool_exit aren't reordered 1438879129d2SMikulas Patocka * before atomic_read. 
1439879129d2SMikulas Patocka */ 1440879129d2SMikulas Patocka smp_mb(); 1441879129d2SMikulas Patocka 1442cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1443cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1444cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 1445cd45daffSMikulas Patocka #endif 1446cd45daffSMikulas Patocka 144731c93a0cSMilan Broz __free_exceptions(s); 14481da177e4SLinus Torvalds 14496f1c819cSKent Overstreet mempool_exit(&s->pending_pool); 145092e86812SMikulas Patocka 1451fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 1452138728dcSAlasdair G Kergon 1453fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1454fc56f6fbSMike Snitzer 1455c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1456c2411045SMikulas Patocka 14571da177e4SLinus Torvalds kfree(s); 14581da177e4SLinus Torvalds } 14591da177e4SLinus Torvalds 14601da177e4SLinus Torvalds /* 14611da177e4SLinus Torvalds * Flush a list of buffers. 14621da177e4SLinus Torvalds */ 14631da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 14641da177e4SLinus Torvalds { 14651da177e4SLinus Torvalds struct bio *n; 14661da177e4SLinus Torvalds 14671da177e4SLinus Torvalds while (bio) { 14681da177e4SLinus Torvalds n = bio->bi_next; 14691da177e4SLinus Torvalds bio->bi_next = NULL; 14701da177e4SLinus Torvalds generic_make_request(bio); 14711da177e4SLinus Torvalds bio = n; 14721da177e4SLinus Torvalds } 14731da177e4SLinus Torvalds } 14741da177e4SLinus Torvalds 1475515ad66cSMikulas Patocka static int do_origin(struct dm_dev *origin, struct bio *bio); 1476515ad66cSMikulas Patocka 1477515ad66cSMikulas Patocka /* 1478515ad66cSMikulas Patocka * Flush a list of buffers. 
*/
/*
 * retry_origin_bios() walks the bi_next-linked chain starting at @bio.
 * Each bio is unlinked (bi_next cleared) and handed to do_origin() with
 * s->origin; it is submitted here with generic_make_request() only when
 * do_origin() returns DM_MAPIO_REMAPPED.  For any other return value the
 * bio is not touched again by this function.
 */
static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
{
	struct bio *n;
	int r;

	while (bio) {
		/* Save the successor before the bio is detached from the chain. */
		n = bio->bi_next;
		bio->bi_next = NULL;
		r = do_origin(s->origin, bio);
		if (r == DM_MAPIO_REMAPPED)
			generic_make_request(bio);
		bio = n;
	}
}

/*
 * Error a list of buffers.
 *
 * Every bio on the bi_next-linked chain is unlinked and completed with
 * bio_io_error().
 */
static void error_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		bio_io_error(bio);
		bio = n;
	}
}

/*
 * Mark the snapshot invalid and raise a table event.
 *
 * Does nothing if s->valid is already 0.  A message is logged for
 * err == -EIO and err == -ENOMEM; other values are not logged.  The
 * store's drop_snapshot() hook is called when the store type provides
 * one, then s->valid is cleared and dm_table_event() is signalled.
 *
 * This function takes no lock itself; the callers visible in this file
 * (invalidate_snapshot() and snapshot_map()) hold s->lock for writing.
 */
static void __invalidate_snapshot(struct dm_snapshot *s, int err)
{
	if (!s->valid)
		return;

	if (err == -EIO)
		DMERR("Invalidating snapshot: Error reading/writing.");
	else if (err == -ENOMEM)
		DMERR("Invalidating snapshot: Unable to allocate exception.");

	/* drop_snapshot is optional for a store type. */
	if (s->store->type->drop_snapshot)
		s->store->type->drop_snapshot(s->store);

	s->valid = 0;

	dm_table_event(s->ti->table);
}

/*
 * Locked wrapper: runs __invalidate_snapshot() with s->lock held for
 * writing.
 */
static void invalidate_snapshot(struct dm_snapshot *s, int err)
{
	down_write(&s->lock);
	__invalidate_snapshot(s, err);
	up_write(&s->lock);
}

/*
 * Completion callback for a pending exception; complete_exception()
 * passes it to the store's commit_exception() with the pending exception
 * as @context.
 *
 * @context: the struct dm_snap_pending_exception being completed.
 * @success: zero means the copy/commit failed; the snapshot is then
 *           invalidated and the queued snapshot bios are errored.
 *
 * On success a completed exception holding a copy of pe->e is inserted
 * into s->complete.  In every case the pending exception is removed from
 * its table, the bios queued on it are dispatched (errored or submitted),
 * the origin bios are retried through do_origin(), and @pe is freed.
 */
static void pending_complete(void *context, int success)
{
	struct dm_snap_pending_exception *pe = context;
	struct dm_exception *e;
	struct dm_snapshot *s = pe->snap;
	struct bio *origin_bios = NULL;
	struct bio *snapshot_bios = NULL;
	struct bio *full_bio = NULL;
	struct dm_exception_table_lock lock;
	int error = 0;

	dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);

	if (!success) {
		/* Read/write error - snapshot is unusable */
		invalidate_snapshot(s, -EIO);
		error = 1;

		/* The out: path unlocks the table lock, so take it first. */
		dm_exception_table_lock(&lock);
		goto out;
	}

	e = alloc_completed_exception(GFP_NOIO);
	if (!e) {
		invalidate_snapshot(s, -ENOMEM);
		error = 1;

		/* As above: out: expects the table lock to be held. */
		dm_exception_table_lock(&lock);
		goto out;
	}
	*e = pe->e;

	down_read(&s->lock);
	dm_exception_table_lock(&lock);
	if (!s->valid) {
		/*
		 * The snapshot is no longer valid: discard the new exception
		 * and fail the queued bios.  Only s->lock is dropped here;
		 * the table lock stays held until the out: path.
		 */
		up_read(&s->lock);
		free_completed_exception(e);
		error = 1;

		goto out;
	}

	/*
	 * Add a proper exception. After inserting the completed exception all
	 * subsequent snapshot reads to this chunk will be redirected to the
	 * COW device. This ensures that we do not starve. Moreover, as long
	 * as the pending exception exists, neither origin writes nor snapshot
	 * merging can overwrite the chunk in origin.
	 */
	dm_insert_exception(&s->complete, e);
	up_read(&s->lock);

	/* Wait for conflicting reads to drain */
	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
		/* The table lock is released around the call and retaken after. */
		dm_exception_table_unlock(&lock);
		__check_for_conflicting_io(s, pe->e.old_chunk);
		dm_exception_table_lock(&lock);
	}

out:
	/* Remove the in-flight exception from the list */
	dm_remove_exception(&pe->e);

	dm_exception_table_unlock(&lock);

	/* Detach everything queued on the pending exception. */
	snapshot_bios = bio_list_get(&pe->snapshot_bios);
	origin_bios = bio_list_get(&pe->origin_bios);
	full_bio = pe->full_bio;
	/* Restore the completion handler that start_full_bio() saved. */
	if (full_bio)
		full_bio->bi_end_io = pe->full_bio_end_io;
	increment_pending_exceptions_done_count();

	/* Submit any pending write bios */
	if (error) {
		if (full_bio)
			bio_io_error(full_bio);
		error_bios(snapshot_bios);
	} else {
		if (full_bio)
			bio_endio(full_bio);
		flush_bios(snapshot_bios);
	}

	retry_origin_bios(s, origin_bios);

	/* pe must not be used after this point. */
	free_pending_exception(pe);
}
Torvalds 1623230c83afSMikulas Patocka static void complete_exception(struct dm_snap_pending_exception *pe) 1624230c83afSMikulas Patocka { 1625230c83afSMikulas Patocka struct dm_snapshot *s = pe->snap; 1626230c83afSMikulas Patocka 1627230c83afSMikulas Patocka /* Update the metadata if we are persistent */ 1628385277bfSMikulas Patocka s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error, 1629385277bfSMikulas Patocka pending_complete, pe); 1630230c83afSMikulas Patocka } 1631230c83afSMikulas Patocka 16321da177e4SLinus Torvalds /* 16331da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 16341da177e4SLinus Torvalds * this code so don't block. 16351da177e4SLinus Torvalds */ 16364cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 16371da177e4SLinus Torvalds { 1638028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 16391da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 16401da177e4SLinus Torvalds 1641230c83afSMikulas Patocka pe->copy_error = read_err || write_err; 16421da177e4SLinus Torvalds 1643230c83afSMikulas Patocka if (pe->exception_sequence == s->exception_complete_sequence) { 16443db2776dSDavid Jeffery struct rb_node *next; 16453db2776dSDavid Jeffery 1646230c83afSMikulas Patocka s->exception_complete_sequence++; 1647230c83afSMikulas Patocka complete_exception(pe); 1648230c83afSMikulas Patocka 16493db2776dSDavid Jeffery next = rb_first(&s->out_of_order_tree); 16503db2776dSDavid Jeffery while (next) { 16513db2776dSDavid Jeffery pe = rb_entry(next, struct dm_snap_pending_exception, 16523db2776dSDavid Jeffery out_of_order_node); 1653230c83afSMikulas Patocka if (pe->exception_sequence != s->exception_complete_sequence) 1654230c83afSMikulas Patocka break; 16553db2776dSDavid Jeffery next = rb_next(next); 1656230c83afSMikulas Patocka s->exception_complete_sequence++; 16573db2776dSDavid Jeffery rb_erase(&pe->out_of_order_node, 
&s->out_of_order_tree); 1658230c83afSMikulas Patocka complete_exception(pe); 16593db2776dSDavid Jeffery cond_resched(); 1660230c83afSMikulas Patocka } 1661230c83afSMikulas Patocka } else { 16623db2776dSDavid Jeffery struct rb_node *parent = NULL; 16633db2776dSDavid Jeffery struct rb_node **p = &s->out_of_order_tree.rb_node; 1664230c83afSMikulas Patocka struct dm_snap_pending_exception *pe2; 1665230c83afSMikulas Patocka 16663db2776dSDavid Jeffery while (*p) { 16673db2776dSDavid Jeffery pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node); 16683db2776dSDavid Jeffery parent = *p; 16693db2776dSDavid Jeffery 16703db2776dSDavid Jeffery BUG_ON(pe->exception_sequence == pe2->exception_sequence); 16713db2776dSDavid Jeffery if (pe->exception_sequence < pe2->exception_sequence) 16723db2776dSDavid Jeffery p = &((*p)->rb_left); 16733db2776dSDavid Jeffery else 16743db2776dSDavid Jeffery p = &((*p)->rb_right); 1675230c83afSMikulas Patocka } 16763db2776dSDavid Jeffery 16773db2776dSDavid Jeffery rb_link_node(&pe->out_of_order_node, parent, p); 16783db2776dSDavid Jeffery rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree); 1679230c83afSMikulas Patocka } 1680721b1d98SNikos Tsironis up(&s->cow_count); 16811da177e4SLinus Torvalds } 16821da177e4SLinus Torvalds 16831da177e4SLinus Torvalds /* 16841da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 
16851da177e4SLinus Torvalds */ 1686028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 16871da177e4SLinus Torvalds { 16881da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 168922a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 16901da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 16911da177e4SLinus Torvalds sector_t dev_size; 16921da177e4SLinus Torvalds 16931da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 16941da177e4SLinus Torvalds 16951da177e4SLinus Torvalds src.bdev = bdev; 169671fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1697df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 16981da177e4SLinus Torvalds 1699fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 170071fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 17011da177e4SLinus Torvalds dest.count = src.count; 17021da177e4SLinus Torvalds 17031da177e4SLinus Torvalds /* Hand over to kcopyd */ 1704721b1d98SNikos Tsironis down(&s->cow_count); 1705a2d2b034SJonathan Brassow dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); 17061da177e4SLinus Torvalds } 17071da177e4SLinus Torvalds 17084246a0b6SChristoph Hellwig static void full_bio_end_io(struct bio *bio) 1709a6e50b40SMikulas Patocka { 1710a6e50b40SMikulas Patocka void *callback_data = bio->bi_private; 1711a6e50b40SMikulas Patocka 17124e4cbee9SChristoph Hellwig dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 
1 : 0); 1713a6e50b40SMikulas Patocka } 1714a6e50b40SMikulas Patocka 1715a6e50b40SMikulas Patocka static void start_full_bio(struct dm_snap_pending_exception *pe, 1716a6e50b40SMikulas Patocka struct bio *bio) 1717a6e50b40SMikulas Patocka { 1718a6e50b40SMikulas Patocka struct dm_snapshot *s = pe->snap; 1719a6e50b40SMikulas Patocka void *callback_data; 1720a6e50b40SMikulas Patocka 1721a6e50b40SMikulas Patocka pe->full_bio = bio; 1722a6e50b40SMikulas Patocka pe->full_bio_end_io = bio->bi_end_io; 1723a6e50b40SMikulas Patocka 1724721b1d98SNikos Tsironis down(&s->cow_count); 1725a6e50b40SMikulas Patocka callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, 1726a6e50b40SMikulas Patocka copy_callback, pe); 1727a6e50b40SMikulas Patocka 1728a6e50b40SMikulas Patocka bio->bi_end_io = full_bio_end_io; 1729a6e50b40SMikulas Patocka bio->bi_private = callback_data; 1730a6e50b40SMikulas Patocka 1731a6e50b40SMikulas Patocka generic_make_request(bio); 1732a6e50b40SMikulas Patocka } 1733a6e50b40SMikulas Patocka 17342913808eSMikulas Patocka static struct dm_snap_pending_exception * 17352913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 17362913808eSMikulas Patocka { 17373510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 17382913808eSMikulas Patocka 17392913808eSMikulas Patocka if (!e) 17402913808eSMikulas Patocka return NULL; 17412913808eSMikulas Patocka 17422913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 17432913808eSMikulas Patocka } 17442913808eSMikulas Patocka 17451da177e4SLinus Torvalds /* 174665fc7c37SNikos Tsironis * Inserts a pending exception into the pending table. 174765fc7c37SNikos Tsironis * 17483f1637f2SNikos Tsironis * NOTE: a write lock must be held on the chunk's pending exception table slot 17493f1637f2SNikos Tsironis * before calling this. 
175065fc7c37SNikos Tsironis */ 175165fc7c37SNikos Tsironis static struct dm_snap_pending_exception * 175265fc7c37SNikos Tsironis __insert_pending_exception(struct dm_snapshot *s, 175365fc7c37SNikos Tsironis struct dm_snap_pending_exception *pe, chunk_t chunk) 175465fc7c37SNikos Tsironis { 175565fc7c37SNikos Tsironis pe->e.old_chunk = chunk; 175665fc7c37SNikos Tsironis bio_list_init(&pe->origin_bios); 175765fc7c37SNikos Tsironis bio_list_init(&pe->snapshot_bios); 175865fc7c37SNikos Tsironis pe->started = 0; 175965fc7c37SNikos Tsironis pe->full_bio = NULL; 176065fc7c37SNikos Tsironis 17613f1637f2SNikos Tsironis spin_lock(&s->pe_allocation_lock); 176265fc7c37SNikos Tsironis if (s->store->type->prepare_exception(s->store, &pe->e)) { 17633f1637f2SNikos Tsironis spin_unlock(&s->pe_allocation_lock); 176465fc7c37SNikos Tsironis free_pending_exception(pe); 176565fc7c37SNikos Tsironis return NULL; 176665fc7c37SNikos Tsironis } 176765fc7c37SNikos Tsironis 176865fc7c37SNikos Tsironis pe->exception_sequence = s->exception_start_sequence++; 17693f1637f2SNikos Tsironis spin_unlock(&s->pe_allocation_lock); 177065fc7c37SNikos Tsironis 177165fc7c37SNikos Tsironis dm_insert_exception(&s->pending, &pe->e); 177265fc7c37SNikos Tsironis 177365fc7c37SNikos Tsironis return pe; 177465fc7c37SNikos Tsironis } 177565fc7c37SNikos Tsironis 177665fc7c37SNikos Tsironis /* 17771da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 17781da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 17791da177e4SLinus Torvalds * it into the pending table. 17801da177e4SLinus Torvalds * 17813f1637f2SNikos Tsironis * NOTE: a write lock must be held on the chunk's pending exception table slot 17823f1637f2SNikos Tsironis * before calling this. 
*/
static struct dm_snap_pending_exception *
__find_pending_exception(struct dm_snapshot *s,
			 struct dm_snap_pending_exception *pe, chunk_t chunk)
{
	struct dm_snap_pending_exception *pe2;

	pe2 = __lookup_pending_exception(s, chunk);
	if (pe2) {
		/*
		 * A pending exception for this chunk already exists: the
		 * caller's preallocated pe is freed and the existing one
		 * is returned instead.
		 */
		free_pending_exception(pe);
		return pe2;
	}

	/*
	 * May return NULL; __insert_pending_exception() has then already
	 * freed pe.
	 */
	return __insert_pending_exception(s, pe, chunk);
}

/*
 * Redirect @bio to the COW device.
 *
 * The new start sector is the first sector of chunk
 * dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk), plus the bits of
 * the bio's current sector selected by s->store->chunk_mask (presumably
 * the offset inside the chunk - TODO confirm chunk_mask is chunk_size - 1).
 */
static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
			    struct bio *bio, chunk_t chunk)
{
	bio_set_dev(bio, s->cow->bdev);
	bio->bi_iter.bi_sector =
		chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
				(chunk - e->old_chunk)) +
		(bio->bi_iter.bi_sector & s->store->chunk_mask);
}

/*
 * Map a bio submitted to the snapshot target.
 *
 * Returns:
 *   DM_MAPIO_REMAPPED  - the bio was pointed at the device it should go
 *                        to: the COW device for REQ_PREFLUSH bios and for
 *                        chunks that have a completed exception, the
 *                        origin for non-write bios to chunks that have no
 *                        exception (these are also chunk-tracked).
 *   DM_MAPIO_SUBMITTED - a write to a chunk without a completed exception
 *                        was attached to a pending exception (queued on
 *                        it, or submitted as the full-chunk bio).
 *   DM_MAPIO_KILL      - the snapshot is invalid, the bio is a write to
 *                        an overflowed snapshot, or no pending exception
 *                        could be set up.
 *
 * Locking: s->lock is taken for reading together with the per-chunk
 * exception table lock.  Paths that jump to "out" have already released
 * both; paths that jump to "out_unlock" still hold both.
 */
static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_exception *e;
	struct dm_snapshot *s = ti->private;
	int r = DM_MAPIO_REMAPPED;
	chunk_t chunk;
	struct dm_snap_pending_exception *pe = NULL;
	struct dm_exception_table_lock lock;

	init_tracked_chunk(bio);

	/* Flushes are sent to the COW device without any chunk lookup. */
	if (bio->bi_opf & REQ_PREFLUSH) {
		bio_set_dev(bio, s->cow->bdev);
		return DM_MAPIO_REMAPPED;
	}

	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
	dm_exception_table_lock_init(s, chunk, &lock);

	/* Full snapshots are not usable */
	/* To get here the table must be live so s->active is always set. */
	/* Unlocked early check; s->valid is tested again under the locks. */
	if (!s->valid)
		return DM_MAPIO_KILL;

	down_read(&s->lock);
	dm_exception_table_lock(&lock);

	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
	    bio_data_dir(bio) == WRITE)) {
		r = DM_MAPIO_KILL;
		goto out_unlock;
	}

	/* If the block is already remapped - use that, else remap it */
	e = dm_lookup_exception(&s->complete, chunk);
	if (e) {
		remap_exception(s, e, bio, chunk);
		goto out_unlock;
	}

	/*
	 * Write to snapshot - higher level takes care of RW/RO
	 * flags so we should only get this if we are
	 * writeable.
	 */
	if (bio_data_dir(bio) == WRITE) {
		pe = __lookup_pending_exception(s, chunk);
		if (!pe) {
			/*
			 * The table lock is dropped around the allocation
			 * (presumably because it may sleep - TODO confirm).
			 */
			dm_exception_table_unlock(&lock);
			pe = alloc_pending_exception(s);
			dm_exception_table_lock(&lock);

			/*
			 * The lock was released, so the completed table is
			 * looked up again before the new pe is used.
			 */
			e = dm_lookup_exception(&s->complete, chunk);
			if (e) {
				free_pending_exception(pe);
				remap_exception(s, e, bio, chunk);
				goto out_unlock;
			}

			/*
			 * Returns an already-inserted pe for this chunk or
			 * inserts ours; NULL means setup failed and pe has
			 * been freed.
			 */
			pe = __find_pending_exception(s, pe, chunk);
			if (!pe) {
				/*
				 * Swap the read lock for the write lock
				 * before changing snapshot state.
				 */
				dm_exception_table_unlock(&lock);
				up_read(&s->lock);

				down_write(&s->lock);

				if (s->store->userspace_supports_overflow) {
					/* Record the overflow once; snapshot stays valid. */
					if (s->valid && !s->snapshot_overflowed) {
						s->snapshot_overflowed = 1;
						DMERR("Snapshot overflowed: Unable to allocate exception.");
					}
				} else
					__invalidate_snapshot(s, -ENOMEM);
				up_write(&s->lock);

				r = DM_MAPIO_KILL;
				goto out;
			}
		}

		remap_exception(s, &pe->e, bio, chunk);

		r = DM_MAPIO_SUBMITTED;

		/*
		 * Copy not started yet and the bio is exactly one chunk
		 * long: the bio itself is submitted via start_full_bio()
		 * instead of being queued behind start_copy().
		 */
		if (!pe->started &&
		    bio->bi_iter.bi_size ==
		    (s->store->chunk_size << SECTOR_SHIFT)) {
			pe->started = 1;

			dm_exception_table_unlock(&lock);
			up_read(&s->lock);

			start_full_bio(pe, bio);
			goto out;
		}

		/* Otherwise the bio waits on the pending exception. */
		bio_list_add(&pe->snapshot_bios, bio);

		if (!pe->started) {
			/* this is protected by the exception table lock */
			pe->started = 1;

			dm_exception_table_unlock(&lock);
			up_read(&s->lock);

			start_copy(pe);
			goto out;
		}
	} else {
		/* Non-write bio, no exception: send to the origin and track the chunk. */
		bio_set_dev(bio, s->origin->bdev);
		track_chunk(s, bio, chunk);
	}

out_unlock:
	dm_exception_table_unlock(&lock);
	up_read(&s->lock);
out:
	return r;
}

/*
 * A snapshot-merge target behaves like a combination of a snapshot
 * target and a snapshot-origin target. It only generates new
 * exceptions in other snapshots and not in the one that is being
 * merged.
19343452c2a1SMikulas Patocka * 19353452c2a1SMikulas Patocka * For each chunk, if there is an existing exception, it is used to 19363452c2a1SMikulas Patocka * redirect I/O to the cow device. Otherwise I/O is sent to the origin, 19373452c2a1SMikulas Patocka * which in turn might generate exceptions in other snapshots. 19389fe86254SMikulas Patocka * If merging is currently taking place on the chunk in question, the 19399fe86254SMikulas Patocka * I/O is deferred by adding it to s->bios_queued_during_merge. 19403452c2a1SMikulas Patocka */ 19417de3ee57SMikulas Patocka static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) 19423452c2a1SMikulas Patocka { 19433452c2a1SMikulas Patocka struct dm_exception *e; 19443452c2a1SMikulas Patocka struct dm_snapshot *s = ti->private; 19453452c2a1SMikulas Patocka int r = DM_MAPIO_REMAPPED; 19463452c2a1SMikulas Patocka chunk_t chunk; 19473452c2a1SMikulas Patocka 1948ee18026aSMikulas Patocka init_tracked_chunk(bio); 1949ee18026aSMikulas Patocka 19501eff9d32SJens Axboe if (bio->bi_opf & REQ_PREFLUSH) { 195155a62eefSAlasdair G Kergon if (!dm_bio_get_target_bio_nr(bio)) 195274d46992SChristoph Hellwig bio_set_dev(bio, s->origin->bdev); 195310b8106aSMike Snitzer else 195474d46992SChristoph Hellwig bio_set_dev(bio, s->cow->bdev); 195510b8106aSMike Snitzer return DM_MAPIO_REMAPPED; 195610b8106aSMike Snitzer } 195710b8106aSMike Snitzer 19584f024f37SKent Overstreet chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); 19593452c2a1SMikulas Patocka 19604ad8d880SNikos Tsironis down_write(&s->lock); 19613452c2a1SMikulas Patocka 1962d2fdb776SMikulas Patocka /* Full merging snapshots are redirected to the origin */ 1963d2fdb776SMikulas Patocka if (!s->valid) 1964d2fdb776SMikulas Patocka goto redirect_to_origin; 19653452c2a1SMikulas Patocka 19663452c2a1SMikulas Patocka /* If the block is already remapped - use that */ 19673452c2a1SMikulas Patocka e = dm_lookup_exception(&s->complete, chunk); 19683452c2a1SMikulas Patocka if (e) { 
19699fe86254SMikulas Patocka /* Queue writes overlapping with chunks being merged */ 197070246286SChristoph Hellwig if (bio_data_dir(bio) == WRITE && 19719fe86254SMikulas Patocka chunk >= s->first_merging_chunk && 19729fe86254SMikulas Patocka chunk < (s->first_merging_chunk + 19739fe86254SMikulas Patocka s->num_merging_chunks)) { 197474d46992SChristoph Hellwig bio_set_dev(bio, s->origin->bdev); 19759fe86254SMikulas Patocka bio_list_add(&s->bios_queued_during_merge, bio); 19769fe86254SMikulas Patocka r = DM_MAPIO_SUBMITTED; 19779fe86254SMikulas Patocka goto out_unlock; 19789fe86254SMikulas Patocka } 197917aa0332SMikulas Patocka 19803452c2a1SMikulas Patocka remap_exception(s, e, bio, chunk); 198117aa0332SMikulas Patocka 198270246286SChristoph Hellwig if (bio_data_dir(bio) == WRITE) 1983ee18026aSMikulas Patocka track_chunk(s, bio, chunk); 19843452c2a1SMikulas Patocka goto out_unlock; 19853452c2a1SMikulas Patocka } 19863452c2a1SMikulas Patocka 1987d2fdb776SMikulas Patocka redirect_to_origin: 198874d46992SChristoph Hellwig bio_set_dev(bio, s->origin->bdev); 19893452c2a1SMikulas Patocka 199070246286SChristoph Hellwig if (bio_data_dir(bio) == WRITE) { 19914ad8d880SNikos Tsironis up_write(&s->lock); 19923452c2a1SMikulas Patocka return do_origin(s->origin, bio); 19933452c2a1SMikulas Patocka } 19943452c2a1SMikulas Patocka 19953452c2a1SMikulas Patocka out_unlock: 19964ad8d880SNikos Tsironis up_write(&s->lock); 19973452c2a1SMikulas Patocka 19983452c2a1SMikulas Patocka return r; 19993452c2a1SMikulas Patocka } 20003452c2a1SMikulas Patocka 20014e4cbee9SChristoph Hellwig static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 20024e4cbee9SChristoph Hellwig blk_status_t *error) 2003cd45daffSMikulas Patocka { 2004cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 2005cd45daffSMikulas Patocka 2006ee18026aSMikulas Patocka if (is_bio_tracked(bio)) 2007ee18026aSMikulas Patocka stop_tracking_chunk(s, bio); 2008cd45daffSMikulas Patocka 20091be56909SChristoph Hellwig 
return DM_ENDIO_DONE; 2010cd45daffSMikulas Patocka } 2011cd45daffSMikulas Patocka 20121e03f97eSMikulas Patocka static void snapshot_merge_presuspend(struct dm_target *ti) 20131e03f97eSMikulas Patocka { 20141e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 20151e03f97eSMikulas Patocka 20161e03f97eSMikulas Patocka stop_merge(s); 20171e03f97eSMikulas Patocka } 20181e03f97eSMikulas Patocka 2019c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target *ti) 2020c1f0c183SMike Snitzer { 2021c1f0c183SMike Snitzer int r = 0; 2022c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 2023c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 2024c1f0c183SMike Snitzer 2025c1f0c183SMike Snitzer down_read(&_origins_lock); 20269d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 2027c1f0c183SMike Snitzer if (snap_src && snap_dest) { 20284ad8d880SNikos Tsironis down_read(&snap_src->lock); 2029c1f0c183SMike Snitzer if (s == snap_src) { 2030c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 2031c1f0c183SMike Snitzer "handover completes."); 2032c1f0c183SMike Snitzer r = -EINVAL; 2033b83b2f29SMike Snitzer } else if (!dm_suspended(snap_src->ti)) { 2034c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 2035c1f0c183SMike Snitzer "source is suspended."); 2036c1f0c183SMike Snitzer r = -EINVAL; 2037c1f0c183SMike Snitzer } 20384ad8d880SNikos Tsironis up_read(&snap_src->lock); 2039c1f0c183SMike Snitzer } 2040c1f0c183SMike Snitzer up_read(&_origins_lock); 2041c1f0c183SMike Snitzer 2042c1f0c183SMike Snitzer return r; 2043c1f0c183SMike Snitzer } 2044c1f0c183SMike Snitzer 20451da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 20461da177e4SLinus Torvalds { 2047028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 204809ee96b2SMikulas Patocka struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL; 
2049b735fedeSMikulas Patocka struct dm_origin *o; 2050b735fedeSMikulas Patocka struct mapped_device *origin_md = NULL; 205109ee96b2SMikulas Patocka bool must_restart_merging = false; 2052c1f0c183SMike Snitzer 2053c1f0c183SMike Snitzer down_read(&_origins_lock); 2054b735fedeSMikulas Patocka 2055b735fedeSMikulas Patocka o = __lookup_dm_origin(s->origin->bdev); 2056b735fedeSMikulas Patocka if (o) 2057b735fedeSMikulas Patocka origin_md = dm_table_get_md(o->ti->table); 205809ee96b2SMikulas Patocka if (!origin_md) { 205909ee96b2SMikulas Patocka (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging); 206009ee96b2SMikulas Patocka if (snap_merging) 206109ee96b2SMikulas Patocka origin_md = dm_table_get_md(snap_merging->ti->table); 206209ee96b2SMikulas Patocka } 2063b735fedeSMikulas Patocka if (origin_md == dm_table_get_md(ti->table)) 2064b735fedeSMikulas Patocka origin_md = NULL; 206509ee96b2SMikulas Patocka if (origin_md) { 206609ee96b2SMikulas Patocka if (dm_hold(origin_md)) 206709ee96b2SMikulas Patocka origin_md = NULL; 206809ee96b2SMikulas Patocka } 2069b735fedeSMikulas Patocka 207009ee96b2SMikulas Patocka up_read(&_origins_lock); 207109ee96b2SMikulas Patocka 207209ee96b2SMikulas Patocka if (origin_md) { 2073b735fedeSMikulas Patocka dm_internal_suspend_fast(origin_md); 207409ee96b2SMikulas Patocka if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) { 207509ee96b2SMikulas Patocka must_restart_merging = true; 207609ee96b2SMikulas Patocka stop_merge(snap_merging); 207709ee96b2SMikulas Patocka } 207809ee96b2SMikulas Patocka } 207909ee96b2SMikulas Patocka 208009ee96b2SMikulas Patocka down_read(&_origins_lock); 2081b735fedeSMikulas Patocka 20829d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 2083c1f0c183SMike Snitzer if (snap_src && snap_dest) { 20844ad8d880SNikos Tsironis down_write(&snap_src->lock); 20854ad8d880SNikos Tsironis down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 
2086c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 20874ad8d880SNikos Tsironis up_write(&snap_dest->lock); 20884ad8d880SNikos Tsironis up_write(&snap_src->lock); 2089c1f0c183SMike Snitzer } 2090b735fedeSMikulas Patocka 2091c1f0c183SMike Snitzer up_read(&_origins_lock); 2092c1f0c183SMike Snitzer 209309ee96b2SMikulas Patocka if (origin_md) { 209409ee96b2SMikulas Patocka if (must_restart_merging) 209509ee96b2SMikulas Patocka start_merge(snap_merging); 209609ee96b2SMikulas Patocka dm_internal_resume_fast(origin_md); 209709ee96b2SMikulas Patocka dm_put(origin_md); 209809ee96b2SMikulas Patocka } 209909ee96b2SMikulas Patocka 2100c1f0c183SMike Snitzer /* Now we have correct chunk size, reregister */ 2101c1f0c183SMike Snitzer reregister_snapshot(s); 21021da177e4SLinus Torvalds 21034ad8d880SNikos Tsironis down_write(&s->lock); 2104aa14edebSAlasdair G Kergon s->active = 1; 21054ad8d880SNikos Tsironis up_write(&s->lock); 21061da177e4SLinus Torvalds } 21071da177e4SLinus Torvalds 2108542f9038SMike Snitzer static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) 21091e03f97eSMikulas Patocka { 2110542f9038SMike Snitzer uint32_t min_chunksize; 21111e03f97eSMikulas Patocka 21121e03f97eSMikulas Patocka down_read(&_origins_lock); 21131e03f97eSMikulas Patocka min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); 21141e03f97eSMikulas Patocka up_read(&_origins_lock); 21151e03f97eSMikulas Patocka 21161e03f97eSMikulas Patocka return min_chunksize; 21171e03f97eSMikulas Patocka } 21181e03f97eSMikulas Patocka 21191e03f97eSMikulas Patocka static void snapshot_merge_resume(struct dm_target *ti) 21201e03f97eSMikulas Patocka { 21211e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 21221e03f97eSMikulas Patocka 21231e03f97eSMikulas Patocka /* 21241e03f97eSMikulas Patocka * Handover exceptions from existing snapshot. 
21251e03f97eSMikulas Patocka */ 21261e03f97eSMikulas Patocka snapshot_resume(ti); 21271e03f97eSMikulas Patocka 21281e03f97eSMikulas Patocka /* 2129542f9038SMike Snitzer * snapshot-merge acts as an origin, so set ti->max_io_len 21301e03f97eSMikulas Patocka */ 2131542f9038SMike Snitzer ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); 21321e03f97eSMikulas Patocka 21331e03f97eSMikulas Patocka start_merge(s); 21341e03f97eSMikulas Patocka } 21351e03f97eSMikulas Patocka 2136fd7c092eSMikulas Patocka static void snapshot_status(struct dm_target *ti, status_type_t type, 21371f4e0ff0SAlasdair G Kergon unsigned status_flags, char *result, unsigned maxlen) 21381da177e4SLinus Torvalds { 21392e4a31dfSJonathan Brassow unsigned sz = 0; 2140028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 21411da177e4SLinus Torvalds 21421da177e4SLinus Torvalds switch (type) { 21431da177e4SLinus Torvalds case STATUSTYPE_INFO: 214494e76572SMikulas Patocka 21454ad8d880SNikos Tsironis down_write(&snap->lock); 214694e76572SMikulas Patocka 21471da177e4SLinus Torvalds if (!snap->valid) 21482e4a31dfSJonathan Brassow DMEMIT("Invalid"); 2149d8ddb1cfSMike Snitzer else if (snap->merge_failed) 2150d8ddb1cfSMike Snitzer DMEMIT("Merge failed"); 215176c44f6dSMikulas Patocka else if (snap->snapshot_overflowed) 215276c44f6dSMikulas Patocka DMEMIT("Overflow"); 21531da177e4SLinus Torvalds else { 2154985903bbSMike Snitzer if (snap->store->type->usage) { 2155985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 2156985903bbSMike Snitzer metadata_sectors; 2157985903bbSMike Snitzer snap->store->type->usage(snap->store, 2158985903bbSMike Snitzer &total_sectors, 2159985903bbSMike Snitzer §ors_allocated, 2160985903bbSMike Snitzer &metadata_sectors); 2161985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 2162985903bbSMike Snitzer (unsigned long long)sectors_allocated, 2163985903bbSMike Snitzer (unsigned long long)total_sectors, 2164985903bbSMike Snitzer (unsigned long 
long)metadata_sectors); 21651da177e4SLinus Torvalds } 21661da177e4SLinus Torvalds else 21672e4a31dfSJonathan Brassow DMEMIT("Unknown"); 21681da177e4SLinus Torvalds } 216994e76572SMikulas Patocka 21704ad8d880SNikos Tsironis up_write(&snap->lock); 217194e76572SMikulas Patocka 21721da177e4SLinus Torvalds break; 21731da177e4SLinus Torvalds 21741da177e4SLinus Torvalds case STATUSTYPE_TABLE: 21751da177e4SLinus Torvalds /* 21761da177e4SLinus Torvalds * kdevname returns a static pointer so we need 21771da177e4SLinus Torvalds * to make private copies if the output is to 21781da177e4SLinus Torvalds * make sense. 21791da177e4SLinus Torvalds */ 2180fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 21811e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 21821e302a92SJonathan Brassow maxlen - sz); 21831da177e4SLinus Torvalds break; 21841da177e4SLinus Torvalds } 21851da177e4SLinus Torvalds } 21861da177e4SLinus Torvalds 21878811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 21888811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 21898811f46cSMike Snitzer { 21908811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 21911e5554c8SMikulas Patocka int r; 21928811f46cSMike Snitzer 21931e5554c8SMikulas Patocka r = fn(ti, snap->origin, 0, ti->len, data); 21941e5554c8SMikulas Patocka 21951e5554c8SMikulas Patocka if (!r) 21961e5554c8SMikulas Patocka r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data); 21971e5554c8SMikulas Patocka 21981e5554c8SMikulas Patocka return r; 21998811f46cSMike Snitzer } 22008811f46cSMike Snitzer 22018811f46cSMike Snitzer 22021da177e4SLinus Torvalds /*----------------------------------------------------------------- 22031da177e4SLinus Torvalds * Origin methods 22041da177e4SLinus Torvalds *---------------------------------------------------------------*/ 22059eaae8ffSMikulas Patocka 22069eaae8ffSMikulas Patocka /* 22079eaae8ffSMikulas Patocka * If no 
exceptions need creating, DM_MAPIO_REMAPPED is returned and any 22089eaae8ffSMikulas Patocka * supplied bio was ignored. The caller may submit it immediately. 22099eaae8ffSMikulas Patocka * (No remapping actually occurs as the origin is always a direct linear 22109eaae8ffSMikulas Patocka * map.) 22119eaae8ffSMikulas Patocka * 22129eaae8ffSMikulas Patocka * If further exceptions are required, DM_MAPIO_SUBMITTED is returned 22139eaae8ffSMikulas Patocka * and any supplied bio is added to a list to be submitted once all 22149eaae8ffSMikulas Patocka * the necessary exceptions exist. 22159eaae8ffSMikulas Patocka */ 22169eaae8ffSMikulas Patocka static int __origin_write(struct list_head *snapshots, sector_t sector, 22179eaae8ffSMikulas Patocka struct bio *bio) 22181da177e4SLinus Torvalds { 2219515ad66cSMikulas Patocka int r = DM_MAPIO_REMAPPED; 22201da177e4SLinus Torvalds struct dm_snapshot *snap; 22211d4989c8SJon Brassow struct dm_exception *e; 222265fc7c37SNikos Tsironis struct dm_snap_pending_exception *pe, *pe2; 2223515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_now = NULL; 2224515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_last = NULL; 2225f79ae415SNikos Tsironis struct dm_exception_table_lock lock; 22261da177e4SLinus Torvalds chunk_t chunk; 22271da177e4SLinus Torvalds 22281da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 22291da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 22303452c2a1SMikulas Patocka /* 22313452c2a1SMikulas Patocka * Don't make new exceptions in a merging snapshot 22323452c2a1SMikulas Patocka * because it has effectively been deleted 22333452c2a1SMikulas Patocka */ 22343452c2a1SMikulas Patocka if (dm_target_is_snapshot_merge(snap->ti)) 22353452c2a1SMikulas Patocka continue; 22363452c2a1SMikulas Patocka 2237d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 22389eaae8ffSMikulas Patocka if (sector >= dm_table_get_size(snap->ti->table)) 
2239f79ae415SNikos Tsironis continue; 22401da177e4SLinus Torvalds 22411da177e4SLinus Torvalds /* 22421da177e4SLinus Torvalds * Remember, different snapshots can have 22431da177e4SLinus Torvalds * different chunk sizes. 22441da177e4SLinus Torvalds */ 22459eaae8ffSMikulas Patocka chunk = sector_to_chunk(snap->store, sector); 2246f79ae415SNikos Tsironis dm_exception_table_lock_init(snap, chunk, &lock); 2247f79ae415SNikos Tsironis 22483f1637f2SNikos Tsironis down_read(&snap->lock); 2249f79ae415SNikos Tsironis dm_exception_table_lock(&lock); 2250f79ae415SNikos Tsironis 2251f79ae415SNikos Tsironis /* Only deal with valid and active snapshots */ 2252f79ae415SNikos Tsironis if (!snap->valid || !snap->active) 2253f79ae415SNikos Tsironis goto next_snapshot; 22541da177e4SLinus Torvalds 225565fc7c37SNikos Tsironis pe = __lookup_pending_exception(snap, chunk); 225665fc7c37SNikos Tsironis if (!pe) { 22571da177e4SLinus Torvalds /* 225865fc7c37SNikos Tsironis * Check exception table to see if block is already 225965fc7c37SNikos Tsironis * remapped in this snapshot and trigger an exception 226065fc7c37SNikos Tsironis * if not. 
22611da177e4SLinus Torvalds */ 22623510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 226376df1c65SAlasdair G Kergon if (e) 226476df1c65SAlasdair G Kergon goto next_snapshot; 226576df1c65SAlasdair G Kergon 2266f79ae415SNikos Tsironis dm_exception_table_unlock(&lock); 2267c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 2268f79ae415SNikos Tsironis dm_exception_table_lock(&lock); 2269c6621392SMikulas Patocka 227065fc7c37SNikos Tsironis pe2 = __lookup_pending_exception(snap, chunk); 227165fc7c37SNikos Tsironis 227265fc7c37SNikos Tsironis if (!pe2) { 22733510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 227435bf659bSMikulas Patocka if (e) { 227535bf659bSMikulas Patocka free_pending_exception(pe); 227635bf659bSMikulas Patocka goto next_snapshot; 227735bf659bSMikulas Patocka } 227835bf659bSMikulas Patocka 227965fc7c37SNikos Tsironis pe = __insert_pending_exception(snap, pe, chunk); 22801da177e4SLinus Torvalds if (!pe) { 2281f79ae415SNikos Tsironis dm_exception_table_unlock(&lock); 22823f1637f2SNikos Tsironis up_read(&snap->lock); 2283f79ae415SNikos Tsironis 22843f1637f2SNikos Tsironis invalidate_snapshot(snap, -ENOMEM); 2285f79ae415SNikos Tsironis continue; 228676df1c65SAlasdair G Kergon } 228765fc7c37SNikos Tsironis } else { 228865fc7c37SNikos Tsironis free_pending_exception(pe); 228965fc7c37SNikos Tsironis pe = pe2; 229065fc7c37SNikos Tsironis } 22912913808eSMikulas Patocka } 22921da177e4SLinus Torvalds 2293d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 229476df1c65SAlasdair G Kergon 2295515ad66cSMikulas Patocka /* 2296515ad66cSMikulas Patocka * If an origin bio was supplied, queue it to wait for the 2297515ad66cSMikulas Patocka * completion of this exception, and start this one last, 2298515ad66cSMikulas Patocka * at the end of the function. 
2299515ad66cSMikulas Patocka */ 2300515ad66cSMikulas Patocka if (bio) { 2301515ad66cSMikulas Patocka bio_list_add(&pe->origin_bios, bio); 2302515ad66cSMikulas Patocka bio = NULL; 2303515ad66cSMikulas Patocka 2304515ad66cSMikulas Patocka if (!pe->started) { 2305515ad66cSMikulas Patocka pe->started = 1; 2306515ad66cSMikulas Patocka pe_to_start_last = pe; 2307515ad66cSMikulas Patocka } 2308b4b610f6SAlasdair G Kergon } 230976df1c65SAlasdair G Kergon 2310eccf0817SAlasdair G Kergon if (!pe->started) { 2311eccf0817SAlasdair G Kergon pe->started = 1; 2312515ad66cSMikulas Patocka pe_to_start_now = pe; 2313eccf0817SAlasdair G Kergon } 23141da177e4SLinus Torvalds 231576df1c65SAlasdair G Kergon next_snapshot: 2316f79ae415SNikos Tsironis dm_exception_table_unlock(&lock); 23173f1637f2SNikos Tsironis up_read(&snap->lock); 2318515ad66cSMikulas Patocka 2319515ad66cSMikulas Patocka if (pe_to_start_now) { 2320515ad66cSMikulas Patocka start_copy(pe_to_start_now); 2321515ad66cSMikulas Patocka pe_to_start_now = NULL; 23221da177e4SLinus Torvalds } 2323b4b610f6SAlasdair G Kergon } 2324b4b610f6SAlasdair G Kergon 23251da177e4SLinus Torvalds /* 2326515ad66cSMikulas Patocka * Submit the exception against which the bio is queued last, 2327515ad66cSMikulas Patocka * to give the other exceptions a head start. 23281da177e4SLinus Torvalds */ 2329515ad66cSMikulas Patocka if (pe_to_start_last) 2330515ad66cSMikulas Patocka start_copy(pe_to_start_last); 23311da177e4SLinus Torvalds 23321da177e4SLinus Torvalds return r; 23331da177e4SLinus Torvalds } 23341da177e4SLinus Torvalds 23351da177e4SLinus Torvalds /* 23361da177e4SLinus Torvalds * Called on a write from the origin driver. 
23371da177e4SLinus Torvalds */ 23381da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 23391da177e4SLinus Torvalds { 23401da177e4SLinus Torvalds struct origin *o; 2341d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 23421da177e4SLinus Torvalds 23431da177e4SLinus Torvalds down_read(&_origins_lock); 23441da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 23451da177e4SLinus Torvalds if (o) 23464f024f37SKent Overstreet r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); 23471da177e4SLinus Torvalds up_read(&_origins_lock); 23481da177e4SLinus Torvalds 23491da177e4SLinus Torvalds return r; 23501da177e4SLinus Torvalds } 23511da177e4SLinus Torvalds 23521da177e4SLinus Torvalds /* 235373dfd078SMikulas Patocka * Trigger exceptions in all non-merging snapshots. 235473dfd078SMikulas Patocka * 235573dfd078SMikulas Patocka * The chunk size of the merging snapshot may be larger than the chunk 235673dfd078SMikulas Patocka * size of some other snapshot so we may need to reallocate multiple 235773dfd078SMikulas Patocka * chunks in other snapshots. 235873dfd078SMikulas Patocka * 235973dfd078SMikulas Patocka * We scan all the overlapping exceptions in the other snapshots. 236073dfd078SMikulas Patocka * Returns 1 if anything was reallocated and must be waited for, 236173dfd078SMikulas Patocka * otherwise returns 0. 236273dfd078SMikulas Patocka * 236373dfd078SMikulas Patocka * size must be a multiple of merging_snap's chunk_size. 
236473dfd078SMikulas Patocka */ 236573dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 236673dfd078SMikulas Patocka sector_t sector, unsigned size) 236773dfd078SMikulas Patocka { 236873dfd078SMikulas Patocka int must_wait = 0; 236973dfd078SMikulas Patocka sector_t n; 237073dfd078SMikulas Patocka struct origin *o; 237173dfd078SMikulas Patocka 237273dfd078SMikulas Patocka /* 2373542f9038SMike Snitzer * The origin's __minimum_chunk_size() got stored in max_io_len 237473dfd078SMikulas Patocka * by snapshot_merge_resume(). 237573dfd078SMikulas Patocka */ 237673dfd078SMikulas Patocka down_read(&_origins_lock); 237773dfd078SMikulas Patocka o = __lookup_origin(merging_snap->origin->bdev); 2378542f9038SMike Snitzer for (n = 0; n < size; n += merging_snap->ti->max_io_len) 237973dfd078SMikulas Patocka if (__origin_write(&o->snapshots, sector + n, NULL) == 238073dfd078SMikulas Patocka DM_MAPIO_SUBMITTED) 238173dfd078SMikulas Patocka must_wait = 1; 238273dfd078SMikulas Patocka up_read(&_origins_lock); 238373dfd078SMikulas Patocka 238473dfd078SMikulas Patocka return must_wait; 238573dfd078SMikulas Patocka } 238673dfd078SMikulas Patocka 238773dfd078SMikulas Patocka /* 23881da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 23891da177e4SLinus Torvalds */ 23901da177e4SLinus Torvalds 23911da177e4SLinus Torvalds /* 23921da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 23931da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 23941da177e4SLinus Torvalds * pointing to the real device. 
23951da177e4SLinus Torvalds */ 23961da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 23971da177e4SLinus Torvalds { 23981da177e4SLinus Torvalds int r; 2399599cdf3bSMikulas Patocka struct dm_origin *o; 24001da177e4SLinus Torvalds 24011da177e4SLinus Torvalds if (argc != 1) { 240272d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 24031da177e4SLinus Torvalds return -EINVAL; 24041da177e4SLinus Torvalds } 24051da177e4SLinus Torvalds 2406599cdf3bSMikulas Patocka o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL); 2407599cdf3bSMikulas Patocka if (!o) { 2408599cdf3bSMikulas Patocka ti->error = "Cannot allocate private origin structure"; 2409599cdf3bSMikulas Patocka r = -ENOMEM; 2410599cdf3bSMikulas Patocka goto bad_alloc; 24111da177e4SLinus Torvalds } 24121da177e4SLinus Torvalds 2413599cdf3bSMikulas Patocka r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev); 2414599cdf3bSMikulas Patocka if (r) { 2415599cdf3bSMikulas Patocka ti->error = "Cannot get target device"; 2416599cdf3bSMikulas Patocka goto bad_open; 2417599cdf3bSMikulas Patocka } 2418599cdf3bSMikulas Patocka 2419b735fedeSMikulas Patocka o->ti = ti; 2420599cdf3bSMikulas Patocka ti->private = o; 242155a62eefSAlasdair G Kergon ti->num_flush_bios = 1; 2422494b3ee7SMikulas Patocka 24231da177e4SLinus Torvalds return 0; 2424599cdf3bSMikulas Patocka 2425599cdf3bSMikulas Patocka bad_open: 2426599cdf3bSMikulas Patocka kfree(o); 2427599cdf3bSMikulas Patocka bad_alloc: 2428599cdf3bSMikulas Patocka return r; 24291da177e4SLinus Torvalds } 24301da177e4SLinus Torvalds 24311da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 24321da177e4SLinus Torvalds { 2433599cdf3bSMikulas Patocka struct dm_origin *o = ti->private; 2434b735fedeSMikulas Patocka 2435599cdf3bSMikulas Patocka dm_put_device(ti, o->dev); 2436599cdf3bSMikulas Patocka kfree(o); 24371da177e4SLinus Torvalds } 24381da177e4SLinus Torvalds 24397de3ee57SMikulas Patocka 
static int origin_map(struct dm_target *ti, struct bio *bio) 24401da177e4SLinus Torvalds { 2441599cdf3bSMikulas Patocka struct dm_origin *o = ti->private; 2442298eaa89SMikulas Patocka unsigned available_sectors; 24431da177e4SLinus Torvalds 244474d46992SChristoph Hellwig bio_set_dev(bio, o->dev->bdev); 24451da177e4SLinus Torvalds 24461eff9d32SJens Axboe if (unlikely(bio->bi_opf & REQ_PREFLUSH)) 2447494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 2448494b3ee7SMikulas Patocka 244970246286SChristoph Hellwig if (bio_data_dir(bio) != WRITE) 2450298eaa89SMikulas Patocka return DM_MAPIO_REMAPPED; 2451298eaa89SMikulas Patocka 2452298eaa89SMikulas Patocka available_sectors = o->split_boundary - 2453298eaa89SMikulas Patocka ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1)); 2454298eaa89SMikulas Patocka 2455298eaa89SMikulas Patocka if (bio_sectors(bio) > available_sectors) 2456298eaa89SMikulas Patocka dm_accept_partial_bio(bio, available_sectors); 2457298eaa89SMikulas Patocka 24581da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 2459298eaa89SMikulas Patocka return do_origin(o->dev, bio); 24601da177e4SLinus Torvalds } 24611da177e4SLinus Torvalds 24621da177e4SLinus Torvalds /* 2463542f9038SMike Snitzer * Set the target "max_io_len" field to the minimum of all the snapshots' 24641da177e4SLinus Torvalds * chunk sizes. 
24651da177e4SLinus Torvalds */ 24661da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 24671da177e4SLinus Torvalds { 2468599cdf3bSMikulas Patocka struct dm_origin *o = ti->private; 24691da177e4SLinus Torvalds 2470298eaa89SMikulas Patocka o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev); 2471b735fedeSMikulas Patocka 2472b735fedeSMikulas Patocka down_write(&_origins_lock); 2473b735fedeSMikulas Patocka __insert_dm_origin(o); 2474b735fedeSMikulas Patocka up_write(&_origins_lock); 2475b735fedeSMikulas Patocka } 2476b735fedeSMikulas Patocka 2477b735fedeSMikulas Patocka static void origin_postsuspend(struct dm_target *ti) 2478b735fedeSMikulas Patocka { 2479b735fedeSMikulas Patocka struct dm_origin *o = ti->private; 2480b735fedeSMikulas Patocka 2481b735fedeSMikulas Patocka down_write(&_origins_lock); 2482b735fedeSMikulas Patocka __remove_dm_origin(o); 2483b735fedeSMikulas Patocka up_write(&_origins_lock); 24841da177e4SLinus Torvalds } 24851da177e4SLinus Torvalds 2486fd7c092eSMikulas Patocka static void origin_status(struct dm_target *ti, status_type_t type, 24871f4e0ff0SAlasdair G Kergon unsigned status_flags, char *result, unsigned maxlen) 24881da177e4SLinus Torvalds { 2489599cdf3bSMikulas Patocka struct dm_origin *o = ti->private; 24901da177e4SLinus Torvalds 24911da177e4SLinus Torvalds switch (type) { 24921da177e4SLinus Torvalds case STATUSTYPE_INFO: 24931da177e4SLinus Torvalds result[0] = '\0'; 24941da177e4SLinus Torvalds break; 24951da177e4SLinus Torvalds 24961da177e4SLinus Torvalds case STATUSTYPE_TABLE: 2497599cdf3bSMikulas Patocka snprintf(result, maxlen, "%s", o->dev->name); 24981da177e4SLinus Torvalds break; 24991da177e4SLinus Torvalds } 25001da177e4SLinus Torvalds } 25011da177e4SLinus Torvalds 25028811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 25038811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 25048811f46cSMike Snitzer { 2505599cdf3bSMikulas Patocka struct dm_origin *o = 
ti->private; 25068811f46cSMike Snitzer 2507599cdf3bSMikulas Patocka return fn(ti, o->dev, 0, ti->len, data); 25088811f46cSMike Snitzer } 25098811f46cSMike Snitzer 25101da177e4SLinus Torvalds static struct target_type origin_target = { 25111da177e4SLinus Torvalds .name = "snapshot-origin", 2512b735fedeSMikulas Patocka .version = {1, 9, 0}, 25131da177e4SLinus Torvalds .module = THIS_MODULE, 25141da177e4SLinus Torvalds .ctr = origin_ctr, 25151da177e4SLinus Torvalds .dtr = origin_dtr, 25161da177e4SLinus Torvalds .map = origin_map, 25171da177e4SLinus Torvalds .resume = origin_resume, 2518b735fedeSMikulas Patocka .postsuspend = origin_postsuspend, 25191da177e4SLinus Torvalds .status = origin_status, 25208811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 25211da177e4SLinus Torvalds }; 25221da177e4SLinus Torvalds 25231da177e4SLinus Torvalds static struct target_type snapshot_target = { 25241da177e4SLinus Torvalds .name = "snapshot", 2525b0d3cc01SMike Snitzer .version = {1, 15, 0}, 25261da177e4SLinus Torvalds .module = THIS_MODULE, 25271da177e4SLinus Torvalds .ctr = snapshot_ctr, 25281da177e4SLinus Torvalds .dtr = snapshot_dtr, 25291da177e4SLinus Torvalds .map = snapshot_map, 2530cd45daffSMikulas Patocka .end_io = snapshot_end_io, 2531c1f0c183SMike Snitzer .preresume = snapshot_preresume, 25321da177e4SLinus Torvalds .resume = snapshot_resume, 25331da177e4SLinus Torvalds .status = snapshot_status, 25348811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 25351da177e4SLinus Torvalds }; 25361da177e4SLinus Torvalds 2537d698aa45SMikulas Patocka static struct target_type merge_target = { 2538d698aa45SMikulas Patocka .name = dm_snapshot_merge_target_name, 2539b0d3cc01SMike Snitzer .version = {1, 4, 0}, 2540d698aa45SMikulas Patocka .module = THIS_MODULE, 2541d698aa45SMikulas Patocka .ctr = snapshot_ctr, 2542d698aa45SMikulas Patocka .dtr = snapshot_dtr, 25433452c2a1SMikulas Patocka .map = snapshot_merge_map, 2544d698aa45SMikulas Patocka .end_io = 
snapshot_end_io, 25451e03f97eSMikulas Patocka .presuspend = snapshot_merge_presuspend, 2546d698aa45SMikulas Patocka .preresume = snapshot_preresume, 25471e03f97eSMikulas Patocka .resume = snapshot_merge_resume, 2548d698aa45SMikulas Patocka .status = snapshot_status, 2549d698aa45SMikulas Patocka .iterate_devices = snapshot_iterate_devices, 2550d698aa45SMikulas Patocka }; 2551d698aa45SMikulas Patocka 25521da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 25531da177e4SLinus Torvalds { 25541da177e4SLinus Torvalds int r; 25551da177e4SLinus Torvalds 25564db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 25574db6bfe0SAlasdair G Kergon if (r) { 25584db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 25594db6bfe0SAlasdair G Kergon return r; 25604db6bfe0SAlasdair G Kergon } 25614db6bfe0SAlasdair G Kergon 25621da177e4SLinus Torvalds r = init_origin_hash(); 25631da177e4SLinus Torvalds if (r) { 25641da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 2565d698aa45SMikulas Patocka goto bad_origin_hash; 25661da177e4SLinus Torvalds } 25671da177e4SLinus Torvalds 25681d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 25691da177e4SLinus Torvalds if (!exception_cache) { 25701da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 25711da177e4SLinus Torvalds r = -ENOMEM; 2572d698aa45SMikulas Patocka goto bad_exception_cache; 25731da177e4SLinus Torvalds } 25741da177e4SLinus Torvalds 2575028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 25761da177e4SLinus Torvalds if (!pending_cache) { 25771da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 25781da177e4SLinus Torvalds r = -ENOMEM; 2579d698aa45SMikulas Patocka goto bad_pending_cache; 25801da177e4SLinus Torvalds } 25811da177e4SLinus Torvalds 25827e6358d2Smonty_pavel@sina.com r = dm_register_target(&snapshot_target); 25837e6358d2Smonty_pavel@sina.com if (r < 0) { 25847e6358d2Smonty_pavel@sina.com DMERR("snapshot target 
register failed %d", r); 25857e6358d2Smonty_pavel@sina.com goto bad_register_snapshot_target; 25867e6358d2Smonty_pavel@sina.com } 25877e6358d2Smonty_pavel@sina.com 25887e6358d2Smonty_pavel@sina.com r = dm_register_target(&origin_target); 25897e6358d2Smonty_pavel@sina.com if (r < 0) { 25907e6358d2Smonty_pavel@sina.com DMERR("Origin target register failed %d", r); 25917e6358d2Smonty_pavel@sina.com goto bad_register_origin_target; 25927e6358d2Smonty_pavel@sina.com } 25937e6358d2Smonty_pavel@sina.com 25947e6358d2Smonty_pavel@sina.com r = dm_register_target(&merge_target); 25957e6358d2Smonty_pavel@sina.com if (r < 0) { 25967e6358d2Smonty_pavel@sina.com DMERR("Merge target register failed %d", r); 25977e6358d2Smonty_pavel@sina.com goto bad_register_merge_target; 25987e6358d2Smonty_pavel@sina.com } 25997e6358d2Smonty_pavel@sina.com 26001da177e4SLinus Torvalds return 0; 26011da177e4SLinus Torvalds 2602d698aa45SMikulas Patocka bad_register_merge_target: 26031da177e4SLinus Torvalds dm_unregister_target(&origin_target); 2604d698aa45SMikulas Patocka bad_register_origin_target: 26051da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 2606034a186dSJonathan Brassow bad_register_snapshot_target: 26077e6358d2Smonty_pavel@sina.com kmem_cache_destroy(pending_cache); 26087e6358d2Smonty_pavel@sina.com bad_pending_cache: 26097e6358d2Smonty_pavel@sina.com kmem_cache_destroy(exception_cache); 26107e6358d2Smonty_pavel@sina.com bad_exception_cache: 26117e6358d2Smonty_pavel@sina.com exit_origin_hash(); 26127e6358d2Smonty_pavel@sina.com bad_origin_hash: 2613034a186dSJonathan Brassow dm_exception_store_exit(); 2614d698aa45SMikulas Patocka 26151da177e4SLinus Torvalds return r; 26161da177e4SLinus Torvalds } 26171da177e4SLinus Torvalds 26181da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 26191da177e4SLinus Torvalds { 262010d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 262110d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 
2622d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 26231da177e4SLinus Torvalds 26241da177e4SLinus Torvalds exit_origin_hash(); 26251da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 26261da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 26274db6bfe0SAlasdair G Kergon 26284db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 26291da177e4SLinus Torvalds } 26301da177e4SLinus Torvalds 26311da177e4SLinus Torvalds /* Module hooks */ 26321da177e4SLinus Torvalds module_init(dm_snapshot_init); 26331da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 26341da177e4SLinus Torvalds 26351da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 26361da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 26371da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 263823cb2109SMikulas Patocka MODULE_ALIAS("dm-snapshot-origin"); 263923cb2109SMikulas Patocka MODULE_ALIAS("dm-snapshot-merge"); 2640