11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 161da177e4SLinus Torvalds #include <linux/mempool.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/vmalloc.h> 206f3c3f0aSvignesh babu #include <linux/log2.h> 21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 221da177e4SLinus Torvalds 23aea53d92SJonathan Brassow #include "dm-exception-store.h" 241da177e4SLinus Torvalds 2572d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 2672d94861SAlasdair G Kergon 27d698aa45SMikulas Patocka static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; 28d698aa45SMikulas Patocka 29d698aa45SMikulas Patocka #define dm_target_is_snapshot_merge(ti) \ 30d698aa45SMikulas Patocka ((ti)->type->name == dm_snapshot_merge_target_name) 31d698aa45SMikulas Patocka 321da177e4SLinus Torvalds /* 331da177e4SLinus Torvalds * The percentage increment we will wake up users at 341da177e4SLinus Torvalds */ 351da177e4SLinus Torvalds #define WAKE_UP_PERCENT 5 361da177e4SLinus Torvalds 371da177e4SLinus Torvalds /* 381da177e4SLinus Torvalds * kcopyd priority of snapshot operations 391da177e4SLinus Torvalds */ 401da177e4SLinus Torvalds #define SNAPSHOT_COPY_PRIORITY 2 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds /* 438ee2767aSMilan Broz * Reserve 1MB for each 
snapshot initially (with minimum of 1 page). 441da177e4SLinus Torvalds */ 458ee2767aSMilan Broz #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 461da177e4SLinus Torvalds 47cd45daffSMikulas Patocka /* 48cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 49cd45daffSMikulas Patocka */ 50cd45daffSMikulas Patocka #define MIN_IOS 256 51cd45daffSMikulas Patocka 52ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 53ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 54ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 55ccc45ea8SJonathan Brassow 56191437a5SJon Brassow struct dm_exception_table { 57ccc45ea8SJonathan Brassow uint32_t hash_mask; 58ccc45ea8SJonathan Brassow unsigned hash_shift; 59ccc45ea8SJonathan Brassow struct list_head *table; 60ccc45ea8SJonathan Brassow }; 61ccc45ea8SJonathan Brassow 62ccc45ea8SJonathan Brassow struct dm_snapshot { 63ccc45ea8SJonathan Brassow struct rw_semaphore lock; 64ccc45ea8SJonathan Brassow 65ccc45ea8SJonathan Brassow struct dm_dev *origin; 66fc56f6fbSMike Snitzer struct dm_dev *cow; 67fc56f6fbSMike Snitzer 68fc56f6fbSMike Snitzer struct dm_target *ti; 69ccc45ea8SJonathan Brassow 70ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 71ccc45ea8SJonathan Brassow struct list_head list; 72ccc45ea8SJonathan Brassow 73d8ddb1cfSMike Snitzer /* 74d8ddb1cfSMike Snitzer * You can't use a snapshot if this is 0 (e.g. if full). 75d8ddb1cfSMike Snitzer * A snapshot-merge target never clears this. 
76d8ddb1cfSMike Snitzer */ 77ccc45ea8SJonathan Brassow int valid; 78ccc45ea8SJonathan Brassow 79ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 80ccc45ea8SJonathan Brassow int active; 81ccc45ea8SJonathan Brassow 82ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 83ccc45ea8SJonathan Brassow 84924e600dSMike Snitzer mempool_t *pending_pool; 85924e600dSMike Snitzer 86191437a5SJon Brassow struct dm_exception_table pending; 87191437a5SJon Brassow struct dm_exception_table complete; 88ccc45ea8SJonathan Brassow 89ccc45ea8SJonathan Brassow /* 90ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 91ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 92ccc45ea8SJonathan Brassow */ 93ccc45ea8SJonathan Brassow spinlock_t pe_lock; 94ccc45ea8SJonathan Brassow 95924e600dSMike Snitzer /* Chunks with outstanding reads */ 96924e600dSMike Snitzer spinlock_t tracked_chunk_lock; 97924e600dSMike Snitzer mempool_t *tracked_chunk_pool; 98924e600dSMike Snitzer struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 99924e600dSMike Snitzer 100ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 101ccc45ea8SJonathan Brassow struct dm_exception_store *store; 102ccc45ea8SJonathan Brassow 103ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 104ccc45ea8SJonathan Brassow 105924e600dSMike Snitzer /* Wait for events based on state_bits */ 106924e600dSMike Snitzer unsigned long state_bits; 107924e600dSMike Snitzer 108924e600dSMike Snitzer /* Range of chunks currently being merged. */ 109924e600dSMike Snitzer chunk_t first_merging_chunk; 110924e600dSMike Snitzer int num_merging_chunks; 1111e03f97eSMikulas Patocka 112d8ddb1cfSMike Snitzer /* 113d8ddb1cfSMike Snitzer * The merge operation failed if this flag is set. 
114d8ddb1cfSMike Snitzer * Failure modes are handled as follows: 115d8ddb1cfSMike Snitzer * - I/O error reading the header 116d8ddb1cfSMike Snitzer * => don't load the target; abort. 117d8ddb1cfSMike Snitzer * - Header does not have "valid" flag set 118d8ddb1cfSMike Snitzer * => use the origin; forget about the snapshot. 119d8ddb1cfSMike Snitzer * - I/O error when reading exceptions 120d8ddb1cfSMike Snitzer * => don't load the target; abort. 121d8ddb1cfSMike Snitzer * (We can't use the intermediate origin state.) 122d8ddb1cfSMike Snitzer * - I/O error while merging 123d8ddb1cfSMike Snitzer * => stop merging; set merge_failed; process I/O normally. 124d8ddb1cfSMike Snitzer */ 125d8ddb1cfSMike Snitzer int merge_failed; 126d8ddb1cfSMike Snitzer 1279fe86254SMikulas Patocka /* 1289fe86254SMikulas Patocka * Incoming bios that overlap with chunks being merged must wait 1299fe86254SMikulas Patocka * for them to be committed. 1309fe86254SMikulas Patocka */ 1319fe86254SMikulas Patocka struct bio_list bios_queued_during_merge; 132ccc45ea8SJonathan Brassow }; 133ccc45ea8SJonathan Brassow 1341e03f97eSMikulas Patocka /* 1351e03f97eSMikulas Patocka * state_bits: 1361e03f97eSMikulas Patocka * RUNNING_MERGE - Merge operation is in progress. 1371e03f97eSMikulas Patocka * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; 1381e03f97eSMikulas Patocka * cleared afterwards. 
1391e03f97eSMikulas Patocka */ 1401e03f97eSMikulas Patocka #define RUNNING_MERGE 0 1411e03f97eSMikulas Patocka #define SHUTDOWN_MERGE 1 1421e03f97eSMikulas Patocka 143c2411045SMikulas Patocka struct dm_dev *dm_snap_origin(struct dm_snapshot *s) 144c2411045SMikulas Patocka { 145c2411045SMikulas Patocka return s->origin; 146c2411045SMikulas Patocka } 147c2411045SMikulas Patocka EXPORT_SYMBOL(dm_snap_origin); 148c2411045SMikulas Patocka 149fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 150fc56f6fbSMike Snitzer { 151fc56f6fbSMike Snitzer return s->cow; 152fc56f6fbSMike Snitzer } 153fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 154fc56f6fbSMike Snitzer 155ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 156ccc45ea8SJonathan Brassow chunk_t chunk) 157ccc45ea8SJonathan Brassow { 158ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 159ccc45ea8SJonathan Brassow } 160ccc45ea8SJonathan Brassow 161ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 162ccc45ea8SJonathan Brassow { 163ccc45ea8SJonathan Brassow /* 164ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 165ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 
166ccc45ea8SJonathan Brassow */ 167ccc45ea8SJonathan Brassow return lhs == rhs; 168ccc45ea8SJonathan Brassow } 169ccc45ea8SJonathan Brassow 170028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1711d4989c8SJon Brassow struct dm_exception e; 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds /* 1741da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 1751da177e4SLinus Torvalds * in a bio list 1761da177e4SLinus Torvalds */ 1771da177e4SLinus Torvalds struct bio_list origin_bios; 1781da177e4SLinus Torvalds struct bio_list snapshot_bios; 1791da177e4SLinus Torvalds 1801da177e4SLinus Torvalds /* Pointer back to snapshot context */ 1811da177e4SLinus Torvalds struct dm_snapshot *snap; 1821da177e4SLinus Torvalds 1831da177e4SLinus Torvalds /* 1841da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 1851da177e4SLinus Torvalds * kcopyd. 1861da177e4SLinus Torvalds */ 1871da177e4SLinus Torvalds int started; 1881da177e4SLinus Torvalds }; 1891da177e4SLinus Torvalds 1901da177e4SLinus Torvalds /* 1911da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 1921da177e4SLinus Torvalds * a lock to protect it 1931da177e4SLinus Torvalds */ 194e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 195e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 1961da177e4SLinus Torvalds 197cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 198cd45daffSMikulas Patocka struct hlist_node node; 199cd45daffSMikulas Patocka chunk_t chunk; 200cd45daffSMikulas Patocka }; 201cd45daffSMikulas Patocka 202cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache; 203cd45daffSMikulas Patocka 204cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 205cd45daffSMikulas Patocka chunk_t chunk) 206cd45daffSMikulas Patocka { 207cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 
208cd45daffSMikulas Patocka GFP_NOIO); 209cd45daffSMikulas Patocka unsigned long flags; 210cd45daffSMikulas Patocka 211cd45daffSMikulas Patocka c->chunk = chunk; 212cd45daffSMikulas Patocka 213cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 214cd45daffSMikulas Patocka hlist_add_head(&c->node, 215cd45daffSMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 216cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 217cd45daffSMikulas Patocka 218cd45daffSMikulas Patocka return c; 219cd45daffSMikulas Patocka } 220cd45daffSMikulas Patocka 221cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, 222cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c) 223cd45daffSMikulas Patocka { 224cd45daffSMikulas Patocka unsigned long flags; 225cd45daffSMikulas Patocka 226cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 227cd45daffSMikulas Patocka hlist_del(&c->node); 228cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 229cd45daffSMikulas Patocka 230cd45daffSMikulas Patocka mempool_free(c, s->tracked_chunk_pool); 231cd45daffSMikulas Patocka } 232cd45daffSMikulas Patocka 233a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 234a8d41b59SMikulas Patocka { 235a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 236a8d41b59SMikulas Patocka struct hlist_node *hn; 237a8d41b59SMikulas Patocka int found = 0; 238a8d41b59SMikulas Patocka 239a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 240a8d41b59SMikulas Patocka 241a8d41b59SMikulas Patocka hlist_for_each_entry(c, hn, 242a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 243a8d41b59SMikulas Patocka if (c->chunk == chunk) { 244a8d41b59SMikulas Patocka found = 1; 245a8d41b59SMikulas Patocka break; 246a8d41b59SMikulas Patocka } 247a8d41b59SMikulas Patocka } 248a8d41b59SMikulas Patocka 
249a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 250a8d41b59SMikulas Patocka 251a8d41b59SMikulas Patocka return found; 252a8d41b59SMikulas Patocka } 253a8d41b59SMikulas Patocka 2541da177e4SLinus Torvalds /* 255615d1eb9SMike Snitzer * This conflicting I/O is extremely improbable in the caller, 256615d1eb9SMike Snitzer * so msleep(1) is sufficient and there is no need for a wait queue. 257615d1eb9SMike Snitzer */ 258615d1eb9SMike Snitzer static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) 259615d1eb9SMike Snitzer { 260615d1eb9SMike Snitzer while (__chunk_is_tracked(s, chunk)) 261615d1eb9SMike Snitzer msleep(1); 262615d1eb9SMike Snitzer } 263615d1eb9SMike Snitzer 264615d1eb9SMike Snitzer /* 2651da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 2661da177e4SLinus Torvalds */ 2671da177e4SLinus Torvalds struct origin { 2681da177e4SLinus Torvalds /* The origin device */ 2691da177e4SLinus Torvalds struct block_device *bdev; 2701da177e4SLinus Torvalds 2711da177e4SLinus Torvalds struct list_head hash_list; 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds /* List of snapshots for this origin */ 2741da177e4SLinus Torvalds struct list_head snapshots; 2751da177e4SLinus Torvalds }; 2761da177e4SLinus Torvalds 2771da177e4SLinus Torvalds /* 2781da177e4SLinus Torvalds * Size of the hash table for origin volumes. 
If we make this 2791da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 2801da177e4SLinus Torvalds */ 2811da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 2821da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 2831da177e4SLinus Torvalds static struct list_head *_origins; 2841da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 2851da177e4SLinus Torvalds 28673dfd078SMikulas Patocka static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); 28773dfd078SMikulas Patocka static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); 28873dfd078SMikulas Patocka static uint64_t _pending_exceptions_done_count; 28973dfd078SMikulas Patocka 2901da177e4SLinus Torvalds static int init_origin_hash(void) 2911da177e4SLinus Torvalds { 2921da177e4SLinus Torvalds int i; 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 2951da177e4SLinus Torvalds GFP_KERNEL); 2961da177e4SLinus Torvalds if (!_origins) { 29772d94861SAlasdair G Kergon DMERR("unable to allocate memory"); 2981da177e4SLinus Torvalds return -ENOMEM; 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 3021da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 3031da177e4SLinus Torvalds init_rwsem(&_origins_lock); 3041da177e4SLinus Torvalds 3051da177e4SLinus Torvalds return 0; 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds 3081da177e4SLinus Torvalds static void exit_origin_hash(void) 3091da177e4SLinus Torvalds { 3101da177e4SLinus Torvalds kfree(_origins); 3111da177e4SLinus Torvalds } 3121da177e4SLinus Torvalds 313028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 3141da177e4SLinus Torvalds { 3151da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 3161da177e4SLinus Torvalds } 3171da177e4SLinus Torvalds 3181da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 
3191da177e4SLinus Torvalds { 3201da177e4SLinus Torvalds struct list_head *ol; 3211da177e4SLinus Torvalds struct origin *o; 3221da177e4SLinus Torvalds 3231da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 3241da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 3251da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 3261da177e4SLinus Torvalds return o; 3271da177e4SLinus Torvalds 3281da177e4SLinus Torvalds return NULL; 3291da177e4SLinus Torvalds } 3301da177e4SLinus Torvalds 3311da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 3321da177e4SLinus Torvalds { 3331da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 3341da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 3351da177e4SLinus Torvalds } 3361da177e4SLinus Torvalds 3371da177e4SLinus Torvalds /* 338c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 339c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 340c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 341c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 3429d3b15c4SMikulas Patocka * snap_merge - an existing snapshot-merge target linked to the same origin. 3439d3b15c4SMikulas Patocka * There can be at most one snapshot-merge target. The parameter is optional. 344c1f0c183SMike Snitzer * 3459d3b15c4SMikulas Patocka * Possible return values and states of snap_src and snap_dest. 
*   0: NULL, NULL           - first new snapshot
 *   1: snap_src, NULL       - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL       - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest      - source got destroyed without handover
 *
 * The output pointers are only written when a match is found, so callers
 * must initialise them (e.g. to NULL) beforehand.  Each may be NULL to
 * skip that output.
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		/* Checked for every snapshot of the origin, whatever its cow */
		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
			*snap_merge = s;
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		/* Sample the active flag under the snapshot's own lock */
		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		/* Active snapshots are sources, inactive ones destinations */
		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}

Snitzer /* 390c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 391c1f0c183SMike Snitzer * otherwise returns 0. 392c1f0c183SMike Snitzer */ 393c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 394c1f0c183SMike Snitzer { 395c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 3969d3b15c4SMikulas Patocka struct dm_snapshot *snap_merge = NULL; 397c1f0c183SMike Snitzer 398c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 3999d3b15c4SMikulas Patocka if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, 4009d3b15c4SMikulas Patocka &snap_merge) == 2) || 401c1f0c183SMike Snitzer snap_dest) { 402c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 403c1f0c183SMike Snitzer "table handover failed"; 404c1f0c183SMike Snitzer return -EINVAL; 405c1f0c183SMike Snitzer } 406c1f0c183SMike Snitzer 407c1f0c183SMike Snitzer /* 408c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 409c1f0c183SMike Snitzer * destination. 410c1f0c183SMike Snitzer */ 411c1f0c183SMike Snitzer if (!snap_src) 412c1f0c183SMike Snitzer return 0; 413c1f0c183SMike Snitzer 4149d3b15c4SMikulas Patocka /* 4159d3b15c4SMikulas Patocka * Non-snapshot-merge handover? 4169d3b15c4SMikulas Patocka */ 4179d3b15c4SMikulas Patocka if (!dm_target_is_snapshot_merge(snap->ti)) 4189d3b15c4SMikulas Patocka return 1; 4199d3b15c4SMikulas Patocka 4209d3b15c4SMikulas Patocka /* 4219d3b15c4SMikulas Patocka * Do not allow more than one merging snapshot. 
4229d3b15c4SMikulas Patocka */ 4239d3b15c4SMikulas Patocka if (snap_merge) { 4249d3b15c4SMikulas Patocka snap->ti->error = "A snapshot is already merging."; 4259d3b15c4SMikulas Patocka return -EINVAL; 4269d3b15c4SMikulas Patocka } 4279d3b15c4SMikulas Patocka 4281e03f97eSMikulas Patocka if (!snap_src->store->type->prepare_merge || 4291e03f97eSMikulas Patocka !snap_src->store->type->commit_merge) { 4301e03f97eSMikulas Patocka snap->ti->error = "Snapshot exception store does not " 4311e03f97eSMikulas Patocka "support snapshot-merge."; 4321e03f97eSMikulas Patocka return -EINVAL; 4331e03f97eSMikulas Patocka } 4341e03f97eSMikulas Patocka 435c1f0c183SMike Snitzer return 1; 436c1f0c183SMike Snitzer } 437c1f0c183SMike Snitzer 438c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 439c1f0c183SMike Snitzer { 440c1f0c183SMike Snitzer struct dm_snapshot *l; 441c1f0c183SMike Snitzer 442c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 443c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 444c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 445c1f0c183SMike Snitzer break; 446c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 447c1f0c183SMike Snitzer } 448c1f0c183SMike Snitzer 449c1f0c183SMike Snitzer /* 4501da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 4511da177e4SLinus Torvalds * up when the origin has a write on it. 452c1f0c183SMike Snitzer * 453c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 454c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 455c1f0c183SMike Snitzer * otherwise returns 0. 
4561da177e4SLinus Torvalds */ 4571da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 4581da177e4SLinus Torvalds { 459c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 4601da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 461c1f0c183SMike Snitzer int r = 0; 4621da177e4SLinus Torvalds 46360c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 46460c856c8SMikulas Patocka if (!new_o) 46560c856c8SMikulas Patocka return -ENOMEM; 46660c856c8SMikulas Patocka 4671da177e4SLinus Torvalds down_write(&_origins_lock); 4681da177e4SLinus Torvalds 469c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 470c1f0c183SMike Snitzer if (r < 0) { 471c1f0c183SMike Snitzer kfree(new_o); 472c1f0c183SMike Snitzer goto out; 473c1f0c183SMike Snitzer } 474c1f0c183SMike Snitzer 475c1f0c183SMike Snitzer o = __lookup_origin(bdev); 47660c856c8SMikulas Patocka if (o) 47760c856c8SMikulas Patocka kfree(new_o); 47860c856c8SMikulas Patocka else { 4791da177e4SLinus Torvalds /* New origin */ 48060c856c8SMikulas Patocka o = new_o; 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds /* Initialise the struct */ 4831da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 4841da177e4SLinus Torvalds o->bdev = bdev; 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds __insert_origin(o); 4871da177e4SLinus Torvalds } 4881da177e4SLinus Torvalds 489c1f0c183SMike Snitzer __insert_snapshot(o, snap); 490c1f0c183SMike Snitzer 491c1f0c183SMike Snitzer out: 492c1f0c183SMike Snitzer up_write(&_origins_lock); 493c1f0c183SMike Snitzer 494c1f0c183SMike Snitzer return r; 495c1f0c183SMike Snitzer } 496c1f0c183SMike Snitzer 497c1f0c183SMike Snitzer /* 498c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 
499c1f0c183SMike Snitzer */ 500c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 501c1f0c183SMike Snitzer { 502c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 503c1f0c183SMike Snitzer 504c1f0c183SMike Snitzer down_write(&_origins_lock); 505c1f0c183SMike Snitzer 506c1f0c183SMike Snitzer list_del(&s->list); 507c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 5081da177e4SLinus Torvalds 5091da177e4SLinus Torvalds up_write(&_origins_lock); 5101da177e4SLinus Torvalds } 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 5131da177e4SLinus Torvalds { 5141da177e4SLinus Torvalds struct origin *o; 5151da177e4SLinus Torvalds 5161da177e4SLinus Torvalds down_write(&_origins_lock); 5171da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 5181da177e4SLinus Torvalds 5191da177e4SLinus Torvalds list_del(&s->list); 520c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 5211da177e4SLinus Torvalds list_del(&o->hash_list); 5221da177e4SLinus Torvalds kfree(o); 5231da177e4SLinus Torvalds } 5241da177e4SLinus Torvalds 5251da177e4SLinus Torvalds up_write(&_origins_lock); 5261da177e4SLinus Torvalds } 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds /* 5291da177e4SLinus Torvalds * Implementation of the exception hash tables. 530d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 531d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 
5321da177e4SLinus Torvalds */ 5333510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 5343510cb94SJon Brassow uint32_t size, unsigned hash_shift) 5351da177e4SLinus Torvalds { 5361da177e4SLinus Torvalds unsigned int i; 5371da177e4SLinus Torvalds 538d74f81f8SMilan Broz et->hash_shift = hash_shift; 5391da177e4SLinus Torvalds et->hash_mask = size - 1; 5401da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 5411da177e4SLinus Torvalds if (!et->table) 5421da177e4SLinus Torvalds return -ENOMEM; 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds for (i = 0; i < size; i++) 5451da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 5461da177e4SLinus Torvalds 5471da177e4SLinus Torvalds return 0; 5481da177e4SLinus Torvalds } 5491da177e4SLinus Torvalds 5503510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 551191437a5SJon Brassow struct kmem_cache *mem) 5521da177e4SLinus Torvalds { 5531da177e4SLinus Torvalds struct list_head *slot; 5541d4989c8SJon Brassow struct dm_exception *ex, *next; 5551da177e4SLinus Torvalds int i, size; 5561da177e4SLinus Torvalds 5571da177e4SLinus Torvalds size = et->hash_mask + 1; 5581da177e4SLinus Torvalds for (i = 0; i < size; i++) { 5591da177e4SLinus Torvalds slot = et->table + i; 5601da177e4SLinus Torvalds 5611da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 5621da177e4SLinus Torvalds kmem_cache_free(mem, ex); 5631da177e4SLinus Torvalds } 5641da177e4SLinus Torvalds 5651da177e4SLinus Torvalds vfree(et->table); 5661da177e4SLinus Torvalds } 5671da177e4SLinus Torvalds 568191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 5691da177e4SLinus Torvalds { 570d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 5711da177e4SLinus Torvalds } 5721da177e4SLinus Torvalds 5733510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 5741da177e4SLinus Torvalds { 
5751da177e4SLinus Torvalds list_del(&e->hash_list); 5761da177e4SLinus Torvalds } 5771da177e4SLinus Torvalds 5781da177e4SLinus Torvalds /* 5791da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 5801da177e4SLinus Torvalds * remapped. 5811da177e4SLinus Torvalds */ 5823510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 5831da177e4SLinus Torvalds chunk_t chunk) 5841da177e4SLinus Torvalds { 5851da177e4SLinus Torvalds struct list_head *slot; 5861d4989c8SJon Brassow struct dm_exception *e; 5871da177e4SLinus Torvalds 5881da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 5891da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 590d74f81f8SMilan Broz if (chunk >= e->old_chunk && 591d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 5921da177e4SLinus Torvalds return e; 5931da177e4SLinus Torvalds 5941da177e4SLinus Torvalds return NULL; 5951da177e4SLinus Torvalds } 5961da177e4SLinus Torvalds 5973510cb94SJon Brassow static struct dm_exception *alloc_completed_exception(void) 5981da177e4SLinus Torvalds { 5991d4989c8SJon Brassow struct dm_exception *e; 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 6021da177e4SLinus Torvalds if (!e) 6031da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds return e; 6061da177e4SLinus Torvalds } 6071da177e4SLinus Torvalds 6083510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 6091da177e4SLinus Torvalds { 6101da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 6111da177e4SLinus Torvalds } 6121da177e4SLinus Torvalds 61392e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 6141da177e4SLinus Torvalds { 61592e86812SMikulas Patocka struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 
61692e86812SMikulas Patocka GFP_NOIO); 61792e86812SMikulas Patocka 618879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 61992e86812SMikulas Patocka pe->snap = s; 62092e86812SMikulas Patocka 62192e86812SMikulas Patocka return pe; 6221da177e4SLinus Torvalds } 6231da177e4SLinus Torvalds 624028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 6251da177e4SLinus Torvalds { 626879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 627879129d2SMikulas Patocka 628879129d2SMikulas Patocka mempool_free(pe, s->pending_pool); 629879129d2SMikulas Patocka smp_mb__before_atomic_dec(); 630879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 6311da177e4SLinus Torvalds } 6321da177e4SLinus Torvalds 6333510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 6341d4989c8SJon Brassow struct dm_exception *new_e) 635d74f81f8SMilan Broz { 636d74f81f8SMilan Broz struct list_head *l; 6371d4989c8SJon Brassow struct dm_exception *e = NULL; 638d74f81f8SMilan Broz 639d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 640d74f81f8SMilan Broz 641d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 642d74f81f8SMilan Broz if (!eh->hash_shift) 643d74f81f8SMilan Broz goto out; 644d74f81f8SMilan Broz 645d74f81f8SMilan Broz /* List is ordered by old_chunk */ 646d74f81f8SMilan Broz list_for_each_entry_reverse(e, l, hash_list) { 647d74f81f8SMilan Broz /* Insert after an existing chunk? 
*/ 648d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 649d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 650d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 651d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 652d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 6533510cb94SJon Brassow free_completed_exception(new_e); 654d74f81f8SMilan Broz return; 655d74f81f8SMilan Broz } 656d74f81f8SMilan Broz 657d74f81f8SMilan Broz /* Insert before an existing chunk? */ 658d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 659d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 660d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 661d74f81f8SMilan Broz e->old_chunk--; 662d74f81f8SMilan Broz e->new_chunk--; 6633510cb94SJon Brassow free_completed_exception(new_e); 664d74f81f8SMilan Broz return; 665d74f81f8SMilan Broz } 666d74f81f8SMilan Broz 667d74f81f8SMilan Broz if (new_e->old_chunk > e->old_chunk) 668d74f81f8SMilan Broz break; 669d74f81f8SMilan Broz } 670d74f81f8SMilan Broz 671d74f81f8SMilan Broz out: 672d74f81f8SMilan Broz list_add(&new_e->hash_list, e ? &e->hash_list : l); 673d74f81f8SMilan Broz } 674d74f81f8SMilan Broz 675a159c1acSJonathan Brassow /* 676a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 677a159c1acSJonathan Brassow * initialising. 
678a159c1acSJonathan Brassow */ 679a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 6801da177e4SLinus Torvalds { 681a159c1acSJonathan Brassow struct dm_snapshot *s = context; 6821d4989c8SJon Brassow struct dm_exception *e; 6831da177e4SLinus Torvalds 6843510cb94SJon Brassow e = alloc_completed_exception(); 6851da177e4SLinus Torvalds if (!e) 6861da177e4SLinus Torvalds return -ENOMEM; 6871da177e4SLinus Torvalds 6881da177e4SLinus Torvalds e->old_chunk = old; 689d74f81f8SMilan Broz 690d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 6911da177e4SLinus Torvalds e->new_chunk = new; 692d74f81f8SMilan Broz 6933510cb94SJon Brassow dm_insert_exception(&s->complete, e); 694d74f81f8SMilan Broz 6951da177e4SLinus Torvalds return 0; 6961da177e4SLinus Torvalds } 6971da177e4SLinus Torvalds 6987e201b35SMikulas Patocka /* 6997e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 7007e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 7017e201b35SMikulas Patocka */ 7027e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o) 7037e201b35SMikulas Patocka { 7047e201b35SMikulas Patocka struct dm_snapshot *snap; 7057e201b35SMikulas Patocka unsigned chunk_size = 0; 7067e201b35SMikulas Patocka 7077e201b35SMikulas Patocka if (o) 7087e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 7097e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 7107e201b35SMikulas Patocka snap->store->chunk_size); 7117e201b35SMikulas Patocka 7127e201b35SMikulas Patocka return chunk_size; 7137e201b35SMikulas Patocka } 7147e201b35SMikulas Patocka 7151da177e4SLinus Torvalds /* 7161da177e4SLinus Torvalds * Hard coded magic. 
7171da177e4SLinus Torvalds */ 7181da177e4SLinus Torvalds static int calc_max_buckets(void) 7191da177e4SLinus Torvalds { 7201da177e4SLinus Torvalds /* use a fixed size of 2MB */ 7211da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 7221da177e4SLinus Torvalds mem /= sizeof(struct list_head); 7231da177e4SLinus Torvalds 7241da177e4SLinus Torvalds return mem; 7251da177e4SLinus Torvalds } 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds /* 7281da177e4SLinus Torvalds * Allocate room for a suitable hash table. 7291da177e4SLinus Torvalds */ 730fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 7311da177e4SLinus Torvalds { 7321da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 7331da177e4SLinus Torvalds 7341da177e4SLinus Torvalds /* 7351da177e4SLinus Torvalds * Calculate based on the size of the original volume or 7361da177e4SLinus Torvalds * the COW volume... 7371da177e4SLinus Torvalds */ 738fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 7391da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 7401da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 7411da177e4SLinus Torvalds 742fee1998eSJonathan Brassow hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 7431da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 7441da177e4SLinus Torvalds 7458e87b9b8SMikulas Patocka if (hash_size < 64) 7468e87b9b8SMikulas Patocka hash_size = 64; 7478defd830SRobert P. J. 
Day hash_size = rounddown_pow_of_two(hash_size); 7483510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 749d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 7501da177e4SLinus Torvalds return -ENOMEM; 7511da177e4SLinus Torvalds 7521da177e4SLinus Torvalds /* 7531da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 7541da177e4SLinus Torvalds * Make this smaller than the real hash table 7551da177e4SLinus Torvalds */ 7561da177e4SLinus Torvalds hash_size >>= 3; 7571da177e4SLinus Torvalds if (hash_size < 64) 7581da177e4SLinus Torvalds hash_size = 64; 7591da177e4SLinus Torvalds 7603510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 7613510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 7621da177e4SLinus Torvalds return -ENOMEM; 7631da177e4SLinus Torvalds } 7641da177e4SLinus Torvalds 7651da177e4SLinus Torvalds return 0; 7661da177e4SLinus Torvalds } 7671da177e4SLinus Torvalds 7681e03f97eSMikulas Patocka static void merge_shutdown(struct dm_snapshot *s) 7691e03f97eSMikulas Patocka { 7701e03f97eSMikulas Patocka clear_bit_unlock(RUNNING_MERGE, &s->state_bits); 7711e03f97eSMikulas Patocka smp_mb__after_clear_bit(); 7721e03f97eSMikulas Patocka wake_up_bit(&s->state_bits, RUNNING_MERGE); 7731e03f97eSMikulas Patocka } 7741e03f97eSMikulas Patocka 7759fe86254SMikulas Patocka static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) 7769fe86254SMikulas Patocka { 7779fe86254SMikulas Patocka s->first_merging_chunk = 0; 7789fe86254SMikulas Patocka s->num_merging_chunks = 0; 7799fe86254SMikulas Patocka 7809fe86254SMikulas Patocka return bio_list_get(&s->bios_queued_during_merge); 7819fe86254SMikulas Patocka } 7829fe86254SMikulas Patocka 7831e03f97eSMikulas Patocka /* 7841e03f97eSMikulas Patocka * Remove one chunk from the index of completed exceptions. 
7851e03f97eSMikulas Patocka */ 7861e03f97eSMikulas Patocka static int __remove_single_exception_chunk(struct dm_snapshot *s, 7871e03f97eSMikulas Patocka chunk_t old_chunk) 7881e03f97eSMikulas Patocka { 7891e03f97eSMikulas Patocka struct dm_exception *e; 7901e03f97eSMikulas Patocka 7911e03f97eSMikulas Patocka e = dm_lookup_exception(&s->complete, old_chunk); 7921e03f97eSMikulas Patocka if (!e) { 7931e03f97eSMikulas Patocka DMERR("Corruption detected: exception for block %llu is " 7941e03f97eSMikulas Patocka "on disk but not in memory", 7951e03f97eSMikulas Patocka (unsigned long long)old_chunk); 7961e03f97eSMikulas Patocka return -EINVAL; 7971e03f97eSMikulas Patocka } 7981e03f97eSMikulas Patocka 7991e03f97eSMikulas Patocka /* 8001e03f97eSMikulas Patocka * If this is the only chunk using this exception, remove exception. 8011e03f97eSMikulas Patocka */ 8021e03f97eSMikulas Patocka if (!dm_consecutive_chunk_count(e)) { 8031e03f97eSMikulas Patocka dm_remove_exception(e); 8041e03f97eSMikulas Patocka free_completed_exception(e); 8051e03f97eSMikulas Patocka return 0; 8061e03f97eSMikulas Patocka } 8071e03f97eSMikulas Patocka 8081e03f97eSMikulas Patocka /* 8091e03f97eSMikulas Patocka * The chunk may be either at the beginning or the end of a 8101e03f97eSMikulas Patocka * group of consecutive chunks - never in the middle. We are 8111e03f97eSMikulas Patocka * removing chunks in the opposite order to that in which they 8121e03f97eSMikulas Patocka * were added, so this should always be true. 8131e03f97eSMikulas Patocka * Decrement the consecutive chunk counter and adjust the 8141e03f97eSMikulas Patocka * starting point if necessary. 
8151e03f97eSMikulas Patocka */ 8161e03f97eSMikulas Patocka if (old_chunk == e->old_chunk) { 8171e03f97eSMikulas Patocka e->old_chunk++; 8181e03f97eSMikulas Patocka e->new_chunk++; 8191e03f97eSMikulas Patocka } else if (old_chunk != e->old_chunk + 8201e03f97eSMikulas Patocka dm_consecutive_chunk_count(e)) { 8211e03f97eSMikulas Patocka DMERR("Attempt to merge block %llu from the " 8221e03f97eSMikulas Patocka "middle of a chunk range [%llu - %llu]", 8231e03f97eSMikulas Patocka (unsigned long long)old_chunk, 8241e03f97eSMikulas Patocka (unsigned long long)e->old_chunk, 8251e03f97eSMikulas Patocka (unsigned long long) 8261e03f97eSMikulas Patocka e->old_chunk + dm_consecutive_chunk_count(e)); 8271e03f97eSMikulas Patocka return -EINVAL; 8281e03f97eSMikulas Patocka } 8291e03f97eSMikulas Patocka 8301e03f97eSMikulas Patocka dm_consecutive_chunk_count_dec(e); 8311e03f97eSMikulas Patocka 8321e03f97eSMikulas Patocka return 0; 8331e03f97eSMikulas Patocka } 8341e03f97eSMikulas Patocka 8359fe86254SMikulas Patocka static void flush_bios(struct bio *bio); 8369fe86254SMikulas Patocka 8379fe86254SMikulas Patocka static int remove_single_exception_chunk(struct dm_snapshot *s) 8381e03f97eSMikulas Patocka { 8399fe86254SMikulas Patocka struct bio *b = NULL; 8409fe86254SMikulas Patocka int r; 8419fe86254SMikulas Patocka chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; 8421e03f97eSMikulas Patocka 8431e03f97eSMikulas Patocka down_write(&s->lock); 8449fe86254SMikulas Patocka 8459fe86254SMikulas Patocka /* 8469fe86254SMikulas Patocka * Process chunks (and associated exceptions) in reverse order 8479fe86254SMikulas Patocka * so that dm_consecutive_chunk_count_dec() accounting works. 
8489fe86254SMikulas Patocka */ 8499fe86254SMikulas Patocka do { 8501e03f97eSMikulas Patocka r = __remove_single_exception_chunk(s, old_chunk); 8519fe86254SMikulas Patocka if (r) 8529fe86254SMikulas Patocka goto out; 8539fe86254SMikulas Patocka } while (old_chunk-- > s->first_merging_chunk); 8549fe86254SMikulas Patocka 8559fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 8569fe86254SMikulas Patocka 8579fe86254SMikulas Patocka out: 8581e03f97eSMikulas Patocka up_write(&s->lock); 8599fe86254SMikulas Patocka if (b) 8609fe86254SMikulas Patocka flush_bios(b); 8611e03f97eSMikulas Patocka 8621e03f97eSMikulas Patocka return r; 8631e03f97eSMikulas Patocka } 8641e03f97eSMikulas Patocka 86573dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 86673dfd078SMikulas Patocka sector_t sector, unsigned chunk_size); 86773dfd078SMikulas Patocka 8681e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, 8691e03f97eSMikulas Patocka void *context); 8701e03f97eSMikulas Patocka 87173dfd078SMikulas Patocka static uint64_t read_pending_exceptions_done_count(void) 87273dfd078SMikulas Patocka { 87373dfd078SMikulas Patocka uint64_t pending_exceptions_done; 87473dfd078SMikulas Patocka 87573dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 87673dfd078SMikulas Patocka pending_exceptions_done = _pending_exceptions_done_count; 87773dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 87873dfd078SMikulas Patocka 87973dfd078SMikulas Patocka return pending_exceptions_done; 88073dfd078SMikulas Patocka } 88173dfd078SMikulas Patocka 88273dfd078SMikulas Patocka static void increment_pending_exceptions_done_count(void) 88373dfd078SMikulas Patocka { 88473dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 88573dfd078SMikulas Patocka _pending_exceptions_done_count++; 88673dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 88773dfd078SMikulas 
Patocka 88873dfd078SMikulas Patocka wake_up_all(&_pending_exceptions_done); 88973dfd078SMikulas Patocka } 89073dfd078SMikulas Patocka 8911e03f97eSMikulas Patocka static void snapshot_merge_next_chunks(struct dm_snapshot *s) 8921e03f97eSMikulas Patocka { 8938a2d5286SMike Snitzer int i, linear_chunks; 8941e03f97eSMikulas Patocka chunk_t old_chunk, new_chunk; 8951e03f97eSMikulas Patocka struct dm_io_region src, dest; 8968a2d5286SMike Snitzer sector_t io_size; 89773dfd078SMikulas Patocka uint64_t previous_count; 8981e03f97eSMikulas Patocka 8991e03f97eSMikulas Patocka BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); 9001e03f97eSMikulas Patocka if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) 9011e03f97eSMikulas Patocka goto shut; 9021e03f97eSMikulas Patocka 9031e03f97eSMikulas Patocka /* 9041e03f97eSMikulas Patocka * valid flag never changes during merge, so no lock required. 9051e03f97eSMikulas Patocka */ 9061e03f97eSMikulas Patocka if (!s->valid) { 9071e03f97eSMikulas Patocka DMERR("Snapshot is invalid: can't merge"); 9081e03f97eSMikulas Patocka goto shut; 9091e03f97eSMikulas Patocka } 9101e03f97eSMikulas Patocka 9118a2d5286SMike Snitzer linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk, 9128a2d5286SMike Snitzer &new_chunk); 9138a2d5286SMike Snitzer if (linear_chunks <= 0) { 914d8ddb1cfSMike Snitzer if (linear_chunks < 0) { 9151e03f97eSMikulas Patocka DMERR("Read error in exception store: " 9161e03f97eSMikulas Patocka "shutting down merge"); 917d8ddb1cfSMike Snitzer down_write(&s->lock); 918d8ddb1cfSMike Snitzer s->merge_failed = 1; 919d8ddb1cfSMike Snitzer up_write(&s->lock); 920d8ddb1cfSMike Snitzer } 9211e03f97eSMikulas Patocka goto shut; 9221e03f97eSMikulas Patocka } 9231e03f97eSMikulas Patocka 9248a2d5286SMike Snitzer /* Adjust old_chunk and new_chunk to reflect start of linear region */ 9258a2d5286SMike Snitzer old_chunk = old_chunk + 1 - linear_chunks; 9268a2d5286SMike Snitzer new_chunk = new_chunk + 1 - linear_chunks; 9278a2d5286SMike 
Snitzer 9288a2d5286SMike Snitzer /* 9298a2d5286SMike Snitzer * Use one (potentially large) I/O to copy all 'linear_chunks' 9308a2d5286SMike Snitzer * from the exception store to the origin 9318a2d5286SMike Snitzer */ 9328a2d5286SMike Snitzer io_size = linear_chunks * s->store->chunk_size; 9331e03f97eSMikulas Patocka 9341e03f97eSMikulas Patocka dest.bdev = s->origin->bdev; 9351e03f97eSMikulas Patocka dest.sector = chunk_to_sector(s->store, old_chunk); 9368a2d5286SMike Snitzer dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector); 9371e03f97eSMikulas Patocka 9381e03f97eSMikulas Patocka src.bdev = s->cow->bdev; 9391e03f97eSMikulas Patocka src.sector = chunk_to_sector(s->store, new_chunk); 9401e03f97eSMikulas Patocka src.count = dest.count; 9411e03f97eSMikulas Patocka 94273dfd078SMikulas Patocka /* 94373dfd078SMikulas Patocka * Reallocate any exceptions needed in other snapshots then 94473dfd078SMikulas Patocka * wait for the pending exceptions to complete. 94573dfd078SMikulas Patocka * Each time any pending exception (globally on the system) 94673dfd078SMikulas Patocka * completes we are woken and repeat the process to find out 94773dfd078SMikulas Patocka * if we can proceed. While this may not seem a particularly 94873dfd078SMikulas Patocka * efficient algorithm, it is not expected to have any 94973dfd078SMikulas Patocka * significant impact on performance. 95073dfd078SMikulas Patocka */ 95173dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 9528a2d5286SMike Snitzer while (origin_write_extent(s, dest.sector, io_size)) { 95373dfd078SMikulas Patocka wait_event(_pending_exceptions_done, 95473dfd078SMikulas Patocka (read_pending_exceptions_done_count() != 95573dfd078SMikulas Patocka previous_count)); 95673dfd078SMikulas Patocka /* Retry after the wait, until all exceptions are done. 
*/ 95773dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 95873dfd078SMikulas Patocka } 95973dfd078SMikulas Patocka 9609fe86254SMikulas Patocka down_write(&s->lock); 9619fe86254SMikulas Patocka s->first_merging_chunk = old_chunk; 9628a2d5286SMike Snitzer s->num_merging_chunks = linear_chunks; 9639fe86254SMikulas Patocka up_write(&s->lock); 9649fe86254SMikulas Patocka 9658a2d5286SMike Snitzer /* Wait until writes to all 'linear_chunks' drain */ 9668a2d5286SMike Snitzer for (i = 0; i < linear_chunks; i++) 9678a2d5286SMike Snitzer __check_for_conflicting_io(s, old_chunk + i); 9689fe86254SMikulas Patocka 9691e03f97eSMikulas Patocka dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); 9701e03f97eSMikulas Patocka return; 9711e03f97eSMikulas Patocka 9721e03f97eSMikulas Patocka shut: 9731e03f97eSMikulas Patocka merge_shutdown(s); 9741e03f97eSMikulas Patocka } 9751e03f97eSMikulas Patocka 9769fe86254SMikulas Patocka static void error_bios(struct bio *bio); 9779fe86254SMikulas Patocka 9781e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, void *context) 9791e03f97eSMikulas Patocka { 9801e03f97eSMikulas Patocka struct dm_snapshot *s = context; 9819fe86254SMikulas Patocka struct bio *b = NULL; 9821e03f97eSMikulas Patocka 9831e03f97eSMikulas Patocka if (read_err || write_err) { 9841e03f97eSMikulas Patocka if (read_err) 9851e03f97eSMikulas Patocka DMERR("Read error: shutting down merge."); 9861e03f97eSMikulas Patocka else 9871e03f97eSMikulas Patocka DMERR("Write error: shutting down merge."); 9881e03f97eSMikulas Patocka goto shut; 9891e03f97eSMikulas Patocka } 9901e03f97eSMikulas Patocka 9919fe86254SMikulas Patocka if (s->store->type->commit_merge(s->store, 9929fe86254SMikulas Patocka s->num_merging_chunks) < 0) { 9931e03f97eSMikulas Patocka DMERR("Write error in exception store: shutting down merge"); 9941e03f97eSMikulas Patocka goto shut; 9951e03f97eSMikulas Patocka } 9961e03f97eSMikulas 
Patocka 9979fe86254SMikulas Patocka if (remove_single_exception_chunk(s) < 0) 9989fe86254SMikulas Patocka goto shut; 9999fe86254SMikulas Patocka 10001e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 10011e03f97eSMikulas Patocka 10021e03f97eSMikulas Patocka return; 10031e03f97eSMikulas Patocka 10041e03f97eSMikulas Patocka shut: 10059fe86254SMikulas Patocka down_write(&s->lock); 1006d8ddb1cfSMike Snitzer s->merge_failed = 1; 10079fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 10089fe86254SMikulas Patocka up_write(&s->lock); 10099fe86254SMikulas Patocka error_bios(b); 10109fe86254SMikulas Patocka 10111e03f97eSMikulas Patocka merge_shutdown(s); 10121e03f97eSMikulas Patocka } 10131e03f97eSMikulas Patocka 10141e03f97eSMikulas Patocka static void start_merge(struct dm_snapshot *s) 10151e03f97eSMikulas Patocka { 10161e03f97eSMikulas Patocka if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits)) 10171e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 10181e03f97eSMikulas Patocka } 10191e03f97eSMikulas Patocka 10201e03f97eSMikulas Patocka static int wait_schedule(void *ptr) 10211e03f97eSMikulas Patocka { 10221e03f97eSMikulas Patocka schedule(); 10231e03f97eSMikulas Patocka 10241e03f97eSMikulas Patocka return 0; 10251e03f97eSMikulas Patocka } 10261e03f97eSMikulas Patocka 10271e03f97eSMikulas Patocka /* 10281e03f97eSMikulas Patocka * Stop the merging process and wait until it finishes. 
10291e03f97eSMikulas Patocka */ 10301e03f97eSMikulas Patocka static void stop_merge(struct dm_snapshot *s) 10311e03f97eSMikulas Patocka { 10321e03f97eSMikulas Patocka set_bit(SHUTDOWN_MERGE, &s->state_bits); 10331e03f97eSMikulas Patocka wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule, 10341e03f97eSMikulas Patocka TASK_UNINTERRUPTIBLE); 10351e03f97eSMikulas Patocka clear_bit(SHUTDOWN_MERGE, &s->state_bits); 10361e03f97eSMikulas Patocka } 10371e03f97eSMikulas Patocka 10381da177e4SLinus Torvalds /* 10391da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 10401da177e4SLinus Torvalds */ 10411da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 10421da177e4SLinus Torvalds { 10431da177e4SLinus Torvalds struct dm_snapshot *s; 1044cd45daffSMikulas Patocka int i; 10451da177e4SLinus Torvalds int r = -EINVAL; 1046fc56f6fbSMike Snitzer char *origin_path, *cow_path; 104710b8106aSMike Snitzer unsigned args_used, num_flush_requests = 1; 104810b8106aSMike Snitzer fmode_t origin_mode = FMODE_READ; 10491da177e4SLinus Torvalds 10504c7e3bf4SMark McLoughlin if (argc != 4) { 105172d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 10521da177e4SLinus Torvalds r = -EINVAL; 1053fc56f6fbSMike Snitzer goto bad; 10541da177e4SLinus Torvalds } 10551da177e4SLinus Torvalds 105610b8106aSMike Snitzer if (dm_target_is_snapshot_merge(ti)) { 105710b8106aSMike Snitzer num_flush_requests = 2; 105810b8106aSMike Snitzer origin_mode = FMODE_WRITE; 105910b8106aSMike Snitzer } 106010b8106aSMike Snitzer 10611da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 1062fee1998eSJonathan Brassow if (!s) { 10631da177e4SLinus Torvalds ti->error = "Cannot allocate snapshot context private " 10641da177e4SLinus Torvalds "structure"; 10651da177e4SLinus Torvalds r = -ENOMEM; 1066fc56f6fbSMike Snitzer goto bad; 10671da177e4SLinus Torvalds } 10681da177e4SLinus Torvalds 1069c2411045SMikulas Patocka 
origin_path = argv[0]; 1070c2411045SMikulas Patocka argv++; 1071c2411045SMikulas Patocka argc--; 1072c2411045SMikulas Patocka 1073c2411045SMikulas Patocka r = dm_get_device(ti, origin_path, origin_mode, &s->origin); 1074c2411045SMikulas Patocka if (r) { 1075c2411045SMikulas Patocka ti->error = "Cannot get origin device"; 1076c2411045SMikulas Patocka goto bad_origin; 1077c2411045SMikulas Patocka } 1078c2411045SMikulas Patocka 1079fc56f6fbSMike Snitzer cow_path = argv[0]; 1080fc56f6fbSMike Snitzer argv++; 1081fc56f6fbSMike Snitzer argc--; 1082fc56f6fbSMike Snitzer 1083024d37e9SMilan Broz r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); 1084fc56f6fbSMike Snitzer if (r) { 1085fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 1086fc56f6fbSMike Snitzer goto bad_cow; 1087fc56f6fbSMike Snitzer } 1088fc56f6fbSMike Snitzer 1089fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 1090fc56f6fbSMike Snitzer if (r) { 1091fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 1092fc56f6fbSMike Snitzer r = -EINVAL; 1093fc56f6fbSMike Snitzer goto bad_store; 1094fc56f6fbSMike Snitzer } 1095fc56f6fbSMike Snitzer 1096fc56f6fbSMike Snitzer argv += args_used; 1097fc56f6fbSMike Snitzer argc -= args_used; 1098fc56f6fbSMike Snitzer 1099fc56f6fbSMike Snitzer s->ti = ti; 11001da177e4SLinus Torvalds s->valid = 1; 1101aa14edebSAlasdair G Kergon s->active = 0; 1102879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 11031da177e4SLinus Torvalds init_rwsem(&s->lock); 1104c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 1105ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 11061e03f97eSMikulas Patocka s->state_bits = 0; 1107d8ddb1cfSMike Snitzer s->merge_failed = 0; 11089fe86254SMikulas Patocka s->first_merging_chunk = 0; 11099fe86254SMikulas Patocka s->num_merging_chunks = 0; 11109fe86254SMikulas Patocka bio_list_init(&s->bios_queued_during_merge); 11111da177e4SLinus Torvalds 
11121da177e4SLinus Torvalds /* Allocate hash table for COW data */ 1113fee1998eSJonathan Brassow if (init_hash_tables(s)) { 11141da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 11151da177e4SLinus Torvalds r = -ENOMEM; 1116fee1998eSJonathan Brassow goto bad_hash_tables; 11171da177e4SLinus Torvalds } 11181da177e4SLinus Torvalds 1119eb69aca5SHeinz Mauelshagen r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 11201da177e4SLinus Torvalds if (r) { 11211da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 1122fee1998eSJonathan Brassow goto bad_kcopyd; 11231da177e4SLinus Torvalds } 11241da177e4SLinus Torvalds 112592e86812SMikulas Patocka s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 112692e86812SMikulas Patocka if (!s->pending_pool) { 112792e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 1128fee1998eSJonathan Brassow goto bad_pending_pool; 112992e86812SMikulas Patocka } 113092e86812SMikulas Patocka 1131cd45daffSMikulas Patocka s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 1132cd45daffSMikulas Patocka tracked_chunk_cache); 1133cd45daffSMikulas Patocka if (!s->tracked_chunk_pool) { 1134cd45daffSMikulas Patocka ti->error = "Could not allocate tracked_chunk mempool for " 1135cd45daffSMikulas Patocka "tracking reads"; 113692e86812SMikulas Patocka goto bad_tracked_chunk_pool; 1137cd45daffSMikulas Patocka } 1138cd45daffSMikulas Patocka 1139cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1140cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 1141cd45daffSMikulas Patocka 1142cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 1143cd45daffSMikulas Patocka 1144c1f0c183SMike Snitzer ti->private = s; 114510b8106aSMike Snitzer ti->num_flush_requests = num_flush_requests; 1146c1f0c183SMike Snitzer 1147c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 1148c1f0c183SMike 
Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 1149c1f0c183SMike Snitzer r = register_snapshot(s); 1150c1f0c183SMike Snitzer if (r == -ENOMEM) { 1151c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 1152c1f0c183SMike Snitzer goto bad_load_and_register; 1153c1f0c183SMike Snitzer } else if (r < 0) { 1154c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 1155c1f0c183SMike Snitzer goto bad_load_and_register; 1156c1f0c183SMike Snitzer } 1157c1f0c183SMike Snitzer 1158c1f0c183SMike Snitzer /* 1159c1f0c183SMike Snitzer * Metadata must only be loaded into one table at once, so skip this 1160c1f0c183SMike Snitzer * if metadata will be handed over during resume. 1161c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 1162c1f0c183SMike Snitzer * ensure it's ignored. 1163c1f0c183SMike Snitzer */ 1164c1f0c183SMike Snitzer if (r > 0) { 1165c1f0c183SMike Snitzer s->store->chunk_size = 0; 1166c1f0c183SMike Snitzer return 0; 1167c1f0c183SMike Snitzer } 1168c1f0c183SMike Snitzer 1169493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 1170493df71cSJonathan Brassow (void *)s); 11710764147bSMilan Broz if (r < 0) { 1172f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 1173c1f0c183SMike Snitzer goto bad_read_metadata; 11740764147bSMilan Broz } else if (r > 0) { 11750764147bSMilan Broz s->valid = 0; 11760764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 1177f9cea4f7SMark McLoughlin } 1178aa14edebSAlasdair G Kergon 11793f2412dcSMikulas Patocka if (!s->store->chunk_size) { 11803f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 1181c1f0c183SMike Snitzer goto bad_read_metadata; 11823f2412dcSMikulas Patocka } 1183d0216849SJonathan Brassow ti->split_io = s->store->chunk_size; 11841da177e4SLinus Torvalds 11851da177e4SLinus Torvalds return 0; 11861da177e4SLinus Torvalds 1187c1f0c183SMike Snitzer 
bad_read_metadata: 1188c1f0c183SMike Snitzer unregister_snapshot(s); 1189c1f0c183SMike Snitzer 1190cd45daffSMikulas Patocka bad_load_and_register: 1191cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 1192cd45daffSMikulas Patocka 119392e86812SMikulas Patocka bad_tracked_chunk_pool: 119492e86812SMikulas Patocka mempool_destroy(s->pending_pool); 119592e86812SMikulas Patocka 1196fee1998eSJonathan Brassow bad_pending_pool: 1197eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 11981da177e4SLinus Torvalds 1199fee1998eSJonathan Brassow bad_kcopyd: 12003510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 12013510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 12021da177e4SLinus Torvalds 1203fee1998eSJonathan Brassow bad_hash_tables: 1204fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 1205fc56f6fbSMike Snitzer 1206fc56f6fbSMike Snitzer bad_store: 1207fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1208fc56f6fbSMike Snitzer 1209fc56f6fbSMike Snitzer bad_cow: 1210c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1211c2411045SMikulas Patocka 1212c2411045SMikulas Patocka bad_origin: 12131da177e4SLinus Torvalds kfree(s); 12141da177e4SLinus Torvalds 1215fc56f6fbSMike Snitzer bad: 12161da177e4SLinus Torvalds return r; 12171da177e4SLinus Torvalds } 12181da177e4SLinus Torvalds 121931c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 122031c93a0cSMilan Broz { 1221eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 122231c93a0cSMilan Broz s->kcopyd_client = NULL; 122331c93a0cSMilan Broz 12243510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 12253510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 122631c93a0cSMilan Broz } 122731c93a0cSMilan Broz 1228c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 1229c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 
1230c1f0c183SMike Snitzer { 1231c1f0c183SMike Snitzer union { 1232c1f0c183SMike Snitzer struct dm_exception_table table_swap; 1233c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 1234c1f0c183SMike Snitzer } u; 1235c1f0c183SMike Snitzer 1236c1f0c183SMike Snitzer /* 1237c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 1238c1f0c183SMike Snitzer */ 1239c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 1240c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 1241c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 1242c1f0c183SMike Snitzer 1243c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 1244c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 1245c1f0c183SMike Snitzer snap_src->store = u.store_swap; 1246c1f0c183SMike Snitzer 1247c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 1248c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 1249c1f0c183SMike Snitzer 1250c1f0c183SMike Snitzer snap_dest->ti->split_io = snap_dest->store->chunk_size; 1251c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 1252c1f0c183SMike Snitzer 1253c1f0c183SMike Snitzer /* 1254c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 
1255c1f0c183SMike Snitzer */ 1256c1f0c183SMike Snitzer snap_src->valid = 0; 1257c1f0c183SMike Snitzer } 1258c1f0c183SMike Snitzer 12591da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 12601da177e4SLinus Torvalds { 1261cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1262cd45daffSMikulas Patocka int i; 1263cd45daffSMikulas Patocka #endif 1264028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1265c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 12661da177e4SLinus Torvalds 1267c1f0c183SMike Snitzer down_read(&_origins_lock); 1268c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 12699d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1270c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 1271c1f0c183SMike Snitzer down_write(&snap_dest->lock); 1272c1f0c183SMike Snitzer snap_dest->valid = 0; 1273c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1274c1f0c183SMike Snitzer DMERR("Cancelling snapshot handover."); 1275c1f0c183SMike Snitzer } 1276c1f0c183SMike Snitzer up_read(&_origins_lock); 1277c1f0c183SMike Snitzer 12781e03f97eSMikulas Patocka if (dm_target_is_snapshot_merge(ti)) 12791e03f97eSMikulas Patocka stop_merge(s); 12801e03f97eSMikulas Patocka 1281138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 1282138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 12831da177e4SLinus Torvalds unregister_snapshot(s); 12841da177e4SLinus Torvalds 1285879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 128690fa1527SMikulas Patocka msleep(1); 1287879129d2SMikulas Patocka /* 1288879129d2SMikulas Patocka * Ensure instructions in mempool_destroy aren't reordered 1289879129d2SMikulas Patocka * before atomic_read. 
1290879129d2SMikulas Patocka */ 1291879129d2SMikulas Patocka smp_mb(); 1292879129d2SMikulas Patocka 1293cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1294cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1295cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 1296cd45daffSMikulas Patocka #endif 1297cd45daffSMikulas Patocka 1298cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 1299cd45daffSMikulas Patocka 130031c93a0cSMilan Broz __free_exceptions(s); 13011da177e4SLinus Torvalds 130292e86812SMikulas Patocka mempool_destroy(s->pending_pool); 130392e86812SMikulas Patocka 1304fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 1305138728dcSAlasdair G Kergon 1306fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1307fc56f6fbSMike Snitzer 1308c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1309c2411045SMikulas Patocka 13101da177e4SLinus Torvalds kfree(s); 13111da177e4SLinus Torvalds } 13121da177e4SLinus Torvalds 13131da177e4SLinus Torvalds /* 13141da177e4SLinus Torvalds * Flush a list of buffers. 13151da177e4SLinus Torvalds */ 13161da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 13171da177e4SLinus Torvalds { 13181da177e4SLinus Torvalds struct bio *n; 13191da177e4SLinus Torvalds 13201da177e4SLinus Torvalds while (bio) { 13211da177e4SLinus Torvalds n = bio->bi_next; 13221da177e4SLinus Torvalds bio->bi_next = NULL; 13231da177e4SLinus Torvalds generic_make_request(bio); 13241da177e4SLinus Torvalds bio = n; 13251da177e4SLinus Torvalds } 13261da177e4SLinus Torvalds } 13271da177e4SLinus Torvalds 1328515ad66cSMikulas Patocka static int do_origin(struct dm_dev *origin, struct bio *bio); 1329515ad66cSMikulas Patocka 1330515ad66cSMikulas Patocka /* 1331515ad66cSMikulas Patocka * Flush a list of buffers. 
1332515ad66cSMikulas Patocka */ 1333515ad66cSMikulas Patocka static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) 1334515ad66cSMikulas Patocka { 1335515ad66cSMikulas Patocka struct bio *n; 1336515ad66cSMikulas Patocka int r; 1337515ad66cSMikulas Patocka 1338515ad66cSMikulas Patocka while (bio) { 1339515ad66cSMikulas Patocka n = bio->bi_next; 1340515ad66cSMikulas Patocka bio->bi_next = NULL; 1341515ad66cSMikulas Patocka r = do_origin(s->origin, bio); 1342515ad66cSMikulas Patocka if (r == DM_MAPIO_REMAPPED) 1343515ad66cSMikulas Patocka generic_make_request(bio); 1344515ad66cSMikulas Patocka bio = n; 1345515ad66cSMikulas Patocka } 1346515ad66cSMikulas Patocka } 1347515ad66cSMikulas Patocka 13481da177e4SLinus Torvalds /* 13491da177e4SLinus Torvalds * Error a list of buffers. 13501da177e4SLinus Torvalds */ 13511da177e4SLinus Torvalds static void error_bios(struct bio *bio) 13521da177e4SLinus Torvalds { 13531da177e4SLinus Torvalds struct bio *n; 13541da177e4SLinus Torvalds 13551da177e4SLinus Torvalds while (bio) { 13561da177e4SLinus Torvalds n = bio->bi_next; 13571da177e4SLinus Torvalds bio->bi_next = NULL; 13586712ecf8SNeilBrown bio_io_error(bio); 13591da177e4SLinus Torvalds bio = n; 13601da177e4SLinus Torvalds } 13611da177e4SLinus Torvalds } 13621da177e4SLinus Torvalds 1363695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err) 136476df1c65SAlasdair G Kergon { 136576df1c65SAlasdair G Kergon if (!s->valid) 136676df1c65SAlasdair G Kergon return; 136776df1c65SAlasdair G Kergon 136876df1c65SAlasdair G Kergon if (err == -EIO) 136976df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Error reading/writing."); 137076df1c65SAlasdair G Kergon else if (err == -ENOMEM) 137176df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Unable to allocate exception."); 137276df1c65SAlasdair G Kergon 1373493df71cSJonathan Brassow if (s->store->type->drop_snapshot) 1374493df71cSJonathan Brassow 
s->store->type->drop_snapshot(s->store); 137576df1c65SAlasdair G Kergon 137676df1c65SAlasdair G Kergon s->valid = 0; 137776df1c65SAlasdair G Kergon 1378fc56f6fbSMike Snitzer dm_table_event(s->ti->table); 137976df1c65SAlasdair G Kergon } 138076df1c65SAlasdair G Kergon 1381028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success) 13821da177e4SLinus Torvalds { 13831d4989c8SJon Brassow struct dm_exception *e; 13841da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 13859d493fa8SAlasdair G Kergon struct bio *origin_bios = NULL; 13869d493fa8SAlasdair G Kergon struct bio *snapshot_bios = NULL; 13879d493fa8SAlasdair G Kergon int error = 0; 13881da177e4SLinus Torvalds 138976df1c65SAlasdair G Kergon if (!success) { 139076df1c65SAlasdair G Kergon /* Read/write error - snapshot is unusable */ 13911da177e4SLinus Torvalds down_write(&s->lock); 1392695368acSAlasdair G Kergon __invalidate_snapshot(s, -EIO); 13939d493fa8SAlasdair G Kergon error = 1; 139476df1c65SAlasdair G Kergon goto out; 139576df1c65SAlasdair G Kergon } 139676df1c65SAlasdair G Kergon 13973510cb94SJon Brassow e = alloc_completed_exception(); 139876df1c65SAlasdair G Kergon if (!e) { 139976df1c65SAlasdair G Kergon down_write(&s->lock); 1400695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 14019d493fa8SAlasdair G Kergon error = 1; 14021da177e4SLinus Torvalds goto out; 14031da177e4SLinus Torvalds } 14041da177e4SLinus Torvalds *e = pe->e; 14051da177e4SLinus Torvalds 14069d493fa8SAlasdair G Kergon down_write(&s->lock); 14079d493fa8SAlasdair G Kergon if (!s->valid) { 14083510cb94SJon Brassow free_completed_exception(e); 14099d493fa8SAlasdair G Kergon error = 1; 14109d493fa8SAlasdair G Kergon goto out; 14119d493fa8SAlasdair G Kergon } 14129d493fa8SAlasdair G Kergon 1413615d1eb9SMike Snitzer /* Check for conflicting reads */ 1414615d1eb9SMike Snitzer __check_for_conflicting_io(s, pe->e.old_chunk); 1415a8d41b59SMikulas Patocka 1416a8d41b59SMikulas Patocka /* 
14171da177e4SLinus Torvalds * Add a proper exception, and remove the 14181da177e4SLinus Torvalds * in-flight exception from the list. 14191da177e4SLinus Torvalds */ 14203510cb94SJon Brassow dm_insert_exception(&s->complete, e); 14211da177e4SLinus Torvalds 14221da177e4SLinus Torvalds out: 14233510cb94SJon Brassow dm_remove_exception(&pe->e); 14249d493fa8SAlasdair G Kergon snapshot_bios = bio_list_get(&pe->snapshot_bios); 1425515ad66cSMikulas Patocka origin_bios = bio_list_get(&pe->origin_bios); 1426515ad66cSMikulas Patocka free_pending_exception(pe); 1427b4b610f6SAlasdair G Kergon 142873dfd078SMikulas Patocka increment_pending_exceptions_done_count(); 142973dfd078SMikulas Patocka 14309d493fa8SAlasdair G Kergon up_write(&s->lock); 14319d493fa8SAlasdair G Kergon 14329d493fa8SAlasdair G Kergon /* Submit any pending write bios */ 14339d493fa8SAlasdair G Kergon if (error) 14349d493fa8SAlasdair G Kergon error_bios(snapshot_bios); 14359d493fa8SAlasdair G Kergon else 14369d493fa8SAlasdair G Kergon flush_bios(snapshot_bios); 14379d493fa8SAlasdair G Kergon 1438515ad66cSMikulas Patocka retry_origin_bios(s, origin_bios); 14391da177e4SLinus Torvalds } 14401da177e4SLinus Torvalds 14411da177e4SLinus Torvalds static void commit_callback(void *context, int success) 14421da177e4SLinus Torvalds { 1443028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 1444028867acSAlasdair G Kergon 14451da177e4SLinus Torvalds pending_complete(pe, success); 14461da177e4SLinus Torvalds } 14471da177e4SLinus Torvalds 14481da177e4SLinus Torvalds /* 14491da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 14501da177e4SLinus Torvalds * this code so don't block. 
14511da177e4SLinus Torvalds */ 14524cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 14531da177e4SLinus Torvalds { 1454028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 14551da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 14561da177e4SLinus Torvalds 14571da177e4SLinus Torvalds if (read_err || write_err) 14581da177e4SLinus Torvalds pending_complete(pe, 0); 14591da177e4SLinus Torvalds 14601da177e4SLinus Torvalds else 14611da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 1462493df71cSJonathan Brassow s->store->type->commit_exception(s->store, &pe->e, 1463b2a11465SJonathan Brassow commit_callback, pe); 14641da177e4SLinus Torvalds } 14651da177e4SLinus Torvalds 14661da177e4SLinus Torvalds /* 14671da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 14681da177e4SLinus Torvalds */ 1469028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 14701da177e4SLinus Torvalds { 14711da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 147222a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 14731da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 14741da177e4SLinus Torvalds sector_t dev_size; 14751da177e4SLinus Torvalds 14761da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 14771da177e4SLinus Torvalds 14781da177e4SLinus Torvalds src.bdev = bdev; 147971fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1480df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 14811da177e4SLinus Torvalds 1482fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 148371fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 14841da177e4SLinus Torvalds dest.count = src.count; 14851da177e4SLinus Torvalds 14861da177e4SLinus Torvalds /* Hand over to kcopyd */ 1487eb69aca5SHeinz Mauelshagen dm_kcopyd_copy(s->kcopyd_client, 
14881da177e4SLinus Torvalds &src, 1, &dest, 0, copy_callback, pe); 14891da177e4SLinus Torvalds } 14901da177e4SLinus Torvalds 14912913808eSMikulas Patocka static struct dm_snap_pending_exception * 14922913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 14932913808eSMikulas Patocka { 14943510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 14952913808eSMikulas Patocka 14962913808eSMikulas Patocka if (!e) 14972913808eSMikulas Patocka return NULL; 14982913808eSMikulas Patocka 14992913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 15002913808eSMikulas Patocka } 15012913808eSMikulas Patocka 15021da177e4SLinus Torvalds /* 15031da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 15041da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 15051da177e4SLinus Torvalds * it into the pending table. 15061da177e4SLinus Torvalds * 15071da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 15081da177e4SLinus Torvalds * this. 
15091da177e4SLinus Torvalds */ 1510028867acSAlasdair G Kergon static struct dm_snap_pending_exception * 1511c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s, 1512c6621392SMikulas Patocka struct dm_snap_pending_exception *pe, chunk_t chunk) 15131da177e4SLinus Torvalds { 1514c6621392SMikulas Patocka struct dm_snap_pending_exception *pe2; 151576df1c65SAlasdair G Kergon 15162913808eSMikulas Patocka pe2 = __lookup_pending_exception(s, chunk); 15172913808eSMikulas Patocka if (pe2) { 15181da177e4SLinus Torvalds free_pending_exception(pe); 15192913808eSMikulas Patocka return pe2; 152076df1c65SAlasdair G Kergon } 152176df1c65SAlasdair G Kergon 15221da177e4SLinus Torvalds pe->e.old_chunk = chunk; 15231da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 15241da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 15251da177e4SLinus Torvalds pe->started = 0; 15261da177e4SLinus Torvalds 1527493df71cSJonathan Brassow if (s->store->type->prepare_exception(s->store, &pe->e)) { 15281da177e4SLinus Torvalds free_pending_exception(pe); 15291da177e4SLinus Torvalds return NULL; 15301da177e4SLinus Torvalds } 15311da177e4SLinus Torvalds 15323510cb94SJon Brassow dm_insert_exception(&s->pending, &pe->e); 15331da177e4SLinus Torvalds 15341da177e4SLinus Torvalds return pe; 15351da177e4SLinus Torvalds } 15361da177e4SLinus Torvalds 15371d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, 1538d74f81f8SMilan Broz struct bio *bio, chunk_t chunk) 15391da177e4SLinus Torvalds { 1540fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 154171fab00aSJonathan Brassow bio->bi_sector = chunk_to_sector(s->store, 154271fab00aSJonathan Brassow dm_chunk_number(e->new_chunk) + 1543d74f81f8SMilan Broz (chunk - e->old_chunk)) + 154471fab00aSJonathan Brassow (bio->bi_sector & 154571fab00aSJonathan Brassow s->store->chunk_mask); 15461da177e4SLinus Torvalds } 15471da177e4SLinus Torvalds 15481da177e4SLinus Torvalds static int snapshot_map(struct 
dm_target *ti, struct bio *bio, 15491da177e4SLinus Torvalds union map_info *map_context) 15501da177e4SLinus Torvalds { 15511d4989c8SJon Brassow struct dm_exception *e; 1552028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1553d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 15541da177e4SLinus Torvalds chunk_t chunk; 1555028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = NULL; 15561da177e4SLinus Torvalds 1557d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) { 1558fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 1559494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1560494b3ee7SMikulas Patocka } 1561494b3ee7SMikulas Patocka 156271fab00aSJonathan Brassow chunk = sector_to_chunk(s->store, bio->bi_sector); 15631da177e4SLinus Torvalds 15641da177e4SLinus Torvalds /* Full snapshots are not usable */ 156576df1c65SAlasdair G Kergon /* To get here the table must be live so s->active is always set. */ 15661da177e4SLinus Torvalds if (!s->valid) 1567f6a80ea8SAlasdair G Kergon return -EIO; 15681da177e4SLinus Torvalds 15691da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 15701da177e4SLinus Torvalds * to copy an exception */ 15711da177e4SLinus Torvalds down_write(&s->lock); 15721da177e4SLinus Torvalds 157376df1c65SAlasdair G Kergon if (!s->valid) { 157476df1c65SAlasdair G Kergon r = -EIO; 157576df1c65SAlasdair G Kergon goto out_unlock; 157676df1c65SAlasdair G Kergon } 157776df1c65SAlasdair G Kergon 15781da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 15793510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 15801da177e4SLinus Torvalds if (e) { 1581d74f81f8SMilan Broz remap_exception(s, e, bio, chunk); 158276df1c65SAlasdair G Kergon goto out_unlock; 158376df1c65SAlasdair G Kergon } 15841da177e4SLinus Torvalds 1585ba40a2aaSAlasdair G Kergon /* 1586ba40a2aaSAlasdair G Kergon * Write to snapshot - higher level takes care of RW/RO 1587ba40a2aaSAlasdair G Kergon * flags so we should 
only get this if we are 1588ba40a2aaSAlasdair G Kergon * writeable. 1589ba40a2aaSAlasdair G Kergon */ 1590ba40a2aaSAlasdair G Kergon if (bio_rw(bio) == WRITE) { 15912913808eSMikulas Patocka pe = __lookup_pending_exception(s, chunk); 15922913808eSMikulas Patocka if (!pe) { 1593c6621392SMikulas Patocka up_write(&s->lock); 1594c6621392SMikulas Patocka pe = alloc_pending_exception(s); 1595c6621392SMikulas Patocka down_write(&s->lock); 1596c6621392SMikulas Patocka 1597c6621392SMikulas Patocka if (!s->valid) { 1598c6621392SMikulas Patocka free_pending_exception(pe); 1599c6621392SMikulas Patocka r = -EIO; 1600c6621392SMikulas Patocka goto out_unlock; 1601c6621392SMikulas Patocka } 1602c6621392SMikulas Patocka 16033510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 160435bf659bSMikulas Patocka if (e) { 160535bf659bSMikulas Patocka free_pending_exception(pe); 160635bf659bSMikulas Patocka remap_exception(s, e, bio, chunk); 160735bf659bSMikulas Patocka goto out_unlock; 160835bf659bSMikulas Patocka } 160935bf659bSMikulas Patocka 1610c6621392SMikulas Patocka pe = __find_pending_exception(s, pe, chunk); 16111da177e4SLinus Torvalds if (!pe) { 1612695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 16131da177e4SLinus Torvalds r = -EIO; 161476df1c65SAlasdair G Kergon goto out_unlock; 161576df1c65SAlasdair G Kergon } 16162913808eSMikulas Patocka } 161776df1c65SAlasdair G Kergon 1618d74f81f8SMilan Broz remap_exception(s, &pe->e, bio, chunk); 16191da177e4SLinus Torvalds bio_list_add(&pe->snapshot_bios, bio); 16201da177e4SLinus Torvalds 1621d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1622ba40a2aaSAlasdair G Kergon 16231da177e4SLinus Torvalds if (!pe->started) { 16241da177e4SLinus Torvalds /* this is protected by snap->lock */ 16251da177e4SLinus Torvalds pe->started = 1; 162676df1c65SAlasdair G Kergon up_write(&s->lock); 162776df1c65SAlasdair G Kergon start_copy(pe); 1628ba40a2aaSAlasdair G Kergon goto out; 1629ba40a2aaSAlasdair G Kergon } 1630cd45daffSMikulas 
Patocka } else { 16311da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 1632cd45daffSMikulas Patocka map_context->ptr = track_chunk(s, chunk); 1633cd45daffSMikulas Patocka } 16341da177e4SLinus Torvalds 1635ba40a2aaSAlasdair G Kergon out_unlock: 1636ba40a2aaSAlasdair G Kergon up_write(&s->lock); 1637ba40a2aaSAlasdair G Kergon out: 16381da177e4SLinus Torvalds return r; 16391da177e4SLinus Torvalds } 16401da177e4SLinus Torvalds 16413452c2a1SMikulas Patocka /* 16423452c2a1SMikulas Patocka * A snapshot-merge target behaves like a combination of a snapshot 16433452c2a1SMikulas Patocka * target and a snapshot-origin target. It only generates new 16443452c2a1SMikulas Patocka * exceptions in other snapshots and not in the one that is being 16453452c2a1SMikulas Patocka * merged. 16463452c2a1SMikulas Patocka * 16473452c2a1SMikulas Patocka * For each chunk, if there is an existing exception, it is used to 16483452c2a1SMikulas Patocka * redirect I/O to the cow device. Otherwise I/O is sent to the origin, 16493452c2a1SMikulas Patocka * which in turn might generate exceptions in other snapshots. 16509fe86254SMikulas Patocka * If merging is currently taking place on the chunk in question, the 16519fe86254SMikulas Patocka * I/O is deferred by adding it to s->bios_queued_during_merge. 
16523452c2a1SMikulas Patocka */ 16533452c2a1SMikulas Patocka static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, 16543452c2a1SMikulas Patocka union map_info *map_context) 16553452c2a1SMikulas Patocka { 16563452c2a1SMikulas Patocka struct dm_exception *e; 16573452c2a1SMikulas Patocka struct dm_snapshot *s = ti->private; 16583452c2a1SMikulas Patocka int r = DM_MAPIO_REMAPPED; 16593452c2a1SMikulas Patocka chunk_t chunk; 16603452c2a1SMikulas Patocka 1661d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) { 166257cba5d3SMike Snitzer if (!map_context->target_request_nr) 166310b8106aSMike Snitzer bio->bi_bdev = s->origin->bdev; 166410b8106aSMike Snitzer else 166510b8106aSMike Snitzer bio->bi_bdev = s->cow->bdev; 166610b8106aSMike Snitzer map_context->ptr = NULL; 166710b8106aSMike Snitzer return DM_MAPIO_REMAPPED; 166810b8106aSMike Snitzer } 166910b8106aSMike Snitzer 16703452c2a1SMikulas Patocka chunk = sector_to_chunk(s->store, bio->bi_sector); 16713452c2a1SMikulas Patocka 16729fe86254SMikulas Patocka down_write(&s->lock); 16733452c2a1SMikulas Patocka 1674d2fdb776SMikulas Patocka /* Full merging snapshots are redirected to the origin */ 1675d2fdb776SMikulas Patocka if (!s->valid) 1676d2fdb776SMikulas Patocka goto redirect_to_origin; 16773452c2a1SMikulas Patocka 16783452c2a1SMikulas Patocka /* If the block is already remapped - use that */ 16793452c2a1SMikulas Patocka e = dm_lookup_exception(&s->complete, chunk); 16803452c2a1SMikulas Patocka if (e) { 16819fe86254SMikulas Patocka /* Queue writes overlapping with chunks being merged */ 16829fe86254SMikulas Patocka if (bio_rw(bio) == WRITE && 16839fe86254SMikulas Patocka chunk >= s->first_merging_chunk && 16849fe86254SMikulas Patocka chunk < (s->first_merging_chunk + 16859fe86254SMikulas Patocka s->num_merging_chunks)) { 16869fe86254SMikulas Patocka bio->bi_bdev = s->origin->bdev; 16879fe86254SMikulas Patocka bio_list_add(&s->bios_queued_during_merge, bio); 16889fe86254SMikulas Patocka r = DM_MAPIO_SUBMITTED; 
16899fe86254SMikulas Patocka goto out_unlock; 16909fe86254SMikulas Patocka } 169117aa0332SMikulas Patocka 16923452c2a1SMikulas Patocka remap_exception(s, e, bio, chunk); 169317aa0332SMikulas Patocka 169417aa0332SMikulas Patocka if (bio_rw(bio) == WRITE) 169517aa0332SMikulas Patocka map_context->ptr = track_chunk(s, chunk); 16963452c2a1SMikulas Patocka goto out_unlock; 16973452c2a1SMikulas Patocka } 16983452c2a1SMikulas Patocka 1699d2fdb776SMikulas Patocka redirect_to_origin: 17003452c2a1SMikulas Patocka bio->bi_bdev = s->origin->bdev; 17013452c2a1SMikulas Patocka 17023452c2a1SMikulas Patocka if (bio_rw(bio) == WRITE) { 17039fe86254SMikulas Patocka up_write(&s->lock); 17043452c2a1SMikulas Patocka return do_origin(s->origin, bio); 17053452c2a1SMikulas Patocka } 17063452c2a1SMikulas Patocka 17073452c2a1SMikulas Patocka out_unlock: 17089fe86254SMikulas Patocka up_write(&s->lock); 17093452c2a1SMikulas Patocka 17103452c2a1SMikulas Patocka return r; 17113452c2a1SMikulas Patocka } 17123452c2a1SMikulas Patocka 1713cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 1714cd45daffSMikulas Patocka int error, union map_info *map_context) 1715cd45daffSMikulas Patocka { 1716cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 1717cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = map_context->ptr; 1718cd45daffSMikulas Patocka 1719cd45daffSMikulas Patocka if (c) 1720cd45daffSMikulas Patocka stop_tracking_chunk(s, c); 1721cd45daffSMikulas Patocka 1722cd45daffSMikulas Patocka return 0; 1723cd45daffSMikulas Patocka } 1724cd45daffSMikulas Patocka 17251e03f97eSMikulas Patocka static void snapshot_merge_presuspend(struct dm_target *ti) 17261e03f97eSMikulas Patocka { 17271e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 17281e03f97eSMikulas Patocka 17291e03f97eSMikulas Patocka stop_merge(s); 17301e03f97eSMikulas Patocka } 17311e03f97eSMikulas Patocka 1732c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target 
*ti) 1733c1f0c183SMike Snitzer { 1734c1f0c183SMike Snitzer int r = 0; 1735c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 1736c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1737c1f0c183SMike Snitzer 1738c1f0c183SMike Snitzer down_read(&_origins_lock); 17399d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1740c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1741c1f0c183SMike Snitzer down_read(&snap_src->lock); 1742c1f0c183SMike Snitzer if (s == snap_src) { 1743c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 1744c1f0c183SMike Snitzer "handover completes."); 1745c1f0c183SMike Snitzer r = -EINVAL; 1746b83b2f29SMike Snitzer } else if (!dm_suspended(snap_src->ti)) { 1747c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 1748c1f0c183SMike Snitzer "source is suspended."); 1749c1f0c183SMike Snitzer r = -EINVAL; 1750c1f0c183SMike Snitzer } 1751c1f0c183SMike Snitzer up_read(&snap_src->lock); 1752c1f0c183SMike Snitzer } 1753c1f0c183SMike Snitzer up_read(&_origins_lock); 1754c1f0c183SMike Snitzer 1755c1f0c183SMike Snitzer return r; 1756c1f0c183SMike Snitzer } 1757c1f0c183SMike Snitzer 17581da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 17591da177e4SLinus Torvalds { 1760028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1761c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1762c1f0c183SMike Snitzer 1763c1f0c183SMike Snitzer down_read(&_origins_lock); 17649d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1765c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1766c1f0c183SMike Snitzer down_write(&snap_src->lock); 1767c1f0c183SMike Snitzer down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 1768c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 1769c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1770c1f0c183SMike 
Snitzer up_write(&snap_src->lock); 1771c1f0c183SMike Snitzer } 1772c1f0c183SMike Snitzer up_read(&_origins_lock); 1773c1f0c183SMike Snitzer 1774c1f0c183SMike Snitzer /* Now we have correct chunk size, reregister */ 1775c1f0c183SMike Snitzer reregister_snapshot(s); 17761da177e4SLinus Torvalds 1777aa14edebSAlasdair G Kergon down_write(&s->lock); 1778aa14edebSAlasdair G Kergon s->active = 1; 1779aa14edebSAlasdair G Kergon up_write(&s->lock); 17801da177e4SLinus Torvalds } 17811da177e4SLinus Torvalds 17821e03f97eSMikulas Patocka static sector_t get_origin_minimum_chunksize(struct block_device *bdev) 17831e03f97eSMikulas Patocka { 17841e03f97eSMikulas Patocka sector_t min_chunksize; 17851e03f97eSMikulas Patocka 17861e03f97eSMikulas Patocka down_read(&_origins_lock); 17871e03f97eSMikulas Patocka min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); 17881e03f97eSMikulas Patocka up_read(&_origins_lock); 17891e03f97eSMikulas Patocka 17901e03f97eSMikulas Patocka return min_chunksize; 17911e03f97eSMikulas Patocka } 17921e03f97eSMikulas Patocka 17931e03f97eSMikulas Patocka static void snapshot_merge_resume(struct dm_target *ti) 17941e03f97eSMikulas Patocka { 17951e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 17961e03f97eSMikulas Patocka 17971e03f97eSMikulas Patocka /* 17981e03f97eSMikulas Patocka * Handover exceptions from existing snapshot. 
17991e03f97eSMikulas Patocka */ 18001e03f97eSMikulas Patocka snapshot_resume(ti); 18011e03f97eSMikulas Patocka 18021e03f97eSMikulas Patocka /* 18031e03f97eSMikulas Patocka * snapshot-merge acts as an origin, so set ti->split_io 18041e03f97eSMikulas Patocka */ 18051e03f97eSMikulas Patocka ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); 18061e03f97eSMikulas Patocka 18071e03f97eSMikulas Patocka start_merge(s); 18081e03f97eSMikulas Patocka } 18091e03f97eSMikulas Patocka 18101da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type, 18111da177e4SLinus Torvalds char *result, unsigned int maxlen) 18121da177e4SLinus Torvalds { 18132e4a31dfSJonathan Brassow unsigned sz = 0; 1814028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 18151da177e4SLinus Torvalds 18161da177e4SLinus Torvalds switch (type) { 18171da177e4SLinus Torvalds case STATUSTYPE_INFO: 181894e76572SMikulas Patocka 181994e76572SMikulas Patocka down_write(&snap->lock); 182094e76572SMikulas Patocka 18211da177e4SLinus Torvalds if (!snap->valid) 18222e4a31dfSJonathan Brassow DMEMIT("Invalid"); 1823d8ddb1cfSMike Snitzer else if (snap->merge_failed) 1824d8ddb1cfSMike Snitzer DMEMIT("Merge failed"); 18251da177e4SLinus Torvalds else { 1826985903bbSMike Snitzer if (snap->store->type->usage) { 1827985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 1828985903bbSMike Snitzer metadata_sectors; 1829985903bbSMike Snitzer snap->store->type->usage(snap->store, 1830985903bbSMike Snitzer &total_sectors, 1831985903bbSMike Snitzer §ors_allocated, 1832985903bbSMike Snitzer &metadata_sectors); 1833985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 1834985903bbSMike Snitzer (unsigned long long)sectors_allocated, 1835985903bbSMike Snitzer (unsigned long long)total_sectors, 1836985903bbSMike Snitzer (unsigned long long)metadata_sectors); 18371da177e4SLinus Torvalds } 18381da177e4SLinus Torvalds else 18392e4a31dfSJonathan Brassow DMEMIT("Unknown"); 18401da177e4SLinus 
Torvalds } 184194e76572SMikulas Patocka 184294e76572SMikulas Patocka up_write(&snap->lock); 184394e76572SMikulas Patocka 18441da177e4SLinus Torvalds break; 18451da177e4SLinus Torvalds 18461da177e4SLinus Torvalds case STATUSTYPE_TABLE: 18471da177e4SLinus Torvalds /* 18481da177e4SLinus Torvalds * kdevname returns a static pointer so we need 18491da177e4SLinus Torvalds * to make private copies if the output is to 18501da177e4SLinus Torvalds * make sense. 18511da177e4SLinus Torvalds */ 1852fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 18531e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 18541e302a92SJonathan Brassow maxlen - sz); 18551da177e4SLinus Torvalds break; 18561da177e4SLinus Torvalds } 18571da177e4SLinus Torvalds 18581da177e4SLinus Torvalds return 0; 18591da177e4SLinus Torvalds } 18601da177e4SLinus Torvalds 18618811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 18628811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 18638811f46cSMike Snitzer { 18648811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 18651e5554c8SMikulas Patocka int r; 18668811f46cSMike Snitzer 18671e5554c8SMikulas Patocka r = fn(ti, snap->origin, 0, ti->len, data); 18681e5554c8SMikulas Patocka 18691e5554c8SMikulas Patocka if (!r) 18701e5554c8SMikulas Patocka r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data); 18711e5554c8SMikulas Patocka 18721e5554c8SMikulas Patocka return r; 18738811f46cSMike Snitzer } 18748811f46cSMike Snitzer 18758811f46cSMike Snitzer 18761da177e4SLinus Torvalds /*----------------------------------------------------------------- 18771da177e4SLinus Torvalds * Origin methods 18781da177e4SLinus Torvalds *---------------------------------------------------------------*/ 18799eaae8ffSMikulas Patocka 18809eaae8ffSMikulas Patocka /* 18819eaae8ffSMikulas Patocka * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any 18829eaae8ffSMikulas 
Patocka * supplied bio was ignored. The caller may submit it immediately. 18839eaae8ffSMikulas Patocka * (No remapping actually occurs as the origin is always a direct linear 18849eaae8ffSMikulas Patocka * map.) 18859eaae8ffSMikulas Patocka * 18869eaae8ffSMikulas Patocka * If further exceptions are required, DM_MAPIO_SUBMITTED is returned 18879eaae8ffSMikulas Patocka * and any supplied bio is added to a list to be submitted once all 18889eaae8ffSMikulas Patocka * the necessary exceptions exist. 18899eaae8ffSMikulas Patocka */ 18909eaae8ffSMikulas Patocka static int __origin_write(struct list_head *snapshots, sector_t sector, 18919eaae8ffSMikulas Patocka struct bio *bio) 18921da177e4SLinus Torvalds { 1893515ad66cSMikulas Patocka int r = DM_MAPIO_REMAPPED; 18941da177e4SLinus Torvalds struct dm_snapshot *snap; 18951d4989c8SJon Brassow struct dm_exception *e; 1896515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe; 1897515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_now = NULL; 1898515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_last = NULL; 18991da177e4SLinus Torvalds chunk_t chunk; 19001da177e4SLinus Torvalds 19011da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 19021da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 19033452c2a1SMikulas Patocka /* 19043452c2a1SMikulas Patocka * Don't make new exceptions in a merging snapshot 19053452c2a1SMikulas Patocka * because it has effectively been deleted 19063452c2a1SMikulas Patocka */ 19073452c2a1SMikulas Patocka if (dm_target_is_snapshot_merge(snap->ti)) 19083452c2a1SMikulas Patocka continue; 19093452c2a1SMikulas Patocka 191076df1c65SAlasdair G Kergon down_write(&snap->lock); 191176df1c65SAlasdair G Kergon 1912aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 1913aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 191476df1c65SAlasdair G Kergon goto next_snapshot; 19151da177e4SLinus Torvalds 
1916d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 19179eaae8ffSMikulas Patocka if (sector >= dm_table_get_size(snap->ti->table)) 191876df1c65SAlasdair G Kergon goto next_snapshot; 19191da177e4SLinus Torvalds 19201da177e4SLinus Torvalds /* 19211da177e4SLinus Torvalds * Remember, different snapshots can have 19221da177e4SLinus Torvalds * different chunk sizes. 19231da177e4SLinus Torvalds */ 19249eaae8ffSMikulas Patocka chunk = sector_to_chunk(snap->store, sector); 19251da177e4SLinus Torvalds 19261da177e4SLinus Torvalds /* 19271da177e4SLinus Torvalds * Check exception table to see if block 19281da177e4SLinus Torvalds * is already remapped in this snapshot 19291da177e4SLinus Torvalds * and trigger an exception if not. 19301da177e4SLinus Torvalds */ 19313510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 193276df1c65SAlasdair G Kergon if (e) 193376df1c65SAlasdair G Kergon goto next_snapshot; 193476df1c65SAlasdair G Kergon 19352913808eSMikulas Patocka pe = __lookup_pending_exception(snap, chunk); 19362913808eSMikulas Patocka if (!pe) { 1937c6621392SMikulas Patocka up_write(&snap->lock); 1938c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 1939c6621392SMikulas Patocka down_write(&snap->lock); 1940c6621392SMikulas Patocka 1941c6621392SMikulas Patocka if (!snap->valid) { 1942c6621392SMikulas Patocka free_pending_exception(pe); 1943c6621392SMikulas Patocka goto next_snapshot; 1944c6621392SMikulas Patocka } 1945c6621392SMikulas Patocka 19463510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 194735bf659bSMikulas Patocka if (e) { 194835bf659bSMikulas Patocka free_pending_exception(pe); 194935bf659bSMikulas Patocka goto next_snapshot; 195035bf659bSMikulas Patocka } 195135bf659bSMikulas Patocka 1952c6621392SMikulas Patocka pe = __find_pending_exception(snap, pe, chunk); 19531da177e4SLinus Torvalds if (!pe) { 1954695368acSAlasdair G Kergon __invalidate_snapshot(snap, -ENOMEM); 195576df1c65SAlasdair 
G Kergon goto next_snapshot; 195676df1c65SAlasdair G Kergon } 19572913808eSMikulas Patocka } 19581da177e4SLinus Torvalds 1959d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 196076df1c65SAlasdair G Kergon 1961515ad66cSMikulas Patocka /* 1962515ad66cSMikulas Patocka * If an origin bio was supplied, queue it to wait for the 1963515ad66cSMikulas Patocka * completion of this exception, and start this one last, 1964515ad66cSMikulas Patocka * at the end of the function. 1965515ad66cSMikulas Patocka */ 1966515ad66cSMikulas Patocka if (bio) { 1967515ad66cSMikulas Patocka bio_list_add(&pe->origin_bios, bio); 1968515ad66cSMikulas Patocka bio = NULL; 1969515ad66cSMikulas Patocka 1970515ad66cSMikulas Patocka if (!pe->started) { 1971515ad66cSMikulas Patocka pe->started = 1; 1972515ad66cSMikulas Patocka pe_to_start_last = pe; 1973515ad66cSMikulas Patocka } 1974b4b610f6SAlasdair G Kergon } 197576df1c65SAlasdair G Kergon 1976eccf0817SAlasdair G Kergon if (!pe->started) { 1977eccf0817SAlasdair G Kergon pe->started = 1; 1978515ad66cSMikulas Patocka pe_to_start_now = pe; 1979eccf0817SAlasdair G Kergon } 19801da177e4SLinus Torvalds 198176df1c65SAlasdair G Kergon next_snapshot: 19821da177e4SLinus Torvalds up_write(&snap->lock); 1983515ad66cSMikulas Patocka 1984515ad66cSMikulas Patocka if (pe_to_start_now) { 1985515ad66cSMikulas Patocka start_copy(pe_to_start_now); 1986515ad66cSMikulas Patocka pe_to_start_now = NULL; 19871da177e4SLinus Torvalds } 1988b4b610f6SAlasdair G Kergon } 1989b4b610f6SAlasdair G Kergon 19901da177e4SLinus Torvalds /* 1991515ad66cSMikulas Patocka * Submit the exception against which the bio is queued last, 1992515ad66cSMikulas Patocka * to give the other exceptions a head start. 
19931da177e4SLinus Torvalds */ 1994515ad66cSMikulas Patocka if (pe_to_start_last) 1995515ad66cSMikulas Patocka start_copy(pe_to_start_last); 19961da177e4SLinus Torvalds 19971da177e4SLinus Torvalds return r; 19981da177e4SLinus Torvalds } 19991da177e4SLinus Torvalds 20001da177e4SLinus Torvalds /* 20011da177e4SLinus Torvalds * Called on a write from the origin driver. 20021da177e4SLinus Torvalds */ 20031da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 20041da177e4SLinus Torvalds { 20051da177e4SLinus Torvalds struct origin *o; 2006d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 20071da177e4SLinus Torvalds 20081da177e4SLinus Torvalds down_read(&_origins_lock); 20091da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 20101da177e4SLinus Torvalds if (o) 20119eaae8ffSMikulas Patocka r = __origin_write(&o->snapshots, bio->bi_sector, bio); 20121da177e4SLinus Torvalds up_read(&_origins_lock); 20131da177e4SLinus Torvalds 20141da177e4SLinus Torvalds return r; 20151da177e4SLinus Torvalds } 20161da177e4SLinus Torvalds 20171da177e4SLinus Torvalds /* 201873dfd078SMikulas Patocka * Trigger exceptions in all non-merging snapshots. 201973dfd078SMikulas Patocka * 202073dfd078SMikulas Patocka * The chunk size of the merging snapshot may be larger than the chunk 202173dfd078SMikulas Patocka * size of some other snapshot so we may need to reallocate multiple 202273dfd078SMikulas Patocka * chunks in other snapshots. 202373dfd078SMikulas Patocka * 202473dfd078SMikulas Patocka * We scan all the overlapping exceptions in the other snapshots. 202573dfd078SMikulas Patocka * Returns 1 if anything was reallocated and must be waited for, 202673dfd078SMikulas Patocka * otherwise returns 0. 202773dfd078SMikulas Patocka * 202873dfd078SMikulas Patocka * size must be a multiple of merging_snap's chunk_size. 
202973dfd078SMikulas Patocka */ 203073dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 203173dfd078SMikulas Patocka sector_t sector, unsigned size) 203273dfd078SMikulas Patocka { 203373dfd078SMikulas Patocka int must_wait = 0; 203473dfd078SMikulas Patocka sector_t n; 203573dfd078SMikulas Patocka struct origin *o; 203673dfd078SMikulas Patocka 203773dfd078SMikulas Patocka /* 203873dfd078SMikulas Patocka * The origin's __minimum_chunk_size() got stored in split_io 203973dfd078SMikulas Patocka * by snapshot_merge_resume(). 204073dfd078SMikulas Patocka */ 204173dfd078SMikulas Patocka down_read(&_origins_lock); 204273dfd078SMikulas Patocka o = __lookup_origin(merging_snap->origin->bdev); 204373dfd078SMikulas Patocka for (n = 0; n < size; n += merging_snap->ti->split_io) 204473dfd078SMikulas Patocka if (__origin_write(&o->snapshots, sector + n, NULL) == 204573dfd078SMikulas Patocka DM_MAPIO_SUBMITTED) 204673dfd078SMikulas Patocka must_wait = 1; 204773dfd078SMikulas Patocka up_read(&_origins_lock); 204873dfd078SMikulas Patocka 204973dfd078SMikulas Patocka return must_wait; 205073dfd078SMikulas Patocka } 205173dfd078SMikulas Patocka 205273dfd078SMikulas Patocka /* 20531da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 20541da177e4SLinus Torvalds */ 20551da177e4SLinus Torvalds 20561da177e4SLinus Torvalds /* 20571da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 20581da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 20591da177e4SLinus Torvalds * pointing to the real device. 
20601da177e4SLinus Torvalds */ 20611da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 20621da177e4SLinus Torvalds { 20631da177e4SLinus Torvalds int r; 20641da177e4SLinus Torvalds struct dm_dev *dev; 20651da177e4SLinus Torvalds 20661da177e4SLinus Torvalds if (argc != 1) { 206772d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 20681da177e4SLinus Torvalds return -EINVAL; 20691da177e4SLinus Torvalds } 20701da177e4SLinus Torvalds 20718215d6ecSNikanth Karthikesan r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); 20721da177e4SLinus Torvalds if (r) { 20731da177e4SLinus Torvalds ti->error = "Cannot get target device"; 20741da177e4SLinus Torvalds return r; 20751da177e4SLinus Torvalds } 20761da177e4SLinus Torvalds 20771da177e4SLinus Torvalds ti->private = dev; 2078494b3ee7SMikulas Patocka ti->num_flush_requests = 1; 2079494b3ee7SMikulas Patocka 20801da177e4SLinus Torvalds return 0; 20811da177e4SLinus Torvalds } 20821da177e4SLinus Torvalds 20831da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 20841da177e4SLinus Torvalds { 2085028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 20861da177e4SLinus Torvalds dm_put_device(ti, dev); 20871da177e4SLinus Torvalds } 20881da177e4SLinus Torvalds 20891da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio, 20901da177e4SLinus Torvalds union map_info *map_context) 20911da177e4SLinus Torvalds { 2092028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 20931da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 20941da177e4SLinus Torvalds 2095d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) 2096494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 2097494b3ee7SMikulas Patocka 20981da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 2099d2a7ad29SKiyoshi Ueda return (bio_rw(bio) == WRITE) ? 
do_origin(dev, bio) : DM_MAPIO_REMAPPED; 21001da177e4SLinus Torvalds } 21011da177e4SLinus Torvalds 21021da177e4SLinus Torvalds /* 21031da177e4SLinus Torvalds * Set the target "split_io" field to the minimum of all the snapshots' 21041da177e4SLinus Torvalds * chunk sizes. 21051da177e4SLinus Torvalds */ 21061da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 21071da177e4SLinus Torvalds { 2108028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21091da177e4SLinus Torvalds 21101e03f97eSMikulas Patocka ti->split_io = get_origin_minimum_chunksize(dev->bdev); 21111da177e4SLinus Torvalds } 21121da177e4SLinus Torvalds 21131da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result, 21141da177e4SLinus Torvalds unsigned int maxlen) 21151da177e4SLinus Torvalds { 2116028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21171da177e4SLinus Torvalds 21181da177e4SLinus Torvalds switch (type) { 21191da177e4SLinus Torvalds case STATUSTYPE_INFO: 21201da177e4SLinus Torvalds result[0] = '\0'; 21211da177e4SLinus Torvalds break; 21221da177e4SLinus Torvalds 21231da177e4SLinus Torvalds case STATUSTYPE_TABLE: 21241da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 21251da177e4SLinus Torvalds break; 21261da177e4SLinus Torvalds } 21271da177e4SLinus Torvalds 21281da177e4SLinus Torvalds return 0; 21291da177e4SLinus Torvalds } 21301da177e4SLinus Torvalds 2131b1d55528SMikulas Patocka static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 2132b1d55528SMikulas Patocka struct bio_vec *biovec, int max_size) 2133b1d55528SMikulas Patocka { 2134b1d55528SMikulas Patocka struct dm_dev *dev = ti->private; 2135b1d55528SMikulas Patocka struct request_queue *q = bdev_get_queue(dev->bdev); 2136b1d55528SMikulas Patocka 2137b1d55528SMikulas Patocka if (!q->merge_bvec_fn) 2138b1d55528SMikulas Patocka return max_size; 2139b1d55528SMikulas Patocka 2140b1d55528SMikulas Patocka bvm->bi_bdev = dev->bdev; 
2141b1d55528SMikulas Patocka bvm->bi_sector = bvm->bi_sector; 2142b1d55528SMikulas Patocka 2143b1d55528SMikulas Patocka return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 2144b1d55528SMikulas Patocka } 2145b1d55528SMikulas Patocka 21468811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 21478811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 21488811f46cSMike Snitzer { 21498811f46cSMike Snitzer struct dm_dev *dev = ti->private; 21508811f46cSMike Snitzer 21518811f46cSMike Snitzer return fn(ti, dev, 0, ti->len, data); 21528811f46cSMike Snitzer } 21538811f46cSMike Snitzer 21541da177e4SLinus Torvalds static struct target_type origin_target = { 21551da177e4SLinus Torvalds .name = "snapshot-origin", 2156b83b2f29SMike Snitzer .version = {1, 7, 1}, 21571da177e4SLinus Torvalds .module = THIS_MODULE, 21581da177e4SLinus Torvalds .ctr = origin_ctr, 21591da177e4SLinus Torvalds .dtr = origin_dtr, 21601da177e4SLinus Torvalds .map = origin_map, 21611da177e4SLinus Torvalds .resume = origin_resume, 21621da177e4SLinus Torvalds .status = origin_status, 2163b1d55528SMikulas Patocka .merge = origin_merge, 21648811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 21651da177e4SLinus Torvalds }; 21661da177e4SLinus Torvalds 21671da177e4SLinus Torvalds static struct target_type snapshot_target = { 21681da177e4SLinus Torvalds .name = "snapshot", 2169b83b2f29SMike Snitzer .version = {1, 10, 0}, 21701da177e4SLinus Torvalds .module = THIS_MODULE, 21711da177e4SLinus Torvalds .ctr = snapshot_ctr, 21721da177e4SLinus Torvalds .dtr = snapshot_dtr, 21731da177e4SLinus Torvalds .map = snapshot_map, 2174cd45daffSMikulas Patocka .end_io = snapshot_end_io, 2175c1f0c183SMike Snitzer .preresume = snapshot_preresume, 21761da177e4SLinus Torvalds .resume = snapshot_resume, 21771da177e4SLinus Torvalds .status = snapshot_status, 21788811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 21791da177e4SLinus Torvalds }; 21801da177e4SLinus Torvalds 
2181d698aa45SMikulas Patocka static struct target_type merge_target = { 2182d698aa45SMikulas Patocka .name = dm_snapshot_merge_target_name, 2183b83b2f29SMike Snitzer .version = {1, 1, 0}, 2184d698aa45SMikulas Patocka .module = THIS_MODULE, 2185d698aa45SMikulas Patocka .ctr = snapshot_ctr, 2186d698aa45SMikulas Patocka .dtr = snapshot_dtr, 21873452c2a1SMikulas Patocka .map = snapshot_merge_map, 2188d698aa45SMikulas Patocka .end_io = snapshot_end_io, 21891e03f97eSMikulas Patocka .presuspend = snapshot_merge_presuspend, 2190d698aa45SMikulas Patocka .preresume = snapshot_preresume, 21911e03f97eSMikulas Patocka .resume = snapshot_merge_resume, 2192d698aa45SMikulas Patocka .status = snapshot_status, 2193d698aa45SMikulas Patocka .iterate_devices = snapshot_iterate_devices, 2194d698aa45SMikulas Patocka }; 2195d698aa45SMikulas Patocka 21961da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 21971da177e4SLinus Torvalds { 21981da177e4SLinus Torvalds int r; 21991da177e4SLinus Torvalds 22004db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 22014db6bfe0SAlasdair G Kergon if (r) { 22024db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 22034db6bfe0SAlasdair G Kergon return r; 22044db6bfe0SAlasdair G Kergon } 22054db6bfe0SAlasdair G Kergon 22061da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 2207d698aa45SMikulas Patocka if (r < 0) { 22081da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 2209034a186dSJonathan Brassow goto bad_register_snapshot_target; 22101da177e4SLinus Torvalds } 22111da177e4SLinus Torvalds 22121da177e4SLinus Torvalds r = dm_register_target(&origin_target); 22131da177e4SLinus Torvalds if (r < 0) { 221472d94861SAlasdair G Kergon DMERR("Origin target register failed %d", r); 2215d698aa45SMikulas Patocka goto bad_register_origin_target; 2216d698aa45SMikulas Patocka } 2217d698aa45SMikulas Patocka 2218d698aa45SMikulas Patocka r = dm_register_target(&merge_target); 2219d698aa45SMikulas 
Patocka if (r < 0) { 2220d698aa45SMikulas Patocka DMERR("Merge target register failed %d", r); 2221d698aa45SMikulas Patocka goto bad_register_merge_target; 22221da177e4SLinus Torvalds } 22231da177e4SLinus Torvalds 22241da177e4SLinus Torvalds r = init_origin_hash(); 22251da177e4SLinus Torvalds if (r) { 22261da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 2227d698aa45SMikulas Patocka goto bad_origin_hash; 22281da177e4SLinus Torvalds } 22291da177e4SLinus Torvalds 22301d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 22311da177e4SLinus Torvalds if (!exception_cache) { 22321da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 22331da177e4SLinus Torvalds r = -ENOMEM; 2234d698aa45SMikulas Patocka goto bad_exception_cache; 22351da177e4SLinus Torvalds } 22361da177e4SLinus Torvalds 2237028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 22381da177e4SLinus Torvalds if (!pending_cache) { 22391da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 22401da177e4SLinus Torvalds r = -ENOMEM; 2241d698aa45SMikulas Patocka goto bad_pending_cache; 22421da177e4SLinus Torvalds } 22431da177e4SLinus Torvalds 2244cd45daffSMikulas Patocka tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 2245cd45daffSMikulas Patocka if (!tracked_chunk_cache) { 2246cd45daffSMikulas Patocka DMERR("Couldn't create cache to track chunks in use."); 2247cd45daffSMikulas Patocka r = -ENOMEM; 2248d698aa45SMikulas Patocka goto bad_tracked_chunk_cache; 2249cd45daffSMikulas Patocka } 2250cd45daffSMikulas Patocka 22511da177e4SLinus Torvalds return 0; 22521da177e4SLinus Torvalds 2253d698aa45SMikulas Patocka bad_tracked_chunk_cache: 22541da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 2255d698aa45SMikulas Patocka bad_pending_cache: 22561da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 2257d698aa45SMikulas Patocka bad_exception_cache: 22581da177e4SLinus Torvalds exit_origin_hash(); 2259d698aa45SMikulas Patocka 
bad_origin_hash: 2260d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 2261d698aa45SMikulas Patocka bad_register_merge_target: 22621da177e4SLinus Torvalds dm_unregister_target(&origin_target); 2263d698aa45SMikulas Patocka bad_register_origin_target: 22641da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 2265034a186dSJonathan Brassow bad_register_snapshot_target: 2266034a186dSJonathan Brassow dm_exception_store_exit(); 2267d698aa45SMikulas Patocka 22681da177e4SLinus Torvalds return r; 22691da177e4SLinus Torvalds } 22701da177e4SLinus Torvalds 22711da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 22721da177e4SLinus Torvalds { 227310d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 227410d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 2275d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 22761da177e4SLinus Torvalds 22771da177e4SLinus Torvalds exit_origin_hash(); 22781da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 22791da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 2280cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 22814db6bfe0SAlasdair G Kergon 22824db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 22831da177e4SLinus Torvalds } 22841da177e4SLinus Torvalds 22851da177e4SLinus Torvalds /* Module hooks */ 22861da177e4SLinus Torvalds module_init(dm_snapshot_init); 22871da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 22881da177e4SLinus Torvalds 22891da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 22901da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 22911da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 2292