11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 161da177e4SLinus Torvalds #include <linux/mempool.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/vmalloc.h> 206f3c3f0aSvignesh babu #include <linux/log2.h> 21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 221da177e4SLinus Torvalds 23aea53d92SJonathan Brassow #include "dm-exception-store.h" 241da177e4SLinus Torvalds 2572d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 2672d94861SAlasdair G Kergon 27d698aa45SMikulas Patocka static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; 28d698aa45SMikulas Patocka 29d698aa45SMikulas Patocka #define dm_target_is_snapshot_merge(ti) \ 30d698aa45SMikulas Patocka ((ti)->type->name == dm_snapshot_merge_target_name) 31d698aa45SMikulas Patocka 321da177e4SLinus Torvalds /* 33cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 
34cd45daffSMikulas Patocka */ 35cd45daffSMikulas Patocka #define MIN_IOS 256 36cd45daffSMikulas Patocka 37ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 38ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 39ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 40ccc45ea8SJonathan Brassow 41191437a5SJon Brassow struct dm_exception_table { 42ccc45ea8SJonathan Brassow uint32_t hash_mask; 43ccc45ea8SJonathan Brassow unsigned hash_shift; 44ccc45ea8SJonathan Brassow struct list_head *table; 45ccc45ea8SJonathan Brassow }; 46ccc45ea8SJonathan Brassow 47ccc45ea8SJonathan Brassow struct dm_snapshot { 48ccc45ea8SJonathan Brassow struct rw_semaphore lock; 49ccc45ea8SJonathan Brassow 50ccc45ea8SJonathan Brassow struct dm_dev *origin; 51fc56f6fbSMike Snitzer struct dm_dev *cow; 52fc56f6fbSMike Snitzer 53fc56f6fbSMike Snitzer struct dm_target *ti; 54ccc45ea8SJonathan Brassow 55ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 56ccc45ea8SJonathan Brassow struct list_head list; 57ccc45ea8SJonathan Brassow 58d8ddb1cfSMike Snitzer /* 59d8ddb1cfSMike Snitzer * You can't use a snapshot if this is 0 (e.g. if full). 60d8ddb1cfSMike Snitzer * A snapshot-merge target never clears this. 61d8ddb1cfSMike Snitzer */ 62ccc45ea8SJonathan Brassow int valid; 63ccc45ea8SJonathan Brassow 64ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 65ccc45ea8SJonathan Brassow int active; 66ccc45ea8SJonathan Brassow 67ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 68ccc45ea8SJonathan Brassow 69924e600dSMike Snitzer mempool_t *pending_pool; 70924e600dSMike Snitzer 71191437a5SJon Brassow struct dm_exception_table pending; 72191437a5SJon Brassow struct dm_exception_table complete; 73ccc45ea8SJonathan Brassow 74ccc45ea8SJonathan Brassow /* 75ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 76ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 
77ccc45ea8SJonathan Brassow */ 78ccc45ea8SJonathan Brassow spinlock_t pe_lock; 79ccc45ea8SJonathan Brassow 80924e600dSMike Snitzer /* Chunks with outstanding reads */ 81924e600dSMike Snitzer spinlock_t tracked_chunk_lock; 82924e600dSMike Snitzer struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 83924e600dSMike Snitzer 84ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 85ccc45ea8SJonathan Brassow struct dm_exception_store *store; 86ccc45ea8SJonathan Brassow 87ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 88ccc45ea8SJonathan Brassow 89924e600dSMike Snitzer /* Wait for events based on state_bits */ 90924e600dSMike Snitzer unsigned long state_bits; 91924e600dSMike Snitzer 92924e600dSMike Snitzer /* Range of chunks currently being merged. */ 93924e600dSMike Snitzer chunk_t first_merging_chunk; 94924e600dSMike Snitzer int num_merging_chunks; 951e03f97eSMikulas Patocka 96d8ddb1cfSMike Snitzer /* 97d8ddb1cfSMike Snitzer * The merge operation failed if this flag is set. 98d8ddb1cfSMike Snitzer * Failure modes are handled as follows: 99d8ddb1cfSMike Snitzer * - I/O error reading the header 100d8ddb1cfSMike Snitzer * => don't load the target; abort. 101d8ddb1cfSMike Snitzer * - Header does not have "valid" flag set 102d8ddb1cfSMike Snitzer * => use the origin; forget about the snapshot. 103d8ddb1cfSMike Snitzer * - I/O error when reading exceptions 104d8ddb1cfSMike Snitzer * => don't load the target; abort. 105d8ddb1cfSMike Snitzer * (We can't use the intermediate origin state.) 106d8ddb1cfSMike Snitzer * - I/O error while merging 107d8ddb1cfSMike Snitzer * => stop merging; set merge_failed; process I/O normally. 108d8ddb1cfSMike Snitzer */ 109d8ddb1cfSMike Snitzer int merge_failed; 110d8ddb1cfSMike Snitzer 1119fe86254SMikulas Patocka /* 1129fe86254SMikulas Patocka * Incoming bios that overlap with chunks being merged must wait 1139fe86254SMikulas Patocka * for them to be committed. 
1149fe86254SMikulas Patocka */ 1159fe86254SMikulas Patocka struct bio_list bios_queued_during_merge; 116ccc45ea8SJonathan Brassow }; 117ccc45ea8SJonathan Brassow 1181e03f97eSMikulas Patocka /* 1191e03f97eSMikulas Patocka * state_bits: 1201e03f97eSMikulas Patocka * RUNNING_MERGE - Merge operation is in progress. 1211e03f97eSMikulas Patocka * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; 1221e03f97eSMikulas Patocka * cleared afterwards. 1231e03f97eSMikulas Patocka */ 1241e03f97eSMikulas Patocka #define RUNNING_MERGE 0 1251e03f97eSMikulas Patocka #define SHUTDOWN_MERGE 1 1261e03f97eSMikulas Patocka 127c2411045SMikulas Patocka struct dm_dev *dm_snap_origin(struct dm_snapshot *s) 128c2411045SMikulas Patocka { 129c2411045SMikulas Patocka return s->origin; 130c2411045SMikulas Patocka } 131c2411045SMikulas Patocka EXPORT_SYMBOL(dm_snap_origin); 132c2411045SMikulas Patocka 133fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 134fc56f6fbSMike Snitzer { 135fc56f6fbSMike Snitzer return s->cow; 136fc56f6fbSMike Snitzer } 137fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 138fc56f6fbSMike Snitzer 139ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 140ccc45ea8SJonathan Brassow chunk_t chunk) 141ccc45ea8SJonathan Brassow { 142ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 143ccc45ea8SJonathan Brassow } 144ccc45ea8SJonathan Brassow 145ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 146ccc45ea8SJonathan Brassow { 147ccc45ea8SJonathan Brassow /* 148ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 149ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 
150ccc45ea8SJonathan Brassow */ 151ccc45ea8SJonathan Brassow return lhs == rhs; 152ccc45ea8SJonathan Brassow } 153ccc45ea8SJonathan Brassow 154028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1551d4989c8SJon Brassow struct dm_exception e; 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds /* 1581da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 1591da177e4SLinus Torvalds * in a bio list 1601da177e4SLinus Torvalds */ 1611da177e4SLinus Torvalds struct bio_list origin_bios; 1621da177e4SLinus Torvalds struct bio_list snapshot_bios; 1631da177e4SLinus Torvalds 1641da177e4SLinus Torvalds /* Pointer back to snapshot context */ 1651da177e4SLinus Torvalds struct dm_snapshot *snap; 1661da177e4SLinus Torvalds 1671da177e4SLinus Torvalds /* 1681da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 1691da177e4SLinus Torvalds * kcopyd. 1701da177e4SLinus Torvalds */ 1711da177e4SLinus Torvalds int started; 172a6e50b40SMikulas Patocka 173a6e50b40SMikulas Patocka /* 174a6e50b40SMikulas Patocka * For writing a complete chunk, bypassing the copy. 
175a6e50b40SMikulas Patocka */ 176a6e50b40SMikulas Patocka struct bio *full_bio; 177a6e50b40SMikulas Patocka bio_end_io_t *full_bio_end_io; 178a6e50b40SMikulas Patocka void *full_bio_private; 1791da177e4SLinus Torvalds }; 1801da177e4SLinus Torvalds 1811da177e4SLinus Torvalds /* 1821da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 1831da177e4SLinus Torvalds * a lock to protect it 1841da177e4SLinus Torvalds */ 185e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 186e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 1871da177e4SLinus Torvalds 188cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 189cd45daffSMikulas Patocka struct hlist_node node; 190cd45daffSMikulas Patocka chunk_t chunk; 191cd45daffSMikulas Patocka }; 192cd45daffSMikulas Patocka 193ee18026aSMikulas Patocka static void init_tracked_chunk(struct bio *bio) 194ee18026aSMikulas Patocka { 195ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 196ee18026aSMikulas Patocka INIT_HLIST_NODE(&c->node); 197ee18026aSMikulas Patocka } 198ee18026aSMikulas Patocka 199ee18026aSMikulas Patocka static bool is_bio_tracked(struct bio *bio) 200ee18026aSMikulas Patocka { 201ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 202ee18026aSMikulas Patocka return !hlist_unhashed(&c->node); 203ee18026aSMikulas Patocka } 204ee18026aSMikulas Patocka 205ee18026aSMikulas Patocka static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) 206cd45daffSMikulas Patocka { 20742bc954fSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 208cd45daffSMikulas Patocka 209cd45daffSMikulas Patocka c->chunk = chunk; 210cd45daffSMikulas Patocka 2119aa0c0e6SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 212cd45daffSMikulas Patocka hlist_add_head(&c->node, 
213cd45daffSMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 2149aa0c0e6SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 215cd45daffSMikulas Patocka } 216cd45daffSMikulas Patocka 217ee18026aSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) 218cd45daffSMikulas Patocka { 219ee18026aSMikulas Patocka struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); 220cd45daffSMikulas Patocka unsigned long flags; 221cd45daffSMikulas Patocka 222cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 223cd45daffSMikulas Patocka hlist_del(&c->node); 224cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 225cd45daffSMikulas Patocka } 226cd45daffSMikulas Patocka 227a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 228a8d41b59SMikulas Patocka { 229a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 230a8d41b59SMikulas Patocka int found = 0; 231a8d41b59SMikulas Patocka 232a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 233a8d41b59SMikulas Patocka 234b67bfe0dSSasha Levin hlist_for_each_entry(c, 235a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 236a8d41b59SMikulas Patocka if (c->chunk == chunk) { 237a8d41b59SMikulas Patocka found = 1; 238a8d41b59SMikulas Patocka break; 239a8d41b59SMikulas Patocka } 240a8d41b59SMikulas Patocka } 241a8d41b59SMikulas Patocka 242a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 243a8d41b59SMikulas Patocka 244a8d41b59SMikulas Patocka return found; 245a8d41b59SMikulas Patocka } 246a8d41b59SMikulas Patocka 2471da177e4SLinus Torvalds /* 248615d1eb9SMike Snitzer * This conflicting I/O is extremely improbable in the caller, 249615d1eb9SMike Snitzer * so msleep(1) is sufficient and there is no need for a wait queue. 
250615d1eb9SMike Snitzer */ 251615d1eb9SMike Snitzer static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) 252615d1eb9SMike Snitzer { 253615d1eb9SMike Snitzer while (__chunk_is_tracked(s, chunk)) 254615d1eb9SMike Snitzer msleep(1); 255615d1eb9SMike Snitzer } 256615d1eb9SMike Snitzer 257615d1eb9SMike Snitzer /* 2581da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 2591da177e4SLinus Torvalds */ 2601da177e4SLinus Torvalds struct origin { 2611da177e4SLinus Torvalds /* The origin device */ 2621da177e4SLinus Torvalds struct block_device *bdev; 2631da177e4SLinus Torvalds 2641da177e4SLinus Torvalds struct list_head hash_list; 2651da177e4SLinus Torvalds 2661da177e4SLinus Torvalds /* List of snapshots for this origin */ 2671da177e4SLinus Torvalds struct list_head snapshots; 2681da177e4SLinus Torvalds }; 2691da177e4SLinus Torvalds 2701da177e4SLinus Torvalds /* 2711da177e4SLinus Torvalds * Size of the hash table for origin volumes. 
If we make this 2721da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 2731da177e4SLinus Torvalds */ 2741da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 2751da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 2761da177e4SLinus Torvalds static struct list_head *_origins; 2771da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 2781da177e4SLinus Torvalds 27973dfd078SMikulas Patocka static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); 28073dfd078SMikulas Patocka static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); 28173dfd078SMikulas Patocka static uint64_t _pending_exceptions_done_count; 28273dfd078SMikulas Patocka 2831da177e4SLinus Torvalds static int init_origin_hash(void) 2841da177e4SLinus Torvalds { 2851da177e4SLinus Torvalds int i; 2861da177e4SLinus Torvalds 2871da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 2881da177e4SLinus Torvalds GFP_KERNEL); 2891da177e4SLinus Torvalds if (!_origins) { 29072d94861SAlasdair G Kergon DMERR("unable to allocate memory"); 2911da177e4SLinus Torvalds return -ENOMEM; 2921da177e4SLinus Torvalds } 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 2951da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 2961da177e4SLinus Torvalds init_rwsem(&_origins_lock); 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds return 0; 2991da177e4SLinus Torvalds } 3001da177e4SLinus Torvalds 3011da177e4SLinus Torvalds static void exit_origin_hash(void) 3021da177e4SLinus Torvalds { 3031da177e4SLinus Torvalds kfree(_origins); 3041da177e4SLinus Torvalds } 3051da177e4SLinus Torvalds 306028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 3071da177e4SLinus Torvalds { 3081da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds 3111da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 
3121da177e4SLinus Torvalds { 3131da177e4SLinus Torvalds struct list_head *ol; 3141da177e4SLinus Torvalds struct origin *o; 3151da177e4SLinus Torvalds 3161da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 3171da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 3181da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 3191da177e4SLinus Torvalds return o; 3201da177e4SLinus Torvalds 3211da177e4SLinus Torvalds return NULL; 3221da177e4SLinus Torvalds } 3231da177e4SLinus Torvalds 3241da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 3271da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 3281da177e4SLinus Torvalds } 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds /* 331c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 332c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 333c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 334c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 3359d3b15c4SMikulas Patocka * snap_merge - an existing snapshot-merge target linked to the same origin. 3369d3b15c4SMikulas Patocka * There can be at most one snapshot-merge target. The parameter is optional. 337c1f0c183SMike Snitzer * 3389d3b15c4SMikulas Patocka * Possible return values and states of snap_src and snap_dest. 
*   0: NULL, NULL - first new snapshot
 *   1: snap_src, NULL - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct origin *org = __lookup_origin(snap->origin->bdev);
	struct dm_snapshot *cur;
	int nr_sharing = 0;
	int is_active;

	/* Origin not registered: nothing can be sharing the cow device. */
	if (!org)
		return nr_sharing;

	list_for_each_entry(cur, &org->snapshots, list) {
		/* Reported for any merge target on this origin, whatever its cow. */
		if (dm_target_is_snapshot_merge(cur->ti) && snap_merge)
			*snap_merge = cur;

		if (!bdev_equal(cur->cow->bdev, snap->cow->bdev))
			continue;

		/* Sample ->active under the snapshot's own lock. */
		down_read(&cur->lock);
		is_active = cur->active;
		up_read(&cur->lock);

		if (!is_active) {
			if (snap_dest)
				*snap_dest = cur;
		} else if (snap_src) {
			*snap_src = cur;
		}

		nr_sharing++;
	}

	return nr_sharing;
}
Snitzer /* 383c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 384c1f0c183SMike Snitzer * otherwise returns 0. 385c1f0c183SMike Snitzer */ 386c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 387c1f0c183SMike Snitzer { 388c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 3899d3b15c4SMikulas Patocka struct dm_snapshot *snap_merge = NULL; 390c1f0c183SMike Snitzer 391c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 3929d3b15c4SMikulas Patocka if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, 3939d3b15c4SMikulas Patocka &snap_merge) == 2) || 394c1f0c183SMike Snitzer snap_dest) { 395c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 396c1f0c183SMike Snitzer "table handover failed"; 397c1f0c183SMike Snitzer return -EINVAL; 398c1f0c183SMike Snitzer } 399c1f0c183SMike Snitzer 400c1f0c183SMike Snitzer /* 401c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 402c1f0c183SMike Snitzer * destination. 403c1f0c183SMike Snitzer */ 404c1f0c183SMike Snitzer if (!snap_src) 405c1f0c183SMike Snitzer return 0; 406c1f0c183SMike Snitzer 4079d3b15c4SMikulas Patocka /* 4089d3b15c4SMikulas Patocka * Non-snapshot-merge handover? 4099d3b15c4SMikulas Patocka */ 4109d3b15c4SMikulas Patocka if (!dm_target_is_snapshot_merge(snap->ti)) 4119d3b15c4SMikulas Patocka return 1; 4129d3b15c4SMikulas Patocka 4139d3b15c4SMikulas Patocka /* 4149d3b15c4SMikulas Patocka * Do not allow more than one merging snapshot. 
4159d3b15c4SMikulas Patocka */ 4169d3b15c4SMikulas Patocka if (snap_merge) { 4179d3b15c4SMikulas Patocka snap->ti->error = "A snapshot is already merging."; 4189d3b15c4SMikulas Patocka return -EINVAL; 4199d3b15c4SMikulas Patocka } 4209d3b15c4SMikulas Patocka 4211e03f97eSMikulas Patocka if (!snap_src->store->type->prepare_merge || 4221e03f97eSMikulas Patocka !snap_src->store->type->commit_merge) { 4231e03f97eSMikulas Patocka snap->ti->error = "Snapshot exception store does not " 4241e03f97eSMikulas Patocka "support snapshot-merge."; 4251e03f97eSMikulas Patocka return -EINVAL; 4261e03f97eSMikulas Patocka } 4271e03f97eSMikulas Patocka 428c1f0c183SMike Snitzer return 1; 429c1f0c183SMike Snitzer } 430c1f0c183SMike Snitzer 431c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 432c1f0c183SMike Snitzer { 433c1f0c183SMike Snitzer struct dm_snapshot *l; 434c1f0c183SMike Snitzer 435c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 436c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 437c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 438c1f0c183SMike Snitzer break; 439c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 440c1f0c183SMike Snitzer } 441c1f0c183SMike Snitzer 442c1f0c183SMike Snitzer /* 4431da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 4441da177e4SLinus Torvalds * up when the origin has a write on it. 445c1f0c183SMike Snitzer * 446c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 447c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 448c1f0c183SMike Snitzer * otherwise returns 0. 
4491da177e4SLinus Torvalds */ 4501da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 4511da177e4SLinus Torvalds { 452c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 4531da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 454c1f0c183SMike Snitzer int r = 0; 4551da177e4SLinus Torvalds 45660c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 45760c856c8SMikulas Patocka if (!new_o) 45860c856c8SMikulas Patocka return -ENOMEM; 45960c856c8SMikulas Patocka 4601da177e4SLinus Torvalds down_write(&_origins_lock); 4611da177e4SLinus Torvalds 462c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 463c1f0c183SMike Snitzer if (r < 0) { 464c1f0c183SMike Snitzer kfree(new_o); 465c1f0c183SMike Snitzer goto out; 466c1f0c183SMike Snitzer } 467c1f0c183SMike Snitzer 468c1f0c183SMike Snitzer o = __lookup_origin(bdev); 46960c856c8SMikulas Patocka if (o) 47060c856c8SMikulas Patocka kfree(new_o); 47160c856c8SMikulas Patocka else { 4721da177e4SLinus Torvalds /* New origin */ 47360c856c8SMikulas Patocka o = new_o; 4741da177e4SLinus Torvalds 4751da177e4SLinus Torvalds /* Initialise the struct */ 4761da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 4771da177e4SLinus Torvalds o->bdev = bdev; 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds __insert_origin(o); 4801da177e4SLinus Torvalds } 4811da177e4SLinus Torvalds 482c1f0c183SMike Snitzer __insert_snapshot(o, snap); 483c1f0c183SMike Snitzer 484c1f0c183SMike Snitzer out: 485c1f0c183SMike Snitzer up_write(&_origins_lock); 486c1f0c183SMike Snitzer 487c1f0c183SMike Snitzer return r; 488c1f0c183SMike Snitzer } 489c1f0c183SMike Snitzer 490c1f0c183SMike Snitzer /* 491c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 
492c1f0c183SMike Snitzer */ 493c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 494c1f0c183SMike Snitzer { 495c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 496c1f0c183SMike Snitzer 497c1f0c183SMike Snitzer down_write(&_origins_lock); 498c1f0c183SMike Snitzer 499c1f0c183SMike Snitzer list_del(&s->list); 500c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 5011da177e4SLinus Torvalds 5021da177e4SLinus Torvalds up_write(&_origins_lock); 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 5061da177e4SLinus Torvalds { 5071da177e4SLinus Torvalds struct origin *o; 5081da177e4SLinus Torvalds 5091da177e4SLinus Torvalds down_write(&_origins_lock); 5101da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 5111da177e4SLinus Torvalds 5121da177e4SLinus Torvalds list_del(&s->list); 513c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 5141da177e4SLinus Torvalds list_del(&o->hash_list); 5151da177e4SLinus Torvalds kfree(o); 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds 5181da177e4SLinus Torvalds up_write(&_origins_lock); 5191da177e4SLinus Torvalds } 5201da177e4SLinus Torvalds 5211da177e4SLinus Torvalds /* 5221da177e4SLinus Torvalds * Implementation of the exception hash tables. 523d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 524d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 
5251da177e4SLinus Torvalds */ 5263510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 5273510cb94SJon Brassow uint32_t size, unsigned hash_shift) 5281da177e4SLinus Torvalds { 5291da177e4SLinus Torvalds unsigned int i; 5301da177e4SLinus Torvalds 531d74f81f8SMilan Broz et->hash_shift = hash_shift; 5321da177e4SLinus Torvalds et->hash_mask = size - 1; 5331da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 5341da177e4SLinus Torvalds if (!et->table) 5351da177e4SLinus Torvalds return -ENOMEM; 5361da177e4SLinus Torvalds 5371da177e4SLinus Torvalds for (i = 0; i < size; i++) 5381da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 5391da177e4SLinus Torvalds 5401da177e4SLinus Torvalds return 0; 5411da177e4SLinus Torvalds } 5421da177e4SLinus Torvalds 5433510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 544191437a5SJon Brassow struct kmem_cache *mem) 5451da177e4SLinus Torvalds { 5461da177e4SLinus Torvalds struct list_head *slot; 5471d4989c8SJon Brassow struct dm_exception *ex, *next; 5481da177e4SLinus Torvalds int i, size; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds size = et->hash_mask + 1; 5511da177e4SLinus Torvalds for (i = 0; i < size; i++) { 5521da177e4SLinus Torvalds slot = et->table + i; 5531da177e4SLinus Torvalds 5541da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 5551da177e4SLinus Torvalds kmem_cache_free(mem, ex); 5561da177e4SLinus Torvalds } 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds vfree(et->table); 5591da177e4SLinus Torvalds } 5601da177e4SLinus Torvalds 561191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 5621da177e4SLinus Torvalds { 563d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 5641da177e4SLinus Torvalds } 5651da177e4SLinus Torvalds 5663510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 5671da177e4SLinus Torvalds { 
5681da177e4SLinus Torvalds list_del(&e->hash_list); 5691da177e4SLinus Torvalds } 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds /* 5721da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 5731da177e4SLinus Torvalds * remapped. 5741da177e4SLinus Torvalds */ 5753510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 5761da177e4SLinus Torvalds chunk_t chunk) 5771da177e4SLinus Torvalds { 5781da177e4SLinus Torvalds struct list_head *slot; 5791d4989c8SJon Brassow struct dm_exception *e; 5801da177e4SLinus Torvalds 5811da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 5821da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 583d74f81f8SMilan Broz if (chunk >= e->old_chunk && 584d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 5851da177e4SLinus Torvalds return e; 5861da177e4SLinus Torvalds 5871da177e4SLinus Torvalds return NULL; 5881da177e4SLinus Torvalds } 5891da177e4SLinus Torvalds 5903510cb94SJon Brassow static struct dm_exception *alloc_completed_exception(void) 5911da177e4SLinus Torvalds { 5921d4989c8SJon Brassow struct dm_exception *e; 5931da177e4SLinus Torvalds 5941da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 5951da177e4SLinus Torvalds if (!e) 5961da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds return e; 5991da177e4SLinus Torvalds } 6001da177e4SLinus Torvalds 6013510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 6021da177e4SLinus Torvalds { 6031da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 6041da177e4SLinus Torvalds } 6051da177e4SLinus Torvalds 60692e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 6071da177e4SLinus Torvalds { 60892e86812SMikulas Patocka struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 
60992e86812SMikulas Patocka GFP_NOIO); 61092e86812SMikulas Patocka 611879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 61292e86812SMikulas Patocka pe->snap = s; 61392e86812SMikulas Patocka 61492e86812SMikulas Patocka return pe; 6151da177e4SLinus Torvalds } 6161da177e4SLinus Torvalds 617028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 6181da177e4SLinus Torvalds { 619879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 620879129d2SMikulas Patocka 621879129d2SMikulas Patocka mempool_free(pe, s->pending_pool); 622879129d2SMikulas Patocka smp_mb__before_atomic_dec(); 623879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 6241da177e4SLinus Torvalds } 6251da177e4SLinus Torvalds 6263510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 6271d4989c8SJon Brassow struct dm_exception *new_e) 628d74f81f8SMilan Broz { 629d74f81f8SMilan Broz struct list_head *l; 6301d4989c8SJon Brassow struct dm_exception *e = NULL; 631d74f81f8SMilan Broz 632d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 633d74f81f8SMilan Broz 634d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 635d74f81f8SMilan Broz if (!eh->hash_shift) 636d74f81f8SMilan Broz goto out; 637d74f81f8SMilan Broz 638d74f81f8SMilan Broz /* List is ordered by old_chunk */ 639d74f81f8SMilan Broz list_for_each_entry_reverse(e, l, hash_list) { 640d74f81f8SMilan Broz /* Insert after an existing chunk? 
*/ 641d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 642d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 643d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 644d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 645d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 6463510cb94SJon Brassow free_completed_exception(new_e); 647d74f81f8SMilan Broz return; 648d74f81f8SMilan Broz } 649d74f81f8SMilan Broz 650d74f81f8SMilan Broz /* Insert before an existing chunk? */ 651d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 652d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 653d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 654d74f81f8SMilan Broz e->old_chunk--; 655d74f81f8SMilan Broz e->new_chunk--; 6563510cb94SJon Brassow free_completed_exception(new_e); 657d74f81f8SMilan Broz return; 658d74f81f8SMilan Broz } 659d74f81f8SMilan Broz 660d74f81f8SMilan Broz if (new_e->old_chunk > e->old_chunk) 661d74f81f8SMilan Broz break; 662d74f81f8SMilan Broz } 663d74f81f8SMilan Broz 664d74f81f8SMilan Broz out: 665d74f81f8SMilan Broz list_add(&new_e->hash_list, e ? &e->hash_list : l); 666d74f81f8SMilan Broz } 667d74f81f8SMilan Broz 668a159c1acSJonathan Brassow /* 669a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 670a159c1acSJonathan Brassow * initialising. 
671a159c1acSJonathan Brassow */ 672a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 6731da177e4SLinus Torvalds { 674a159c1acSJonathan Brassow struct dm_snapshot *s = context; 6751d4989c8SJon Brassow struct dm_exception *e; 6761da177e4SLinus Torvalds 6773510cb94SJon Brassow e = alloc_completed_exception(); 6781da177e4SLinus Torvalds if (!e) 6791da177e4SLinus Torvalds return -ENOMEM; 6801da177e4SLinus Torvalds 6811da177e4SLinus Torvalds e->old_chunk = old; 682d74f81f8SMilan Broz 683d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 6841da177e4SLinus Torvalds e->new_chunk = new; 685d74f81f8SMilan Broz 6863510cb94SJon Brassow dm_insert_exception(&s->complete, e); 687d74f81f8SMilan Broz 6881da177e4SLinus Torvalds return 0; 6891da177e4SLinus Torvalds } 6901da177e4SLinus Torvalds 6917e201b35SMikulas Patocka /* 6927e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 6937e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 6947e201b35SMikulas Patocka */ 695542f9038SMike Snitzer static uint32_t __minimum_chunk_size(struct origin *o) 6967e201b35SMikulas Patocka { 6977e201b35SMikulas Patocka struct dm_snapshot *snap; 6987e201b35SMikulas Patocka unsigned chunk_size = 0; 6997e201b35SMikulas Patocka 7007e201b35SMikulas Patocka if (o) 7017e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 7027e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 7037e201b35SMikulas Patocka snap->store->chunk_size); 7047e201b35SMikulas Patocka 705542f9038SMike Snitzer return (uint32_t) chunk_size; 7067e201b35SMikulas Patocka } 7077e201b35SMikulas Patocka 7081da177e4SLinus Torvalds /* 7091da177e4SLinus Torvalds * Hard coded magic. 
7101da177e4SLinus Torvalds */ 7111da177e4SLinus Torvalds static int calc_max_buckets(void) 7121da177e4SLinus Torvalds { 7131da177e4SLinus Torvalds /* use a fixed size of 2MB */ 7141da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 7151da177e4SLinus Torvalds mem /= sizeof(struct list_head); 7161da177e4SLinus Torvalds 7171da177e4SLinus Torvalds return mem; 7181da177e4SLinus Torvalds } 7191da177e4SLinus Torvalds 7201da177e4SLinus Torvalds /* 7211da177e4SLinus Torvalds * Allocate room for a suitable hash table. 7221da177e4SLinus Torvalds */ 723fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 7241da177e4SLinus Torvalds { 7251da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds /* 7281da177e4SLinus Torvalds * Calculate based on the size of the original volume or 7291da177e4SLinus Torvalds * the COW volume... 7301da177e4SLinus Torvalds */ 731fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 7321da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 7331da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 7341da177e4SLinus Torvalds 735fee1998eSJonathan Brassow hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 7361da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 7371da177e4SLinus Torvalds 7388e87b9b8SMikulas Patocka if (hash_size < 64) 7398e87b9b8SMikulas Patocka hash_size = 64; 7408defd830SRobert P. J. 
Day hash_size = rounddown_pow_of_two(hash_size); 7413510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 742d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 7431da177e4SLinus Torvalds return -ENOMEM; 7441da177e4SLinus Torvalds 7451da177e4SLinus Torvalds /* 7461da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 7471da177e4SLinus Torvalds * Make this smaller than the real hash table 7481da177e4SLinus Torvalds */ 7491da177e4SLinus Torvalds hash_size >>= 3; 7501da177e4SLinus Torvalds if (hash_size < 64) 7511da177e4SLinus Torvalds hash_size = 64; 7521da177e4SLinus Torvalds 7533510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 7543510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 7551da177e4SLinus Torvalds return -ENOMEM; 7561da177e4SLinus Torvalds } 7571da177e4SLinus Torvalds 7581da177e4SLinus Torvalds return 0; 7591da177e4SLinus Torvalds } 7601da177e4SLinus Torvalds 7611e03f97eSMikulas Patocka static void merge_shutdown(struct dm_snapshot *s) 7621e03f97eSMikulas Patocka { 7631e03f97eSMikulas Patocka clear_bit_unlock(RUNNING_MERGE, &s->state_bits); 7641e03f97eSMikulas Patocka smp_mb__after_clear_bit(); 7651e03f97eSMikulas Patocka wake_up_bit(&s->state_bits, RUNNING_MERGE); 7661e03f97eSMikulas Patocka } 7671e03f97eSMikulas Patocka 7689fe86254SMikulas Patocka static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) 7699fe86254SMikulas Patocka { 7709fe86254SMikulas Patocka s->first_merging_chunk = 0; 7719fe86254SMikulas Patocka s->num_merging_chunks = 0; 7729fe86254SMikulas Patocka 7739fe86254SMikulas Patocka return bio_list_get(&s->bios_queued_during_merge); 7749fe86254SMikulas Patocka } 7759fe86254SMikulas Patocka 7761e03f97eSMikulas Patocka /* 7771e03f97eSMikulas Patocka * Remove one chunk from the index of completed exceptions. 
7781e03f97eSMikulas Patocka */ 7791e03f97eSMikulas Patocka static int __remove_single_exception_chunk(struct dm_snapshot *s, 7801e03f97eSMikulas Patocka chunk_t old_chunk) 7811e03f97eSMikulas Patocka { 7821e03f97eSMikulas Patocka struct dm_exception *e; 7831e03f97eSMikulas Patocka 7841e03f97eSMikulas Patocka e = dm_lookup_exception(&s->complete, old_chunk); 7851e03f97eSMikulas Patocka if (!e) { 7861e03f97eSMikulas Patocka DMERR("Corruption detected: exception for block %llu is " 7871e03f97eSMikulas Patocka "on disk but not in memory", 7881e03f97eSMikulas Patocka (unsigned long long)old_chunk); 7891e03f97eSMikulas Patocka return -EINVAL; 7901e03f97eSMikulas Patocka } 7911e03f97eSMikulas Patocka 7921e03f97eSMikulas Patocka /* 7931e03f97eSMikulas Patocka * If this is the only chunk using this exception, remove exception. 7941e03f97eSMikulas Patocka */ 7951e03f97eSMikulas Patocka if (!dm_consecutive_chunk_count(e)) { 7961e03f97eSMikulas Patocka dm_remove_exception(e); 7971e03f97eSMikulas Patocka free_completed_exception(e); 7981e03f97eSMikulas Patocka return 0; 7991e03f97eSMikulas Patocka } 8001e03f97eSMikulas Patocka 8011e03f97eSMikulas Patocka /* 8021e03f97eSMikulas Patocka * The chunk may be either at the beginning or the end of a 8031e03f97eSMikulas Patocka * group of consecutive chunks - never in the middle. We are 8041e03f97eSMikulas Patocka * removing chunks in the opposite order to that in which they 8051e03f97eSMikulas Patocka * were added, so this should always be true. 8061e03f97eSMikulas Patocka * Decrement the consecutive chunk counter and adjust the 8071e03f97eSMikulas Patocka * starting point if necessary. 
8081e03f97eSMikulas Patocka */ 8091e03f97eSMikulas Patocka if (old_chunk == e->old_chunk) { 8101e03f97eSMikulas Patocka e->old_chunk++; 8111e03f97eSMikulas Patocka e->new_chunk++; 8121e03f97eSMikulas Patocka } else if (old_chunk != e->old_chunk + 8131e03f97eSMikulas Patocka dm_consecutive_chunk_count(e)) { 8141e03f97eSMikulas Patocka DMERR("Attempt to merge block %llu from the " 8151e03f97eSMikulas Patocka "middle of a chunk range [%llu - %llu]", 8161e03f97eSMikulas Patocka (unsigned long long)old_chunk, 8171e03f97eSMikulas Patocka (unsigned long long)e->old_chunk, 8181e03f97eSMikulas Patocka (unsigned long long) 8191e03f97eSMikulas Patocka e->old_chunk + dm_consecutive_chunk_count(e)); 8201e03f97eSMikulas Patocka return -EINVAL; 8211e03f97eSMikulas Patocka } 8221e03f97eSMikulas Patocka 8231e03f97eSMikulas Patocka dm_consecutive_chunk_count_dec(e); 8241e03f97eSMikulas Patocka 8251e03f97eSMikulas Patocka return 0; 8261e03f97eSMikulas Patocka } 8271e03f97eSMikulas Patocka 8289fe86254SMikulas Patocka static void flush_bios(struct bio *bio); 8299fe86254SMikulas Patocka 8309fe86254SMikulas Patocka static int remove_single_exception_chunk(struct dm_snapshot *s) 8311e03f97eSMikulas Patocka { 8329fe86254SMikulas Patocka struct bio *b = NULL; 8339fe86254SMikulas Patocka int r; 8349fe86254SMikulas Patocka chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; 8351e03f97eSMikulas Patocka 8361e03f97eSMikulas Patocka down_write(&s->lock); 8379fe86254SMikulas Patocka 8389fe86254SMikulas Patocka /* 8399fe86254SMikulas Patocka * Process chunks (and associated exceptions) in reverse order 8409fe86254SMikulas Patocka * so that dm_consecutive_chunk_count_dec() accounting works. 
8419fe86254SMikulas Patocka */ 8429fe86254SMikulas Patocka do { 8431e03f97eSMikulas Patocka r = __remove_single_exception_chunk(s, old_chunk); 8449fe86254SMikulas Patocka if (r) 8459fe86254SMikulas Patocka goto out; 8469fe86254SMikulas Patocka } while (old_chunk-- > s->first_merging_chunk); 8479fe86254SMikulas Patocka 8489fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 8499fe86254SMikulas Patocka 8509fe86254SMikulas Patocka out: 8511e03f97eSMikulas Patocka up_write(&s->lock); 8529fe86254SMikulas Patocka if (b) 8539fe86254SMikulas Patocka flush_bios(b); 8541e03f97eSMikulas Patocka 8551e03f97eSMikulas Patocka return r; 8561e03f97eSMikulas Patocka } 8571e03f97eSMikulas Patocka 85873dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 85973dfd078SMikulas Patocka sector_t sector, unsigned chunk_size); 86073dfd078SMikulas Patocka 8611e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, 8621e03f97eSMikulas Patocka void *context); 8631e03f97eSMikulas Patocka 86473dfd078SMikulas Patocka static uint64_t read_pending_exceptions_done_count(void) 86573dfd078SMikulas Patocka { 86673dfd078SMikulas Patocka uint64_t pending_exceptions_done; 86773dfd078SMikulas Patocka 86873dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 86973dfd078SMikulas Patocka pending_exceptions_done = _pending_exceptions_done_count; 87073dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 87173dfd078SMikulas Patocka 87273dfd078SMikulas Patocka return pending_exceptions_done; 87373dfd078SMikulas Patocka } 87473dfd078SMikulas Patocka 87573dfd078SMikulas Patocka static void increment_pending_exceptions_done_count(void) 87673dfd078SMikulas Patocka { 87773dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 87873dfd078SMikulas Patocka _pending_exceptions_done_count++; 87973dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 88073dfd078SMikulas 
Patocka 88173dfd078SMikulas Patocka wake_up_all(&_pending_exceptions_done); 88273dfd078SMikulas Patocka } 88373dfd078SMikulas Patocka 8841e03f97eSMikulas Patocka static void snapshot_merge_next_chunks(struct dm_snapshot *s) 8851e03f97eSMikulas Patocka { 8868a2d5286SMike Snitzer int i, linear_chunks; 8871e03f97eSMikulas Patocka chunk_t old_chunk, new_chunk; 8881e03f97eSMikulas Patocka struct dm_io_region src, dest; 8898a2d5286SMike Snitzer sector_t io_size; 89073dfd078SMikulas Patocka uint64_t previous_count; 8911e03f97eSMikulas Patocka 8921e03f97eSMikulas Patocka BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); 8931e03f97eSMikulas Patocka if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) 8941e03f97eSMikulas Patocka goto shut; 8951e03f97eSMikulas Patocka 8961e03f97eSMikulas Patocka /* 8971e03f97eSMikulas Patocka * valid flag never changes during merge, so no lock required. 8981e03f97eSMikulas Patocka */ 8991e03f97eSMikulas Patocka if (!s->valid) { 9001e03f97eSMikulas Patocka DMERR("Snapshot is invalid: can't merge"); 9011e03f97eSMikulas Patocka goto shut; 9021e03f97eSMikulas Patocka } 9031e03f97eSMikulas Patocka 9048a2d5286SMike Snitzer linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk, 9058a2d5286SMike Snitzer &new_chunk); 9068a2d5286SMike Snitzer if (linear_chunks <= 0) { 907d8ddb1cfSMike Snitzer if (linear_chunks < 0) { 9081e03f97eSMikulas Patocka DMERR("Read error in exception store: " 9091e03f97eSMikulas Patocka "shutting down merge"); 910d8ddb1cfSMike Snitzer down_write(&s->lock); 911d8ddb1cfSMike Snitzer s->merge_failed = 1; 912d8ddb1cfSMike Snitzer up_write(&s->lock); 913d8ddb1cfSMike Snitzer } 9141e03f97eSMikulas Patocka goto shut; 9151e03f97eSMikulas Patocka } 9161e03f97eSMikulas Patocka 9178a2d5286SMike Snitzer /* Adjust old_chunk and new_chunk to reflect start of linear region */ 9188a2d5286SMike Snitzer old_chunk = old_chunk + 1 - linear_chunks; 9198a2d5286SMike Snitzer new_chunk = new_chunk + 1 - linear_chunks; 9208a2d5286SMike 
Snitzer 9218a2d5286SMike Snitzer /* 9228a2d5286SMike Snitzer * Use one (potentially large) I/O to copy all 'linear_chunks' 9238a2d5286SMike Snitzer * from the exception store to the origin 9248a2d5286SMike Snitzer */ 9258a2d5286SMike Snitzer io_size = linear_chunks * s->store->chunk_size; 9261e03f97eSMikulas Patocka 9271e03f97eSMikulas Patocka dest.bdev = s->origin->bdev; 9281e03f97eSMikulas Patocka dest.sector = chunk_to_sector(s->store, old_chunk); 9298a2d5286SMike Snitzer dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector); 9301e03f97eSMikulas Patocka 9311e03f97eSMikulas Patocka src.bdev = s->cow->bdev; 9321e03f97eSMikulas Patocka src.sector = chunk_to_sector(s->store, new_chunk); 9331e03f97eSMikulas Patocka src.count = dest.count; 9341e03f97eSMikulas Patocka 93573dfd078SMikulas Patocka /* 93673dfd078SMikulas Patocka * Reallocate any exceptions needed in other snapshots then 93773dfd078SMikulas Patocka * wait for the pending exceptions to complete. 93873dfd078SMikulas Patocka * Each time any pending exception (globally on the system) 93973dfd078SMikulas Patocka * completes we are woken and repeat the process to find out 94073dfd078SMikulas Patocka * if we can proceed. While this may not seem a particularly 94173dfd078SMikulas Patocka * efficient algorithm, it is not expected to have any 94273dfd078SMikulas Patocka * significant impact on performance. 94373dfd078SMikulas Patocka */ 94473dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 9458a2d5286SMike Snitzer while (origin_write_extent(s, dest.sector, io_size)) { 94673dfd078SMikulas Patocka wait_event(_pending_exceptions_done, 94773dfd078SMikulas Patocka (read_pending_exceptions_done_count() != 94873dfd078SMikulas Patocka previous_count)); 94973dfd078SMikulas Patocka /* Retry after the wait, until all exceptions are done. 
*/ 95073dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 95173dfd078SMikulas Patocka } 95273dfd078SMikulas Patocka 9539fe86254SMikulas Patocka down_write(&s->lock); 9549fe86254SMikulas Patocka s->first_merging_chunk = old_chunk; 9558a2d5286SMike Snitzer s->num_merging_chunks = linear_chunks; 9569fe86254SMikulas Patocka up_write(&s->lock); 9579fe86254SMikulas Patocka 9588a2d5286SMike Snitzer /* Wait until writes to all 'linear_chunks' drain */ 9598a2d5286SMike Snitzer for (i = 0; i < linear_chunks; i++) 9608a2d5286SMike Snitzer __check_for_conflicting_io(s, old_chunk + i); 9619fe86254SMikulas Patocka 9621e03f97eSMikulas Patocka dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); 9631e03f97eSMikulas Patocka return; 9641e03f97eSMikulas Patocka 9651e03f97eSMikulas Patocka shut: 9661e03f97eSMikulas Patocka merge_shutdown(s); 9671e03f97eSMikulas Patocka } 9681e03f97eSMikulas Patocka 9699fe86254SMikulas Patocka static void error_bios(struct bio *bio); 9709fe86254SMikulas Patocka 9711e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, void *context) 9721e03f97eSMikulas Patocka { 9731e03f97eSMikulas Patocka struct dm_snapshot *s = context; 9749fe86254SMikulas Patocka struct bio *b = NULL; 9751e03f97eSMikulas Patocka 9761e03f97eSMikulas Patocka if (read_err || write_err) { 9771e03f97eSMikulas Patocka if (read_err) 9781e03f97eSMikulas Patocka DMERR("Read error: shutting down merge."); 9791e03f97eSMikulas Patocka else 9801e03f97eSMikulas Patocka DMERR("Write error: shutting down merge."); 9811e03f97eSMikulas Patocka goto shut; 9821e03f97eSMikulas Patocka } 9831e03f97eSMikulas Patocka 9849fe86254SMikulas Patocka if (s->store->type->commit_merge(s->store, 9859fe86254SMikulas Patocka s->num_merging_chunks) < 0) { 9861e03f97eSMikulas Patocka DMERR("Write error in exception store: shutting down merge"); 9871e03f97eSMikulas Patocka goto shut; 9881e03f97eSMikulas Patocka } 9891e03f97eSMikulas 
Patocka 9909fe86254SMikulas Patocka if (remove_single_exception_chunk(s) < 0) 9919fe86254SMikulas Patocka goto shut; 9929fe86254SMikulas Patocka 9931e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 9941e03f97eSMikulas Patocka 9951e03f97eSMikulas Patocka return; 9961e03f97eSMikulas Patocka 9971e03f97eSMikulas Patocka shut: 9989fe86254SMikulas Patocka down_write(&s->lock); 999d8ddb1cfSMike Snitzer s->merge_failed = 1; 10009fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 10019fe86254SMikulas Patocka up_write(&s->lock); 10029fe86254SMikulas Patocka error_bios(b); 10039fe86254SMikulas Patocka 10041e03f97eSMikulas Patocka merge_shutdown(s); 10051e03f97eSMikulas Patocka } 10061e03f97eSMikulas Patocka 10071e03f97eSMikulas Patocka static void start_merge(struct dm_snapshot *s) 10081e03f97eSMikulas Patocka { 10091e03f97eSMikulas Patocka if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits)) 10101e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 10111e03f97eSMikulas Patocka } 10121e03f97eSMikulas Patocka 10131e03f97eSMikulas Patocka static int wait_schedule(void *ptr) 10141e03f97eSMikulas Patocka { 10151e03f97eSMikulas Patocka schedule(); 10161e03f97eSMikulas Patocka 10171e03f97eSMikulas Patocka return 0; 10181e03f97eSMikulas Patocka } 10191e03f97eSMikulas Patocka 10201e03f97eSMikulas Patocka /* 10211e03f97eSMikulas Patocka * Stop the merging process and wait until it finishes. 
10221e03f97eSMikulas Patocka */ 10231e03f97eSMikulas Patocka static void stop_merge(struct dm_snapshot *s) 10241e03f97eSMikulas Patocka { 10251e03f97eSMikulas Patocka set_bit(SHUTDOWN_MERGE, &s->state_bits); 10261e03f97eSMikulas Patocka wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule, 10271e03f97eSMikulas Patocka TASK_UNINTERRUPTIBLE); 10281e03f97eSMikulas Patocka clear_bit(SHUTDOWN_MERGE, &s->state_bits); 10291e03f97eSMikulas Patocka } 10301e03f97eSMikulas Patocka 10311da177e4SLinus Torvalds /* 10321da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 10331da177e4SLinus Torvalds */ 10341da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 10351da177e4SLinus Torvalds { 10361da177e4SLinus Torvalds struct dm_snapshot *s; 1037cd45daffSMikulas Patocka int i; 10381da177e4SLinus Torvalds int r = -EINVAL; 1039fc56f6fbSMike Snitzer char *origin_path, *cow_path; 104055a62eefSAlasdair G Kergon unsigned args_used, num_flush_bios = 1; 104110b8106aSMike Snitzer fmode_t origin_mode = FMODE_READ; 10421da177e4SLinus Torvalds 10434c7e3bf4SMark McLoughlin if (argc != 4) { 104472d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 10451da177e4SLinus Torvalds r = -EINVAL; 1046fc56f6fbSMike Snitzer goto bad; 10471da177e4SLinus Torvalds } 10481da177e4SLinus Torvalds 104910b8106aSMike Snitzer if (dm_target_is_snapshot_merge(ti)) { 105055a62eefSAlasdair G Kergon num_flush_bios = 2; 105110b8106aSMike Snitzer origin_mode = FMODE_WRITE; 105210b8106aSMike Snitzer } 105310b8106aSMike Snitzer 10541da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 1055fee1998eSJonathan Brassow if (!s) { 1056a2d2b034SJonathan Brassow ti->error = "Cannot allocate private snapshot structure"; 10571da177e4SLinus Torvalds r = -ENOMEM; 1058fc56f6fbSMike Snitzer goto bad; 10591da177e4SLinus Torvalds } 10601da177e4SLinus Torvalds 1061c2411045SMikulas Patocka origin_path = argv[0]; 
1062c2411045SMikulas Patocka argv++; 1063c2411045SMikulas Patocka argc--; 1064c2411045SMikulas Patocka 1065c2411045SMikulas Patocka r = dm_get_device(ti, origin_path, origin_mode, &s->origin); 1066c2411045SMikulas Patocka if (r) { 1067c2411045SMikulas Patocka ti->error = "Cannot get origin device"; 1068c2411045SMikulas Patocka goto bad_origin; 1069c2411045SMikulas Patocka } 1070c2411045SMikulas Patocka 1071fc56f6fbSMike Snitzer cow_path = argv[0]; 1072fc56f6fbSMike Snitzer argv++; 1073fc56f6fbSMike Snitzer argc--; 1074fc56f6fbSMike Snitzer 1075024d37e9SMilan Broz r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow); 1076fc56f6fbSMike Snitzer if (r) { 1077fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 1078fc56f6fbSMike Snitzer goto bad_cow; 1079fc56f6fbSMike Snitzer } 1080fc56f6fbSMike Snitzer 1081fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 1082fc56f6fbSMike Snitzer if (r) { 1083fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 1084fc56f6fbSMike Snitzer r = -EINVAL; 1085fc56f6fbSMike Snitzer goto bad_store; 1086fc56f6fbSMike Snitzer } 1087fc56f6fbSMike Snitzer 1088fc56f6fbSMike Snitzer argv += args_used; 1089fc56f6fbSMike Snitzer argc -= args_used; 1090fc56f6fbSMike Snitzer 1091fc56f6fbSMike Snitzer s->ti = ti; 10921da177e4SLinus Torvalds s->valid = 1; 1093aa14edebSAlasdair G Kergon s->active = 0; 1094879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 10951da177e4SLinus Torvalds init_rwsem(&s->lock); 1096c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 1097ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 10981e03f97eSMikulas Patocka s->state_bits = 0; 1099d8ddb1cfSMike Snitzer s->merge_failed = 0; 11009fe86254SMikulas Patocka s->first_merging_chunk = 0; 11019fe86254SMikulas Patocka s->num_merging_chunks = 0; 11029fe86254SMikulas Patocka bio_list_init(&s->bios_queued_during_merge); 11031da177e4SLinus Torvalds 11041da177e4SLinus Torvalds /* 
Allocate hash table for COW data */ 1105fee1998eSJonathan Brassow if (init_hash_tables(s)) { 11061da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 11071da177e4SLinus Torvalds r = -ENOMEM; 1108fee1998eSJonathan Brassow goto bad_hash_tables; 11091da177e4SLinus Torvalds } 11101da177e4SLinus Torvalds 1111fa34ce73SMikulas Patocka s->kcopyd_client = dm_kcopyd_client_create(); 1112fa34ce73SMikulas Patocka if (IS_ERR(s->kcopyd_client)) { 1113fa34ce73SMikulas Patocka r = PTR_ERR(s->kcopyd_client); 11141da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 1115fee1998eSJonathan Brassow goto bad_kcopyd; 11161da177e4SLinus Torvalds } 11171da177e4SLinus Torvalds 111892e86812SMikulas Patocka s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 111992e86812SMikulas Patocka if (!s->pending_pool) { 112092e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 1121fee1998eSJonathan Brassow goto bad_pending_pool; 112292e86812SMikulas Patocka } 112392e86812SMikulas Patocka 1124cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1125cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 1126cd45daffSMikulas Patocka 1127cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 1128cd45daffSMikulas Patocka 1129c1f0c183SMike Snitzer ti->private = s; 113055a62eefSAlasdair G Kergon ti->num_flush_bios = num_flush_bios; 113142bc954fSMikulas Patocka ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); 1132c1f0c183SMike Snitzer 1133c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 1134c1f0c183SMike Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 1135c1f0c183SMike Snitzer r = register_snapshot(s); 1136c1f0c183SMike Snitzer if (r == -ENOMEM) { 1137c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 1138c1f0c183SMike Snitzer goto bad_load_and_register; 1139c1f0c183SMike Snitzer 
} else if (r < 0) { 1140c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 1141c1f0c183SMike Snitzer goto bad_load_and_register; 1142c1f0c183SMike Snitzer } 1143c1f0c183SMike Snitzer 1144c1f0c183SMike Snitzer /* 1145c1f0c183SMike Snitzer * Metadata must only be loaded into one table at once, so skip this 1146c1f0c183SMike Snitzer * if metadata will be handed over during resume. 1147c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 1148c1f0c183SMike Snitzer * ensure it's ignored. 1149c1f0c183SMike Snitzer */ 1150c1f0c183SMike Snitzer if (r > 0) { 1151c1f0c183SMike Snitzer s->store->chunk_size = 0; 1152c1f0c183SMike Snitzer return 0; 1153c1f0c183SMike Snitzer } 1154c1f0c183SMike Snitzer 1155493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 1156493df71cSJonathan Brassow (void *)s); 11570764147bSMilan Broz if (r < 0) { 1158f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 1159c1f0c183SMike Snitzer goto bad_read_metadata; 11600764147bSMilan Broz } else if (r > 0) { 11610764147bSMilan Broz s->valid = 0; 11620764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 1163f9cea4f7SMark McLoughlin } 1164aa14edebSAlasdair G Kergon 11653f2412dcSMikulas Patocka if (!s->store->chunk_size) { 11663f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 1167c1f0c183SMike Snitzer goto bad_read_metadata; 11683f2412dcSMikulas Patocka } 1169542f9038SMike Snitzer 1170542f9038SMike Snitzer r = dm_set_target_max_io_len(ti, s->store->chunk_size); 1171542f9038SMike Snitzer if (r) 1172542f9038SMike Snitzer goto bad_read_metadata; 11731da177e4SLinus Torvalds 11741da177e4SLinus Torvalds return 0; 11751da177e4SLinus Torvalds 1176c1f0c183SMike Snitzer bad_read_metadata: 1177c1f0c183SMike Snitzer unregister_snapshot(s); 1178c1f0c183SMike Snitzer 1179cd45daffSMikulas Patocka bad_load_and_register: 118092e86812SMikulas Patocka mempool_destroy(s->pending_pool); 
118192e86812SMikulas Patocka 1182fee1998eSJonathan Brassow bad_pending_pool: 1183eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 11841da177e4SLinus Torvalds 1185fee1998eSJonathan Brassow bad_kcopyd: 11863510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 11873510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 11881da177e4SLinus Torvalds 1189fee1998eSJonathan Brassow bad_hash_tables: 1190fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 1191fc56f6fbSMike Snitzer 1192fc56f6fbSMike Snitzer bad_store: 1193fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1194fc56f6fbSMike Snitzer 1195fc56f6fbSMike Snitzer bad_cow: 1196c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1197c2411045SMikulas Patocka 1198c2411045SMikulas Patocka bad_origin: 11991da177e4SLinus Torvalds kfree(s); 12001da177e4SLinus Torvalds 1201fc56f6fbSMike Snitzer bad: 12021da177e4SLinus Torvalds return r; 12031da177e4SLinus Torvalds } 12041da177e4SLinus Torvalds 120531c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 120631c93a0cSMilan Broz { 1207eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 120831c93a0cSMilan Broz s->kcopyd_client = NULL; 120931c93a0cSMilan Broz 12103510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 12113510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 121231c93a0cSMilan Broz } 121331c93a0cSMilan Broz 1214c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 1215c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 1216c1f0c183SMike Snitzer { 1217c1f0c183SMike Snitzer union { 1218c1f0c183SMike Snitzer struct dm_exception_table table_swap; 1219c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 1220c1f0c183SMike Snitzer } u; 1221c1f0c183SMike Snitzer 1222c1f0c183SMike Snitzer /* 1223c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 
1224c1f0c183SMike Snitzer */ 1225c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 1226c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 1227c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 1228c1f0c183SMike Snitzer 1229c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 1230c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 1231c1f0c183SMike Snitzer snap_src->store = u.store_swap; 1232c1f0c183SMike Snitzer 1233c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 1234c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 1235c1f0c183SMike Snitzer 1236542f9038SMike Snitzer snap_dest->ti->max_io_len = snap_dest->store->chunk_size; 1237c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 1238c1f0c183SMike Snitzer 1239c1f0c183SMike Snitzer /* 1240c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 1241c1f0c183SMike Snitzer */ 1242c1f0c183SMike Snitzer snap_src->valid = 0; 1243c1f0c183SMike Snitzer } 1244c1f0c183SMike Snitzer 12451da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 12461da177e4SLinus Torvalds { 1247cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1248cd45daffSMikulas Patocka int i; 1249cd45daffSMikulas Patocka #endif 1250028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1251c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 12521da177e4SLinus Torvalds 1253c1f0c183SMike Snitzer down_read(&_origins_lock); 1254c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 12559d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1256c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 1257c1f0c183SMike Snitzer down_write(&snap_dest->lock); 1258c1f0c183SMike Snitzer snap_dest->valid = 0; 1259c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1260c1f0c183SMike Snitzer DMERR("Cancelling snapshot handover."); 1261c1f0c183SMike Snitzer } 1262c1f0c183SMike 
Snitzer up_read(&_origins_lock); 1263c1f0c183SMike Snitzer 12641e03f97eSMikulas Patocka if (dm_target_is_snapshot_merge(ti)) 12651e03f97eSMikulas Patocka stop_merge(s); 12661e03f97eSMikulas Patocka 1267138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 1268138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 12691da177e4SLinus Torvalds unregister_snapshot(s); 12701da177e4SLinus Torvalds 1271879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 127290fa1527SMikulas Patocka msleep(1); 1273879129d2SMikulas Patocka /* 1274879129d2SMikulas Patocka * Ensure instructions in mempool_destroy aren't reordered 1275879129d2SMikulas Patocka * before atomic_read. 1276879129d2SMikulas Patocka */ 1277879129d2SMikulas Patocka smp_mb(); 1278879129d2SMikulas Patocka 1279cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1280cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1281cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 1282cd45daffSMikulas Patocka #endif 1283cd45daffSMikulas Patocka 128431c93a0cSMilan Broz __free_exceptions(s); 12851da177e4SLinus Torvalds 128692e86812SMikulas Patocka mempool_destroy(s->pending_pool); 128792e86812SMikulas Patocka 1288fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 1289138728dcSAlasdair G Kergon 1290fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1291fc56f6fbSMike Snitzer 1292c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1293c2411045SMikulas Patocka 12941da177e4SLinus Torvalds kfree(s); 12951da177e4SLinus Torvalds } 12961da177e4SLinus Torvalds 12971da177e4SLinus Torvalds /* 12981da177e4SLinus Torvalds * Flush a list of buffers. 
12991da177e4SLinus Torvalds */ 13001da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 13011da177e4SLinus Torvalds { 13021da177e4SLinus Torvalds struct bio *n; 13031da177e4SLinus Torvalds 13041da177e4SLinus Torvalds while (bio) { 13051da177e4SLinus Torvalds n = bio->bi_next; 13061da177e4SLinus Torvalds bio->bi_next = NULL; 13071da177e4SLinus Torvalds generic_make_request(bio); 13081da177e4SLinus Torvalds bio = n; 13091da177e4SLinus Torvalds } 13101da177e4SLinus Torvalds } 13111da177e4SLinus Torvalds 1312515ad66cSMikulas Patocka static int do_origin(struct dm_dev *origin, struct bio *bio); 1313515ad66cSMikulas Patocka 1314515ad66cSMikulas Patocka /* 1315515ad66cSMikulas Patocka * Flush a list of buffers. 1316515ad66cSMikulas Patocka */ 1317515ad66cSMikulas Patocka static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) 1318515ad66cSMikulas Patocka { 1319515ad66cSMikulas Patocka struct bio *n; 1320515ad66cSMikulas Patocka int r; 1321515ad66cSMikulas Patocka 1322515ad66cSMikulas Patocka while (bio) { 1323515ad66cSMikulas Patocka n = bio->bi_next; 1324515ad66cSMikulas Patocka bio->bi_next = NULL; 1325515ad66cSMikulas Patocka r = do_origin(s->origin, bio); 1326515ad66cSMikulas Patocka if (r == DM_MAPIO_REMAPPED) 1327515ad66cSMikulas Patocka generic_make_request(bio); 1328515ad66cSMikulas Patocka bio = n; 1329515ad66cSMikulas Patocka } 1330515ad66cSMikulas Patocka } 1331515ad66cSMikulas Patocka 13321da177e4SLinus Torvalds /* 13331da177e4SLinus Torvalds * Error a list of buffers. 
13341da177e4SLinus Torvalds */ 13351da177e4SLinus Torvalds static void error_bios(struct bio *bio) 13361da177e4SLinus Torvalds { 13371da177e4SLinus Torvalds struct bio *n; 13381da177e4SLinus Torvalds 13391da177e4SLinus Torvalds while (bio) { 13401da177e4SLinus Torvalds n = bio->bi_next; 13411da177e4SLinus Torvalds bio->bi_next = NULL; 13426712ecf8SNeilBrown bio_io_error(bio); 13431da177e4SLinus Torvalds bio = n; 13441da177e4SLinus Torvalds } 13451da177e4SLinus Torvalds } 13461da177e4SLinus Torvalds 1347695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err) 134876df1c65SAlasdair G Kergon { 134976df1c65SAlasdair G Kergon if (!s->valid) 135076df1c65SAlasdair G Kergon return; 135176df1c65SAlasdair G Kergon 135276df1c65SAlasdair G Kergon if (err == -EIO) 135376df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Error reading/writing."); 135476df1c65SAlasdair G Kergon else if (err == -ENOMEM) 135576df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Unable to allocate exception."); 135676df1c65SAlasdair G Kergon 1357493df71cSJonathan Brassow if (s->store->type->drop_snapshot) 1358493df71cSJonathan Brassow s->store->type->drop_snapshot(s->store); 135976df1c65SAlasdair G Kergon 136076df1c65SAlasdair G Kergon s->valid = 0; 136176df1c65SAlasdair G Kergon 1362fc56f6fbSMike Snitzer dm_table_event(s->ti->table); 136376df1c65SAlasdair G Kergon } 136476df1c65SAlasdair G Kergon 1365028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success) 13661da177e4SLinus Torvalds { 13671d4989c8SJon Brassow struct dm_exception *e; 13681da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 13699d493fa8SAlasdair G Kergon struct bio *origin_bios = NULL; 13709d493fa8SAlasdair G Kergon struct bio *snapshot_bios = NULL; 1371a6e50b40SMikulas Patocka struct bio *full_bio = NULL; 13729d493fa8SAlasdair G Kergon int error = 0; 13731da177e4SLinus Torvalds 137476df1c65SAlasdair G Kergon if (!success) { 
137576df1c65SAlasdair G Kergon /* Read/write error - snapshot is unusable */ 13761da177e4SLinus Torvalds down_write(&s->lock); 1377695368acSAlasdair G Kergon __invalidate_snapshot(s, -EIO); 13789d493fa8SAlasdair G Kergon error = 1; 137976df1c65SAlasdair G Kergon goto out; 138076df1c65SAlasdair G Kergon } 138176df1c65SAlasdair G Kergon 13823510cb94SJon Brassow e = alloc_completed_exception(); 138376df1c65SAlasdair G Kergon if (!e) { 138476df1c65SAlasdair G Kergon down_write(&s->lock); 1385695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 13869d493fa8SAlasdair G Kergon error = 1; 13871da177e4SLinus Torvalds goto out; 13881da177e4SLinus Torvalds } 13891da177e4SLinus Torvalds *e = pe->e; 13901da177e4SLinus Torvalds 13919d493fa8SAlasdair G Kergon down_write(&s->lock); 13929d493fa8SAlasdair G Kergon if (!s->valid) { 13933510cb94SJon Brassow free_completed_exception(e); 13949d493fa8SAlasdair G Kergon error = 1; 13959d493fa8SAlasdair G Kergon goto out; 13969d493fa8SAlasdair G Kergon } 13979d493fa8SAlasdair G Kergon 1398615d1eb9SMike Snitzer /* Check for conflicting reads */ 1399615d1eb9SMike Snitzer __check_for_conflicting_io(s, pe->e.old_chunk); 1400a8d41b59SMikulas Patocka 1401a8d41b59SMikulas Patocka /* 14021da177e4SLinus Torvalds * Add a proper exception, and remove the 14031da177e4SLinus Torvalds * in-flight exception from the list. 
14041da177e4SLinus Torvalds */ 14053510cb94SJon Brassow dm_insert_exception(&s->complete, e); 14061da177e4SLinus Torvalds 14071da177e4SLinus Torvalds out: 14083510cb94SJon Brassow dm_remove_exception(&pe->e); 14099d493fa8SAlasdair G Kergon snapshot_bios = bio_list_get(&pe->snapshot_bios); 1410515ad66cSMikulas Patocka origin_bios = bio_list_get(&pe->origin_bios); 1411a6e50b40SMikulas Patocka full_bio = pe->full_bio; 1412a6e50b40SMikulas Patocka if (full_bio) { 1413a6e50b40SMikulas Patocka full_bio->bi_end_io = pe->full_bio_end_io; 1414a6e50b40SMikulas Patocka full_bio->bi_private = pe->full_bio_private; 1415a6e50b40SMikulas Patocka } 1416515ad66cSMikulas Patocka free_pending_exception(pe); 1417b4b610f6SAlasdair G Kergon 141873dfd078SMikulas Patocka increment_pending_exceptions_done_count(); 141973dfd078SMikulas Patocka 14209d493fa8SAlasdair G Kergon up_write(&s->lock); 14219d493fa8SAlasdair G Kergon 14229d493fa8SAlasdair G Kergon /* Submit any pending write bios */ 1423a6e50b40SMikulas Patocka if (error) { 1424a6e50b40SMikulas Patocka if (full_bio) 1425a6e50b40SMikulas Patocka bio_io_error(full_bio); 14269d493fa8SAlasdair G Kergon error_bios(snapshot_bios); 1427a6e50b40SMikulas Patocka } else { 1428a6e50b40SMikulas Patocka if (full_bio) 1429a6e50b40SMikulas Patocka bio_endio(full_bio, 0); 14309d493fa8SAlasdair G Kergon flush_bios(snapshot_bios); 1431a6e50b40SMikulas Patocka } 14329d493fa8SAlasdair G Kergon 1433515ad66cSMikulas Patocka retry_origin_bios(s, origin_bios); 14341da177e4SLinus Torvalds } 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds static void commit_callback(void *context, int success) 14371da177e4SLinus Torvalds { 1438028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 1439028867acSAlasdair G Kergon 14401da177e4SLinus Torvalds pending_complete(pe, success); 14411da177e4SLinus Torvalds } 14421da177e4SLinus Torvalds 14431da177e4SLinus Torvalds /* 14441da177e4SLinus Torvalds * Called when the copy I/O has finished. 
kcopyd actually runs 14451da177e4SLinus Torvalds * this code so don't block. 14461da177e4SLinus Torvalds */ 14474cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 14481da177e4SLinus Torvalds { 1449028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 14501da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds if (read_err || write_err) 14531da177e4SLinus Torvalds pending_complete(pe, 0); 14541da177e4SLinus Torvalds 14551da177e4SLinus Torvalds else 14561da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 1457493df71cSJonathan Brassow s->store->type->commit_exception(s->store, &pe->e, 1458b2a11465SJonathan Brassow commit_callback, pe); 14591da177e4SLinus Torvalds } 14601da177e4SLinus Torvalds 14611da177e4SLinus Torvalds /* 14621da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 14631da177e4SLinus Torvalds */ 1464028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 14651da177e4SLinus Torvalds { 14661da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 146722a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 14681da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 14691da177e4SLinus Torvalds sector_t dev_size; 14701da177e4SLinus Torvalds 14711da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds src.bdev = bdev; 147471fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1475df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 14761da177e4SLinus Torvalds 1477fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 147871fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 14791da177e4SLinus Torvalds dest.count = src.count; 14801da177e4SLinus Torvalds 14811da177e4SLinus Torvalds /* Hand over to kcopyd 
*/ 1482a2d2b034SJonathan Brassow dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); 14831da177e4SLinus Torvalds } 14841da177e4SLinus Torvalds 1485a6e50b40SMikulas Patocka static void full_bio_end_io(struct bio *bio, int error) 1486a6e50b40SMikulas Patocka { 1487a6e50b40SMikulas Patocka void *callback_data = bio->bi_private; 1488a6e50b40SMikulas Patocka 1489a6e50b40SMikulas Patocka dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); 1490a6e50b40SMikulas Patocka } 1491a6e50b40SMikulas Patocka 1492a6e50b40SMikulas Patocka static void start_full_bio(struct dm_snap_pending_exception *pe, 1493a6e50b40SMikulas Patocka struct bio *bio) 1494a6e50b40SMikulas Patocka { 1495a6e50b40SMikulas Patocka struct dm_snapshot *s = pe->snap; 1496a6e50b40SMikulas Patocka void *callback_data; 1497a6e50b40SMikulas Patocka 1498a6e50b40SMikulas Patocka pe->full_bio = bio; 1499a6e50b40SMikulas Patocka pe->full_bio_end_io = bio->bi_end_io; 1500a6e50b40SMikulas Patocka pe->full_bio_private = bio->bi_private; 1501a6e50b40SMikulas Patocka 1502a6e50b40SMikulas Patocka callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, 1503a6e50b40SMikulas Patocka copy_callback, pe); 1504a6e50b40SMikulas Patocka 1505a6e50b40SMikulas Patocka bio->bi_end_io = full_bio_end_io; 1506a6e50b40SMikulas Patocka bio->bi_private = callback_data; 1507a6e50b40SMikulas Patocka 1508a6e50b40SMikulas Patocka generic_make_request(bio); 1509a6e50b40SMikulas Patocka } 1510a6e50b40SMikulas Patocka 15112913808eSMikulas Patocka static struct dm_snap_pending_exception * 15122913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 15132913808eSMikulas Patocka { 15143510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 15152913808eSMikulas Patocka 15162913808eSMikulas Patocka if (!e) 15172913808eSMikulas Patocka return NULL; 15182913808eSMikulas Patocka 15192913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 
15202913808eSMikulas Patocka } 15212913808eSMikulas Patocka 15221da177e4SLinus Torvalds /* 15231da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 15241da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 15251da177e4SLinus Torvalds * it into the pending table. 15261da177e4SLinus Torvalds * 15271da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 15281da177e4SLinus Torvalds * this. 15291da177e4SLinus Torvalds */ 1530028867acSAlasdair G Kergon static struct dm_snap_pending_exception * 1531c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s, 1532c6621392SMikulas Patocka struct dm_snap_pending_exception *pe, chunk_t chunk) 15331da177e4SLinus Torvalds { 1534c6621392SMikulas Patocka struct dm_snap_pending_exception *pe2; 153576df1c65SAlasdair G Kergon 15362913808eSMikulas Patocka pe2 = __lookup_pending_exception(s, chunk); 15372913808eSMikulas Patocka if (pe2) { 15381da177e4SLinus Torvalds free_pending_exception(pe); 15392913808eSMikulas Patocka return pe2; 154076df1c65SAlasdair G Kergon } 154176df1c65SAlasdair G Kergon 15421da177e4SLinus Torvalds pe->e.old_chunk = chunk; 15431da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 15441da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 15451da177e4SLinus Torvalds pe->started = 0; 1546a6e50b40SMikulas Patocka pe->full_bio = NULL; 15471da177e4SLinus Torvalds 1548493df71cSJonathan Brassow if (s->store->type->prepare_exception(s->store, &pe->e)) { 15491da177e4SLinus Torvalds free_pending_exception(pe); 15501da177e4SLinus Torvalds return NULL; 15511da177e4SLinus Torvalds } 15521da177e4SLinus Torvalds 15533510cb94SJon Brassow dm_insert_exception(&s->pending, &pe->e); 15541da177e4SLinus Torvalds 15551da177e4SLinus Torvalds return pe; 15561da177e4SLinus Torvalds } 15571da177e4SLinus Torvalds 15581d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, 
1559d74f81f8SMilan Broz struct bio *bio, chunk_t chunk) 15601da177e4SLinus Torvalds { 1561fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 156271fab00aSJonathan Brassow bio->bi_sector = chunk_to_sector(s->store, 156371fab00aSJonathan Brassow dm_chunk_number(e->new_chunk) + 1564d74f81f8SMilan Broz (chunk - e->old_chunk)) + 156571fab00aSJonathan Brassow (bio->bi_sector & 156671fab00aSJonathan Brassow s->store->chunk_mask); 15671da177e4SLinus Torvalds } 15681da177e4SLinus Torvalds 15697de3ee57SMikulas Patocka static int snapshot_map(struct dm_target *ti, struct bio *bio) 15701da177e4SLinus Torvalds { 15711d4989c8SJon Brassow struct dm_exception *e; 1572028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1573d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 15741da177e4SLinus Torvalds chunk_t chunk; 1575028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = NULL; 15761da177e4SLinus Torvalds 1577ee18026aSMikulas Patocka init_tracked_chunk(bio); 1578ee18026aSMikulas Patocka 1579d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) { 1580fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 1581494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1582494b3ee7SMikulas Patocka } 1583494b3ee7SMikulas Patocka 158471fab00aSJonathan Brassow chunk = sector_to_chunk(s->store, bio->bi_sector); 15851da177e4SLinus Torvalds 15861da177e4SLinus Torvalds /* Full snapshots are not usable */ 158776df1c65SAlasdair G Kergon /* To get here the table must be live so s->active is always set. 
*/ 15881da177e4SLinus Torvalds if (!s->valid) 1589f6a80ea8SAlasdair G Kergon return -EIO; 15901da177e4SLinus Torvalds 15911da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 15921da177e4SLinus Torvalds * to copy an exception */ 15931da177e4SLinus Torvalds down_write(&s->lock); 15941da177e4SLinus Torvalds 159576df1c65SAlasdair G Kergon if (!s->valid) { 159676df1c65SAlasdair G Kergon r = -EIO; 159776df1c65SAlasdair G Kergon goto out_unlock; 159876df1c65SAlasdair G Kergon } 159976df1c65SAlasdair G Kergon 16001da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 16013510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 16021da177e4SLinus Torvalds if (e) { 1603d74f81f8SMilan Broz remap_exception(s, e, bio, chunk); 160476df1c65SAlasdair G Kergon goto out_unlock; 160576df1c65SAlasdair G Kergon } 16061da177e4SLinus Torvalds 1607ba40a2aaSAlasdair G Kergon /* 1608ba40a2aaSAlasdair G Kergon * Write to snapshot - higher level takes care of RW/RO 1609ba40a2aaSAlasdair G Kergon * flags so we should only get this if we are 1610ba40a2aaSAlasdair G Kergon * writeable. 
1611ba40a2aaSAlasdair G Kergon */ 1612ba40a2aaSAlasdair G Kergon if (bio_rw(bio) == WRITE) { 16132913808eSMikulas Patocka pe = __lookup_pending_exception(s, chunk); 16142913808eSMikulas Patocka if (!pe) { 1615c6621392SMikulas Patocka up_write(&s->lock); 1616c6621392SMikulas Patocka pe = alloc_pending_exception(s); 1617c6621392SMikulas Patocka down_write(&s->lock); 1618c6621392SMikulas Patocka 1619c6621392SMikulas Patocka if (!s->valid) { 1620c6621392SMikulas Patocka free_pending_exception(pe); 1621c6621392SMikulas Patocka r = -EIO; 1622c6621392SMikulas Patocka goto out_unlock; 1623c6621392SMikulas Patocka } 1624c6621392SMikulas Patocka 16253510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 162635bf659bSMikulas Patocka if (e) { 162735bf659bSMikulas Patocka free_pending_exception(pe); 162835bf659bSMikulas Patocka remap_exception(s, e, bio, chunk); 162935bf659bSMikulas Patocka goto out_unlock; 163035bf659bSMikulas Patocka } 163135bf659bSMikulas Patocka 1632c6621392SMikulas Patocka pe = __find_pending_exception(s, pe, chunk); 16331da177e4SLinus Torvalds if (!pe) { 1634695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 16351da177e4SLinus Torvalds r = -EIO; 163676df1c65SAlasdair G Kergon goto out_unlock; 163776df1c65SAlasdair G Kergon } 16382913808eSMikulas Patocka } 163976df1c65SAlasdair G Kergon 1640d74f81f8SMilan Broz remap_exception(s, &pe->e, bio, chunk); 16411da177e4SLinus Torvalds 1642d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1643ba40a2aaSAlasdair G Kergon 1644a6e50b40SMikulas Patocka if (!pe->started && 1645a6e50b40SMikulas Patocka bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { 1646a6e50b40SMikulas Patocka pe->started = 1; 1647a6e50b40SMikulas Patocka up_write(&s->lock); 1648a6e50b40SMikulas Patocka start_full_bio(pe, bio); 1649a6e50b40SMikulas Patocka goto out; 1650a6e50b40SMikulas Patocka } 1651a6e50b40SMikulas Patocka 1652a6e50b40SMikulas Patocka bio_list_add(&pe->snapshot_bios, bio); 1653a6e50b40SMikulas Patocka 
16541da177e4SLinus Torvalds if (!pe->started) { 16551da177e4SLinus Torvalds /* this is protected by snap->lock */ 16561da177e4SLinus Torvalds pe->started = 1; 165776df1c65SAlasdair G Kergon up_write(&s->lock); 165876df1c65SAlasdair G Kergon start_copy(pe); 1659ba40a2aaSAlasdair G Kergon goto out; 1660ba40a2aaSAlasdair G Kergon } 1661cd45daffSMikulas Patocka } else { 16621da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 1663ee18026aSMikulas Patocka track_chunk(s, bio, chunk); 1664cd45daffSMikulas Patocka } 16651da177e4SLinus Torvalds 1666ba40a2aaSAlasdair G Kergon out_unlock: 1667ba40a2aaSAlasdair G Kergon up_write(&s->lock); 1668ba40a2aaSAlasdair G Kergon out: 16691da177e4SLinus Torvalds return r; 16701da177e4SLinus Torvalds } 16711da177e4SLinus Torvalds 16723452c2a1SMikulas Patocka /* 16733452c2a1SMikulas Patocka * A snapshot-merge target behaves like a combination of a snapshot 16743452c2a1SMikulas Patocka * target and a snapshot-origin target. It only generates new 16753452c2a1SMikulas Patocka * exceptions in other snapshots and not in the one that is being 16763452c2a1SMikulas Patocka * merged. 16773452c2a1SMikulas Patocka * 16783452c2a1SMikulas Patocka * For each chunk, if there is an existing exception, it is used to 16793452c2a1SMikulas Patocka * redirect I/O to the cow device. Otherwise I/O is sent to the origin, 16803452c2a1SMikulas Patocka * which in turn might generate exceptions in other snapshots. 16819fe86254SMikulas Patocka * If merging is currently taking place on the chunk in question, the 16829fe86254SMikulas Patocka * I/O is deferred by adding it to s->bios_queued_during_merge. 
16833452c2a1SMikulas Patocka */ 16847de3ee57SMikulas Patocka static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) 16853452c2a1SMikulas Patocka { 16863452c2a1SMikulas Patocka struct dm_exception *e; 16873452c2a1SMikulas Patocka struct dm_snapshot *s = ti->private; 16883452c2a1SMikulas Patocka int r = DM_MAPIO_REMAPPED; 16893452c2a1SMikulas Patocka chunk_t chunk; 16903452c2a1SMikulas Patocka 1691ee18026aSMikulas Patocka init_tracked_chunk(bio); 1692ee18026aSMikulas Patocka 1693d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) { 169455a62eefSAlasdair G Kergon if (!dm_bio_get_target_bio_nr(bio)) 169510b8106aSMike Snitzer bio->bi_bdev = s->origin->bdev; 169610b8106aSMike Snitzer else 169710b8106aSMike Snitzer bio->bi_bdev = s->cow->bdev; 169810b8106aSMike Snitzer return DM_MAPIO_REMAPPED; 169910b8106aSMike Snitzer } 170010b8106aSMike Snitzer 17013452c2a1SMikulas Patocka chunk = sector_to_chunk(s->store, bio->bi_sector); 17023452c2a1SMikulas Patocka 17039fe86254SMikulas Patocka down_write(&s->lock); 17043452c2a1SMikulas Patocka 1705d2fdb776SMikulas Patocka /* Full merging snapshots are redirected to the origin */ 1706d2fdb776SMikulas Patocka if (!s->valid) 1707d2fdb776SMikulas Patocka goto redirect_to_origin; 17083452c2a1SMikulas Patocka 17093452c2a1SMikulas Patocka /* If the block is already remapped - use that */ 17103452c2a1SMikulas Patocka e = dm_lookup_exception(&s->complete, chunk); 17113452c2a1SMikulas Patocka if (e) { 17129fe86254SMikulas Patocka /* Queue writes overlapping with chunks being merged */ 17139fe86254SMikulas Patocka if (bio_rw(bio) == WRITE && 17149fe86254SMikulas Patocka chunk >= s->first_merging_chunk && 17159fe86254SMikulas Patocka chunk < (s->first_merging_chunk + 17169fe86254SMikulas Patocka s->num_merging_chunks)) { 17179fe86254SMikulas Patocka bio->bi_bdev = s->origin->bdev; 17189fe86254SMikulas Patocka bio_list_add(&s->bios_queued_during_merge, bio); 17199fe86254SMikulas Patocka r = DM_MAPIO_SUBMITTED; 17209fe86254SMikulas 
Patocka goto out_unlock; 17219fe86254SMikulas Patocka } 172217aa0332SMikulas Patocka 17233452c2a1SMikulas Patocka remap_exception(s, e, bio, chunk); 172417aa0332SMikulas Patocka 172517aa0332SMikulas Patocka if (bio_rw(bio) == WRITE) 1726ee18026aSMikulas Patocka track_chunk(s, bio, chunk); 17273452c2a1SMikulas Patocka goto out_unlock; 17283452c2a1SMikulas Patocka } 17293452c2a1SMikulas Patocka 1730d2fdb776SMikulas Patocka redirect_to_origin: 17313452c2a1SMikulas Patocka bio->bi_bdev = s->origin->bdev; 17323452c2a1SMikulas Patocka 17333452c2a1SMikulas Patocka if (bio_rw(bio) == WRITE) { 17349fe86254SMikulas Patocka up_write(&s->lock); 17353452c2a1SMikulas Patocka return do_origin(s->origin, bio); 17363452c2a1SMikulas Patocka } 17373452c2a1SMikulas Patocka 17383452c2a1SMikulas Patocka out_unlock: 17399fe86254SMikulas Patocka up_write(&s->lock); 17403452c2a1SMikulas Patocka 17413452c2a1SMikulas Patocka return r; 17423452c2a1SMikulas Patocka } 17433452c2a1SMikulas Patocka 17447de3ee57SMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) 1745cd45daffSMikulas Patocka { 1746cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 1747cd45daffSMikulas Patocka 1748ee18026aSMikulas Patocka if (is_bio_tracked(bio)) 1749ee18026aSMikulas Patocka stop_tracking_chunk(s, bio); 1750cd45daffSMikulas Patocka 1751cd45daffSMikulas Patocka return 0; 1752cd45daffSMikulas Patocka } 1753cd45daffSMikulas Patocka 17541e03f97eSMikulas Patocka static void snapshot_merge_presuspend(struct dm_target *ti) 17551e03f97eSMikulas Patocka { 17561e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 17571e03f97eSMikulas Patocka 17581e03f97eSMikulas Patocka stop_merge(s); 17591e03f97eSMikulas Patocka } 17601e03f97eSMikulas Patocka 1761c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target *ti) 1762c1f0c183SMike Snitzer { 1763c1f0c183SMike Snitzer int r = 0; 1764c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 1765c1f0c183SMike 
Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1766c1f0c183SMike Snitzer 1767c1f0c183SMike Snitzer down_read(&_origins_lock); 17689d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1769c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1770c1f0c183SMike Snitzer down_read(&snap_src->lock); 1771c1f0c183SMike Snitzer if (s == snap_src) { 1772c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 1773c1f0c183SMike Snitzer "handover completes."); 1774c1f0c183SMike Snitzer r = -EINVAL; 1775b83b2f29SMike Snitzer } else if (!dm_suspended(snap_src->ti)) { 1776c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 1777c1f0c183SMike Snitzer "source is suspended."); 1778c1f0c183SMike Snitzer r = -EINVAL; 1779c1f0c183SMike Snitzer } 1780c1f0c183SMike Snitzer up_read(&snap_src->lock); 1781c1f0c183SMike Snitzer } 1782c1f0c183SMike Snitzer up_read(&_origins_lock); 1783c1f0c183SMike Snitzer 1784c1f0c183SMike Snitzer return r; 1785c1f0c183SMike Snitzer } 1786c1f0c183SMike Snitzer 17871da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 17881da177e4SLinus Torvalds { 1789028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1790c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1791c1f0c183SMike Snitzer 1792c1f0c183SMike Snitzer down_read(&_origins_lock); 17939d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1794c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1795c1f0c183SMike Snitzer down_write(&snap_src->lock); 1796c1f0c183SMike Snitzer down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 1797c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 1798c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1799c1f0c183SMike Snitzer up_write(&snap_src->lock); 1800c1f0c183SMike Snitzer } 1801c1f0c183SMike Snitzer up_read(&_origins_lock); 1802c1f0c183SMike Snitzer 1803c1f0c183SMike 
Snitzer /* Now we have correct chunk size, reregister */ 1804c1f0c183SMike Snitzer reregister_snapshot(s); 18051da177e4SLinus Torvalds 1806aa14edebSAlasdair G Kergon down_write(&s->lock); 1807aa14edebSAlasdair G Kergon s->active = 1; 1808aa14edebSAlasdair G Kergon up_write(&s->lock); 18091da177e4SLinus Torvalds } 18101da177e4SLinus Torvalds 1811542f9038SMike Snitzer static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) 18121e03f97eSMikulas Patocka { 1813542f9038SMike Snitzer uint32_t min_chunksize; 18141e03f97eSMikulas Patocka 18151e03f97eSMikulas Patocka down_read(&_origins_lock); 18161e03f97eSMikulas Patocka min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); 18171e03f97eSMikulas Patocka up_read(&_origins_lock); 18181e03f97eSMikulas Patocka 18191e03f97eSMikulas Patocka return min_chunksize; 18201e03f97eSMikulas Patocka } 18211e03f97eSMikulas Patocka 18221e03f97eSMikulas Patocka static void snapshot_merge_resume(struct dm_target *ti) 18231e03f97eSMikulas Patocka { 18241e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 18251e03f97eSMikulas Patocka 18261e03f97eSMikulas Patocka /* 18271e03f97eSMikulas Patocka * Handover exceptions from existing snapshot. 
18281e03f97eSMikulas Patocka */ 18291e03f97eSMikulas Patocka snapshot_resume(ti); 18301e03f97eSMikulas Patocka 18311e03f97eSMikulas Patocka /* 1832542f9038SMike Snitzer * snapshot-merge acts as an origin, so set ti->max_io_len 18331e03f97eSMikulas Patocka */ 1834542f9038SMike Snitzer ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); 18351e03f97eSMikulas Patocka 18361e03f97eSMikulas Patocka start_merge(s); 18371e03f97eSMikulas Patocka } 18381e03f97eSMikulas Patocka 1839fd7c092eSMikulas Patocka static void snapshot_status(struct dm_target *ti, status_type_t type, 18401f4e0ff0SAlasdair G Kergon unsigned status_flags, char *result, unsigned maxlen) 18411da177e4SLinus Torvalds { 18422e4a31dfSJonathan Brassow unsigned sz = 0; 1843028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 18441da177e4SLinus Torvalds 18451da177e4SLinus Torvalds switch (type) { 18461da177e4SLinus Torvalds case STATUSTYPE_INFO: 184794e76572SMikulas Patocka 184894e76572SMikulas Patocka down_write(&snap->lock); 184994e76572SMikulas Patocka 18501da177e4SLinus Torvalds if (!snap->valid) 18512e4a31dfSJonathan Brassow DMEMIT("Invalid"); 1852d8ddb1cfSMike Snitzer else if (snap->merge_failed) 1853d8ddb1cfSMike Snitzer DMEMIT("Merge failed"); 18541da177e4SLinus Torvalds else { 1855985903bbSMike Snitzer if (snap->store->type->usage) { 1856985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 1857985903bbSMike Snitzer metadata_sectors; 1858985903bbSMike Snitzer snap->store->type->usage(snap->store, 1859985903bbSMike Snitzer &total_sectors, 1860985903bbSMike Snitzer §ors_allocated, 1861985903bbSMike Snitzer &metadata_sectors); 1862985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 1863985903bbSMike Snitzer (unsigned long long)sectors_allocated, 1864985903bbSMike Snitzer (unsigned long long)total_sectors, 1865985903bbSMike Snitzer (unsigned long long)metadata_sectors); 18661da177e4SLinus Torvalds } 18671da177e4SLinus Torvalds else 18682e4a31dfSJonathan Brassow 
DMEMIT("Unknown"); 18691da177e4SLinus Torvalds } 187094e76572SMikulas Patocka 187194e76572SMikulas Patocka up_write(&snap->lock); 187294e76572SMikulas Patocka 18731da177e4SLinus Torvalds break; 18741da177e4SLinus Torvalds 18751da177e4SLinus Torvalds case STATUSTYPE_TABLE: 18761da177e4SLinus Torvalds /* 18771da177e4SLinus Torvalds * kdevname returns a static pointer so we need 18781da177e4SLinus Torvalds * to make private copies if the output is to 18791da177e4SLinus Torvalds * make sense. 18801da177e4SLinus Torvalds */ 1881fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 18821e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 18831e302a92SJonathan Brassow maxlen - sz); 18841da177e4SLinus Torvalds break; 18851da177e4SLinus Torvalds } 18861da177e4SLinus Torvalds } 18871da177e4SLinus Torvalds 18888811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 18898811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 18908811f46cSMike Snitzer { 18918811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 18921e5554c8SMikulas Patocka int r; 18938811f46cSMike Snitzer 18941e5554c8SMikulas Patocka r = fn(ti, snap->origin, 0, ti->len, data); 18951e5554c8SMikulas Patocka 18961e5554c8SMikulas Patocka if (!r) 18971e5554c8SMikulas Patocka r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data); 18981e5554c8SMikulas Patocka 18991e5554c8SMikulas Patocka return r; 19008811f46cSMike Snitzer } 19018811f46cSMike Snitzer 19028811f46cSMike Snitzer 19031da177e4SLinus Torvalds /*----------------------------------------------------------------- 19041da177e4SLinus Torvalds * Origin methods 19051da177e4SLinus Torvalds *---------------------------------------------------------------*/ 19069eaae8ffSMikulas Patocka 19079eaae8ffSMikulas Patocka /* 19089eaae8ffSMikulas Patocka * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any 19099eaae8ffSMikulas Patocka * supplied bio was 
ignored. The caller may submit it immediately. 19109eaae8ffSMikulas Patocka * (No remapping actually occurs as the origin is always a direct linear 19119eaae8ffSMikulas Patocka * map.) 19129eaae8ffSMikulas Patocka * 19139eaae8ffSMikulas Patocka * If further exceptions are required, DM_MAPIO_SUBMITTED is returned 19149eaae8ffSMikulas Patocka * and any supplied bio is added to a list to be submitted once all 19159eaae8ffSMikulas Patocka * the necessary exceptions exist. 19169eaae8ffSMikulas Patocka */ 19179eaae8ffSMikulas Patocka static int __origin_write(struct list_head *snapshots, sector_t sector, 19189eaae8ffSMikulas Patocka struct bio *bio) 19191da177e4SLinus Torvalds { 1920515ad66cSMikulas Patocka int r = DM_MAPIO_REMAPPED; 19211da177e4SLinus Torvalds struct dm_snapshot *snap; 19221d4989c8SJon Brassow struct dm_exception *e; 1923515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe; 1924515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_now = NULL; 1925515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_last = NULL; 19261da177e4SLinus Torvalds chunk_t chunk; 19271da177e4SLinus Torvalds 19281da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 19291da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 19303452c2a1SMikulas Patocka /* 19313452c2a1SMikulas Patocka * Don't make new exceptions in a merging snapshot 19323452c2a1SMikulas Patocka * because it has effectively been deleted 19333452c2a1SMikulas Patocka */ 19343452c2a1SMikulas Patocka if (dm_target_is_snapshot_merge(snap->ti)) 19353452c2a1SMikulas Patocka continue; 19363452c2a1SMikulas Patocka 193776df1c65SAlasdair G Kergon down_write(&snap->lock); 193876df1c65SAlasdair G Kergon 1939aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 1940aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 194176df1c65SAlasdair G Kergon goto next_snapshot; 19421da177e4SLinus Torvalds 1943d5e404c1SAlasdair G Kergon /* 
Nothing to do if writing beyond end of snapshot */ 19449eaae8ffSMikulas Patocka if (sector >= dm_table_get_size(snap->ti->table)) 194576df1c65SAlasdair G Kergon goto next_snapshot; 19461da177e4SLinus Torvalds 19471da177e4SLinus Torvalds /* 19481da177e4SLinus Torvalds * Remember, different snapshots can have 19491da177e4SLinus Torvalds * different chunk sizes. 19501da177e4SLinus Torvalds */ 19519eaae8ffSMikulas Patocka chunk = sector_to_chunk(snap->store, sector); 19521da177e4SLinus Torvalds 19531da177e4SLinus Torvalds /* 19541da177e4SLinus Torvalds * Check exception table to see if block 19551da177e4SLinus Torvalds * is already remapped in this snapshot 19561da177e4SLinus Torvalds * and trigger an exception if not. 19571da177e4SLinus Torvalds */ 19583510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 195976df1c65SAlasdair G Kergon if (e) 196076df1c65SAlasdair G Kergon goto next_snapshot; 196176df1c65SAlasdair G Kergon 19622913808eSMikulas Patocka pe = __lookup_pending_exception(snap, chunk); 19632913808eSMikulas Patocka if (!pe) { 1964c6621392SMikulas Patocka up_write(&snap->lock); 1965c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 1966c6621392SMikulas Patocka down_write(&snap->lock); 1967c6621392SMikulas Patocka 1968c6621392SMikulas Patocka if (!snap->valid) { 1969c6621392SMikulas Patocka free_pending_exception(pe); 1970c6621392SMikulas Patocka goto next_snapshot; 1971c6621392SMikulas Patocka } 1972c6621392SMikulas Patocka 19733510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 197435bf659bSMikulas Patocka if (e) { 197535bf659bSMikulas Patocka free_pending_exception(pe); 197635bf659bSMikulas Patocka goto next_snapshot; 197735bf659bSMikulas Patocka } 197835bf659bSMikulas Patocka 1979c6621392SMikulas Patocka pe = __find_pending_exception(snap, pe, chunk); 19801da177e4SLinus Torvalds if (!pe) { 1981695368acSAlasdair G Kergon __invalidate_snapshot(snap, -ENOMEM); 198276df1c65SAlasdair G Kergon goto next_snapshot; 
198376df1c65SAlasdair G Kergon } 19842913808eSMikulas Patocka } 19851da177e4SLinus Torvalds 1986d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 198776df1c65SAlasdair G Kergon 1988515ad66cSMikulas Patocka /* 1989515ad66cSMikulas Patocka * If an origin bio was supplied, queue it to wait for the 1990515ad66cSMikulas Patocka * completion of this exception, and start this one last, 1991515ad66cSMikulas Patocka * at the end of the function. 1992515ad66cSMikulas Patocka */ 1993515ad66cSMikulas Patocka if (bio) { 1994515ad66cSMikulas Patocka bio_list_add(&pe->origin_bios, bio); 1995515ad66cSMikulas Patocka bio = NULL; 1996515ad66cSMikulas Patocka 1997515ad66cSMikulas Patocka if (!pe->started) { 1998515ad66cSMikulas Patocka pe->started = 1; 1999515ad66cSMikulas Patocka pe_to_start_last = pe; 2000515ad66cSMikulas Patocka } 2001b4b610f6SAlasdair G Kergon } 200276df1c65SAlasdair G Kergon 2003eccf0817SAlasdair G Kergon if (!pe->started) { 2004eccf0817SAlasdair G Kergon pe->started = 1; 2005515ad66cSMikulas Patocka pe_to_start_now = pe; 2006eccf0817SAlasdair G Kergon } 20071da177e4SLinus Torvalds 200876df1c65SAlasdair G Kergon next_snapshot: 20091da177e4SLinus Torvalds up_write(&snap->lock); 2010515ad66cSMikulas Patocka 2011515ad66cSMikulas Patocka if (pe_to_start_now) { 2012515ad66cSMikulas Patocka start_copy(pe_to_start_now); 2013515ad66cSMikulas Patocka pe_to_start_now = NULL; 20141da177e4SLinus Torvalds } 2015b4b610f6SAlasdair G Kergon } 2016b4b610f6SAlasdair G Kergon 20171da177e4SLinus Torvalds /* 2018515ad66cSMikulas Patocka * Submit the exception against which the bio is queued last, 2019515ad66cSMikulas Patocka * to give the other exceptions a head start. 
20201da177e4SLinus Torvalds */ 2021515ad66cSMikulas Patocka if (pe_to_start_last) 2022515ad66cSMikulas Patocka start_copy(pe_to_start_last); 20231da177e4SLinus Torvalds 20241da177e4SLinus Torvalds return r; 20251da177e4SLinus Torvalds } 20261da177e4SLinus Torvalds 20271da177e4SLinus Torvalds /* 20281da177e4SLinus Torvalds * Called on a write from the origin driver. 20291da177e4SLinus Torvalds */ 20301da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 20311da177e4SLinus Torvalds { 20321da177e4SLinus Torvalds struct origin *o; 2033d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 20341da177e4SLinus Torvalds 20351da177e4SLinus Torvalds down_read(&_origins_lock); 20361da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 20371da177e4SLinus Torvalds if (o) 20389eaae8ffSMikulas Patocka r = __origin_write(&o->snapshots, bio->bi_sector, bio); 20391da177e4SLinus Torvalds up_read(&_origins_lock); 20401da177e4SLinus Torvalds 20411da177e4SLinus Torvalds return r; 20421da177e4SLinus Torvalds } 20431da177e4SLinus Torvalds 20441da177e4SLinus Torvalds /* 204573dfd078SMikulas Patocka * Trigger exceptions in all non-merging snapshots. 204673dfd078SMikulas Patocka * 204773dfd078SMikulas Patocka * The chunk size of the merging snapshot may be larger than the chunk 204873dfd078SMikulas Patocka * size of some other snapshot so we may need to reallocate multiple 204973dfd078SMikulas Patocka * chunks in other snapshots. 205073dfd078SMikulas Patocka * 205173dfd078SMikulas Patocka * We scan all the overlapping exceptions in the other snapshots. 205273dfd078SMikulas Patocka * Returns 1 if anything was reallocated and must be waited for, 205373dfd078SMikulas Patocka * otherwise returns 0. 205473dfd078SMikulas Patocka * 205573dfd078SMikulas Patocka * size must be a multiple of merging_snap's chunk_size. 
205673dfd078SMikulas Patocka */ 205773dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 205873dfd078SMikulas Patocka sector_t sector, unsigned size) 205973dfd078SMikulas Patocka { 206073dfd078SMikulas Patocka int must_wait = 0; 206173dfd078SMikulas Patocka sector_t n; 206273dfd078SMikulas Patocka struct origin *o; 206373dfd078SMikulas Patocka 206473dfd078SMikulas Patocka /* 2065542f9038SMike Snitzer * The origin's __minimum_chunk_size() got stored in max_io_len 206673dfd078SMikulas Patocka * by snapshot_merge_resume(). 206773dfd078SMikulas Patocka */ 206873dfd078SMikulas Patocka down_read(&_origins_lock); 206973dfd078SMikulas Patocka o = __lookup_origin(merging_snap->origin->bdev); 2070542f9038SMike Snitzer for (n = 0; n < size; n += merging_snap->ti->max_io_len) 207173dfd078SMikulas Patocka if (__origin_write(&o->snapshots, sector + n, NULL) == 207273dfd078SMikulas Patocka DM_MAPIO_SUBMITTED) 207373dfd078SMikulas Patocka must_wait = 1; 207473dfd078SMikulas Patocka up_read(&_origins_lock); 207573dfd078SMikulas Patocka 207673dfd078SMikulas Patocka return must_wait; 207773dfd078SMikulas Patocka } 207873dfd078SMikulas Patocka 207973dfd078SMikulas Patocka /* 20801da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 20811da177e4SLinus Torvalds */ 20821da177e4SLinus Torvalds 20831da177e4SLinus Torvalds /* 20841da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 20851da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 20861da177e4SLinus Torvalds * pointing to the real device. 
20871da177e4SLinus Torvalds */ 20881da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 20891da177e4SLinus Torvalds { 20901da177e4SLinus Torvalds int r; 20911da177e4SLinus Torvalds struct dm_dev *dev; 20921da177e4SLinus Torvalds 20931da177e4SLinus Torvalds if (argc != 1) { 209472d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 20951da177e4SLinus Torvalds return -EINVAL; 20961da177e4SLinus Torvalds } 20971da177e4SLinus Torvalds 20988215d6ecSNikanth Karthikesan r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); 20991da177e4SLinus Torvalds if (r) { 21001da177e4SLinus Torvalds ti->error = "Cannot get target device"; 21011da177e4SLinus Torvalds return r; 21021da177e4SLinus Torvalds } 21031da177e4SLinus Torvalds 21041da177e4SLinus Torvalds ti->private = dev; 210555a62eefSAlasdair G Kergon ti->num_flush_bios = 1; 2106494b3ee7SMikulas Patocka 21071da177e4SLinus Torvalds return 0; 21081da177e4SLinus Torvalds } 21091da177e4SLinus Torvalds 21101da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 21111da177e4SLinus Torvalds { 2112028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21131da177e4SLinus Torvalds dm_put_device(ti, dev); 21141da177e4SLinus Torvalds } 21151da177e4SLinus Torvalds 21167de3ee57SMikulas Patocka static int origin_map(struct dm_target *ti, struct bio *bio) 21171da177e4SLinus Torvalds { 2118028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21191da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 21201da177e4SLinus Torvalds 2121d87f4c14STejun Heo if (bio->bi_rw & REQ_FLUSH) 2122494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 2123494b3ee7SMikulas Patocka 21241da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 2125d2a7ad29SKiyoshi Ueda return (bio_rw(bio) == WRITE) ? 
do_origin(dev, bio) : DM_MAPIO_REMAPPED; 21261da177e4SLinus Torvalds } 21271da177e4SLinus Torvalds 21281da177e4SLinus Torvalds /* 2129542f9038SMike Snitzer * Set the target "max_io_len" field to the minimum of all the snapshots' 21301da177e4SLinus Torvalds * chunk sizes. 21311da177e4SLinus Torvalds */ 21321da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 21331da177e4SLinus Torvalds { 2134028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21351da177e4SLinus Torvalds 2136542f9038SMike Snitzer ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); 21371da177e4SLinus Torvalds } 21381da177e4SLinus Torvalds 2139fd7c092eSMikulas Patocka static void origin_status(struct dm_target *ti, status_type_t type, 21401f4e0ff0SAlasdair G Kergon unsigned status_flags, char *result, unsigned maxlen) 21411da177e4SLinus Torvalds { 2142028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21431da177e4SLinus Torvalds 21441da177e4SLinus Torvalds switch (type) { 21451da177e4SLinus Torvalds case STATUSTYPE_INFO: 21461da177e4SLinus Torvalds result[0] = '\0'; 21471da177e4SLinus Torvalds break; 21481da177e4SLinus Torvalds 21491da177e4SLinus Torvalds case STATUSTYPE_TABLE: 21501da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 21511da177e4SLinus Torvalds break; 21521da177e4SLinus Torvalds } 21531da177e4SLinus Torvalds } 21541da177e4SLinus Torvalds 2155b1d55528SMikulas Patocka static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, 2156b1d55528SMikulas Patocka struct bio_vec *biovec, int max_size) 2157b1d55528SMikulas Patocka { 2158b1d55528SMikulas Patocka struct dm_dev *dev = ti->private; 2159b1d55528SMikulas Patocka struct request_queue *q = bdev_get_queue(dev->bdev); 2160b1d55528SMikulas Patocka 2161b1d55528SMikulas Patocka if (!q->merge_bvec_fn) 2162b1d55528SMikulas Patocka return max_size; 2163b1d55528SMikulas Patocka 2164b1d55528SMikulas Patocka bvm->bi_bdev = dev->bdev; 2165b1d55528SMikulas Patocka 
2166b1d55528SMikulas Patocka return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 2167b1d55528SMikulas Patocka } 2168b1d55528SMikulas Patocka 21698811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 21708811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 21718811f46cSMike Snitzer { 21728811f46cSMike Snitzer struct dm_dev *dev = ti->private; 21738811f46cSMike Snitzer 21748811f46cSMike Snitzer return fn(ti, dev, 0, ti->len, data); 21758811f46cSMike Snitzer } 21768811f46cSMike Snitzer 21771da177e4SLinus Torvalds static struct target_type origin_target = { 21781da177e4SLinus Torvalds .name = "snapshot-origin", 2179fd7c092eSMikulas Patocka .version = {1, 8, 1}, 21801da177e4SLinus Torvalds .module = THIS_MODULE, 21811da177e4SLinus Torvalds .ctr = origin_ctr, 21821da177e4SLinus Torvalds .dtr = origin_dtr, 21831da177e4SLinus Torvalds .map = origin_map, 21841da177e4SLinus Torvalds .resume = origin_resume, 21851da177e4SLinus Torvalds .status = origin_status, 2186b1d55528SMikulas Patocka .merge = origin_merge, 21878811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 21881da177e4SLinus Torvalds }; 21891da177e4SLinus Torvalds 21901da177e4SLinus Torvalds static struct target_type snapshot_target = { 21911da177e4SLinus Torvalds .name = "snapshot", 2192fd7c092eSMikulas Patocka .version = {1, 11, 1}, 21931da177e4SLinus Torvalds .module = THIS_MODULE, 21941da177e4SLinus Torvalds .ctr = snapshot_ctr, 21951da177e4SLinus Torvalds .dtr = snapshot_dtr, 21961da177e4SLinus Torvalds .map = snapshot_map, 2197cd45daffSMikulas Patocka .end_io = snapshot_end_io, 2198c1f0c183SMike Snitzer .preresume = snapshot_preresume, 21991da177e4SLinus Torvalds .resume = snapshot_resume, 22001da177e4SLinus Torvalds .status = snapshot_status, 22018811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 22021da177e4SLinus Torvalds }; 22031da177e4SLinus Torvalds 2204d698aa45SMikulas Patocka static struct target_type merge_target = { 
2205d698aa45SMikulas Patocka .name = dm_snapshot_merge_target_name, 220642bc954fSMikulas Patocka .version = {1, 2, 0}, 2207d698aa45SMikulas Patocka .module = THIS_MODULE, 2208d698aa45SMikulas Patocka .ctr = snapshot_ctr, 2209d698aa45SMikulas Patocka .dtr = snapshot_dtr, 22103452c2a1SMikulas Patocka .map = snapshot_merge_map, 2211d698aa45SMikulas Patocka .end_io = snapshot_end_io, 22121e03f97eSMikulas Patocka .presuspend = snapshot_merge_presuspend, 2213d698aa45SMikulas Patocka .preresume = snapshot_preresume, 22141e03f97eSMikulas Patocka .resume = snapshot_merge_resume, 2215d698aa45SMikulas Patocka .status = snapshot_status, 2216d698aa45SMikulas Patocka .iterate_devices = snapshot_iterate_devices, 2217d698aa45SMikulas Patocka }; 2218d698aa45SMikulas Patocka 22191da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 22201da177e4SLinus Torvalds { 22211da177e4SLinus Torvalds int r; 22221da177e4SLinus Torvalds 22234db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 22244db6bfe0SAlasdair G Kergon if (r) { 22254db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 22264db6bfe0SAlasdair G Kergon return r; 22274db6bfe0SAlasdair G Kergon } 22284db6bfe0SAlasdair G Kergon 22291da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 2230d698aa45SMikulas Patocka if (r < 0) { 22311da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 2232034a186dSJonathan Brassow goto bad_register_snapshot_target; 22331da177e4SLinus Torvalds } 22341da177e4SLinus Torvalds 22351da177e4SLinus Torvalds r = dm_register_target(&origin_target); 22361da177e4SLinus Torvalds if (r < 0) { 223772d94861SAlasdair G Kergon DMERR("Origin target register failed %d", r); 2238d698aa45SMikulas Patocka goto bad_register_origin_target; 2239d698aa45SMikulas Patocka } 2240d698aa45SMikulas Patocka 2241d698aa45SMikulas Patocka r = dm_register_target(&merge_target); 2242d698aa45SMikulas Patocka if (r < 0) { 2243d698aa45SMikulas Patocka DMERR("Merge target 
register failed %d", r); 2244d698aa45SMikulas Patocka goto bad_register_merge_target; 22451da177e4SLinus Torvalds } 22461da177e4SLinus Torvalds 22471da177e4SLinus Torvalds r = init_origin_hash(); 22481da177e4SLinus Torvalds if (r) { 22491da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 2250d698aa45SMikulas Patocka goto bad_origin_hash; 22511da177e4SLinus Torvalds } 22521da177e4SLinus Torvalds 22531d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 22541da177e4SLinus Torvalds if (!exception_cache) { 22551da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 22561da177e4SLinus Torvalds r = -ENOMEM; 2257d698aa45SMikulas Patocka goto bad_exception_cache; 22581da177e4SLinus Torvalds } 22591da177e4SLinus Torvalds 2260028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 22611da177e4SLinus Torvalds if (!pending_cache) { 22621da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 22631da177e4SLinus Torvalds r = -ENOMEM; 2264d698aa45SMikulas Patocka goto bad_pending_cache; 22651da177e4SLinus Torvalds } 22661da177e4SLinus Torvalds 22671da177e4SLinus Torvalds return 0; 22681da177e4SLinus Torvalds 2269d698aa45SMikulas Patocka bad_pending_cache: 22701da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 2271d698aa45SMikulas Patocka bad_exception_cache: 22721da177e4SLinus Torvalds exit_origin_hash(); 2273d698aa45SMikulas Patocka bad_origin_hash: 2274d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 2275d698aa45SMikulas Patocka bad_register_merge_target: 22761da177e4SLinus Torvalds dm_unregister_target(&origin_target); 2277d698aa45SMikulas Patocka bad_register_origin_target: 22781da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 2279034a186dSJonathan Brassow bad_register_snapshot_target: 2280034a186dSJonathan Brassow dm_exception_store_exit(); 2281d698aa45SMikulas Patocka 22821da177e4SLinus Torvalds return r; 22831da177e4SLinus Torvalds } 22841da177e4SLinus Torvalds 
22851da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 22861da177e4SLinus Torvalds { 228710d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 228810d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 2289d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 22901da177e4SLinus Torvalds 22911da177e4SLinus Torvalds exit_origin_hash(); 22921da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 22931da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 22944db6bfe0SAlasdair G Kergon 22954db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 22961da177e4SLinus Torvalds } 22971da177e4SLinus Torvalds 22981da177e4SLinus Torvalds /* Module hooks */ 22991da177e4SLinus Torvalds module_init(dm_snapshot_init); 23001da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 23011da177e4SLinus Torvalds 23021da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 23031da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 23041da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 2305