11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 161da177e4SLinus Torvalds #include <linux/mempool.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/vmalloc.h> 206f3c3f0aSvignesh babu #include <linux/log2.h> 21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 22ccc45ea8SJonathan Brassow #include <linux/workqueue.h> 231da177e4SLinus Torvalds 24aea53d92SJonathan Brassow #include "dm-exception-store.h" 251da177e4SLinus Torvalds 2672d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 2772d94861SAlasdair G Kergon 28d698aa45SMikulas Patocka static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; 29d698aa45SMikulas Patocka 30d698aa45SMikulas Patocka #define dm_target_is_snapshot_merge(ti) \ 31d698aa45SMikulas Patocka ((ti)->type->name == dm_snapshot_merge_target_name) 32d698aa45SMikulas Patocka 331da177e4SLinus Torvalds /* 341da177e4SLinus Torvalds * The percentage increment we will wake up users at 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds #define WAKE_UP_PERCENT 5 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds /* 391da177e4SLinus Torvalds * kcopyd priority of snapshot operations 401da177e4SLinus Torvalds */ 411da177e4SLinus Torvalds #define SNAPSHOT_COPY_PRIORITY 2 421da177e4SLinus Torvalds 431da177e4SLinus 
Torvalds /* 448ee2767aSMilan Broz * Reserve 1MB for each snapshot initially (with minimum of 1 page). 451da177e4SLinus Torvalds */ 468ee2767aSMilan Broz #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 471da177e4SLinus Torvalds 48cd45daffSMikulas Patocka /* 49cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 50cd45daffSMikulas Patocka */ 51cd45daffSMikulas Patocka #define MIN_IOS 256 52cd45daffSMikulas Patocka 53ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 54ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 55ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 56ccc45ea8SJonathan Brassow 57191437a5SJon Brassow struct dm_exception_table { 58ccc45ea8SJonathan Brassow uint32_t hash_mask; 59ccc45ea8SJonathan Brassow unsigned hash_shift; 60ccc45ea8SJonathan Brassow struct list_head *table; 61ccc45ea8SJonathan Brassow }; 62ccc45ea8SJonathan Brassow 63ccc45ea8SJonathan Brassow struct dm_snapshot { 64ccc45ea8SJonathan Brassow struct rw_semaphore lock; 65ccc45ea8SJonathan Brassow 66ccc45ea8SJonathan Brassow struct dm_dev *origin; 67fc56f6fbSMike Snitzer struct dm_dev *cow; 68fc56f6fbSMike Snitzer 69fc56f6fbSMike Snitzer struct dm_target *ti; 70ccc45ea8SJonathan Brassow 71ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 72ccc45ea8SJonathan Brassow struct list_head list; 73ccc45ea8SJonathan Brassow 74d8ddb1cfSMike Snitzer /* 75d8ddb1cfSMike Snitzer * You can't use a snapshot if this is 0 (e.g. if full). 76d8ddb1cfSMike Snitzer * A snapshot-merge target never clears this. 
77d8ddb1cfSMike Snitzer */ 78ccc45ea8SJonathan Brassow int valid; 79ccc45ea8SJonathan Brassow 80ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 81ccc45ea8SJonathan Brassow int active; 82ccc45ea8SJonathan Brassow 83c26655caSMike Snitzer /* Whether or not owning mapped_device is suspended */ 84c26655caSMike Snitzer int suspended; 85c26655caSMike Snitzer 86ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 87ccc45ea8SJonathan Brassow 88924e600dSMike Snitzer mempool_t *pending_pool; 89924e600dSMike Snitzer 90191437a5SJon Brassow struct dm_exception_table pending; 91191437a5SJon Brassow struct dm_exception_table complete; 92ccc45ea8SJonathan Brassow 93ccc45ea8SJonathan Brassow /* 94ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 95ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 96ccc45ea8SJonathan Brassow */ 97ccc45ea8SJonathan Brassow spinlock_t pe_lock; 98ccc45ea8SJonathan Brassow 99924e600dSMike Snitzer /* Chunks with outstanding reads */ 100924e600dSMike Snitzer spinlock_t tracked_chunk_lock; 101924e600dSMike Snitzer mempool_t *tracked_chunk_pool; 102924e600dSMike Snitzer struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 103924e600dSMike Snitzer 104ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 105ccc45ea8SJonathan Brassow struct dm_exception_store *store; 106ccc45ea8SJonathan Brassow 107ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 108ccc45ea8SJonathan Brassow 109ccc45ea8SJonathan Brassow /* Queue of snapshot writes for ksnapd to flush */ 110ccc45ea8SJonathan Brassow struct bio_list queued_bios; 111ccc45ea8SJonathan Brassow struct work_struct queued_bios_work; 112ccc45ea8SJonathan Brassow 113924e600dSMike Snitzer /* Wait for events based on state_bits */ 114924e600dSMike Snitzer unsigned long state_bits; 115924e600dSMike Snitzer 116924e600dSMike Snitzer /* Range of chunks currently being merged. 
*/ 117924e600dSMike Snitzer chunk_t first_merging_chunk; 118924e600dSMike Snitzer int num_merging_chunks; 1191e03f97eSMikulas Patocka 120d8ddb1cfSMike Snitzer /* 121d8ddb1cfSMike Snitzer * The merge operation failed if this flag is set. 122d8ddb1cfSMike Snitzer * Failure modes are handled as follows: 123d8ddb1cfSMike Snitzer * - I/O error reading the header 124d8ddb1cfSMike Snitzer * => don't load the target; abort. 125d8ddb1cfSMike Snitzer * - Header does not have "valid" flag set 126d8ddb1cfSMike Snitzer * => use the origin; forget about the snapshot. 127d8ddb1cfSMike Snitzer * - I/O error when reading exceptions 128d8ddb1cfSMike Snitzer * => don't load the target; abort. 129d8ddb1cfSMike Snitzer * (We can't use the intermediate origin state.) 130d8ddb1cfSMike Snitzer * - I/O error while merging 131d8ddb1cfSMike Snitzer * => stop merging; set merge_failed; process I/O normally. 132d8ddb1cfSMike Snitzer */ 133d8ddb1cfSMike Snitzer int merge_failed; 134d8ddb1cfSMike Snitzer 1359fe86254SMikulas Patocka /* 1369fe86254SMikulas Patocka * Incoming bios that overlap with chunks being merged must wait 1379fe86254SMikulas Patocka * for them to be committed. 1389fe86254SMikulas Patocka */ 1399fe86254SMikulas Patocka struct bio_list bios_queued_during_merge; 140ccc45ea8SJonathan Brassow }; 141ccc45ea8SJonathan Brassow 1421e03f97eSMikulas Patocka /* 1431e03f97eSMikulas Patocka * state_bits: 1441e03f97eSMikulas Patocka * RUNNING_MERGE - Merge operation is in progress. 1451e03f97eSMikulas Patocka * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped; 1461e03f97eSMikulas Patocka * cleared afterwards. 
1471e03f97eSMikulas Patocka */ 1481e03f97eSMikulas Patocka #define RUNNING_MERGE 0 1491e03f97eSMikulas Patocka #define SHUTDOWN_MERGE 1 1501e03f97eSMikulas Patocka 151c2411045SMikulas Patocka struct dm_dev *dm_snap_origin(struct dm_snapshot *s) 152c2411045SMikulas Patocka { 153c2411045SMikulas Patocka return s->origin; 154c2411045SMikulas Patocka } 155c2411045SMikulas Patocka EXPORT_SYMBOL(dm_snap_origin); 156c2411045SMikulas Patocka 157fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 158fc56f6fbSMike Snitzer { 159fc56f6fbSMike Snitzer return s->cow; 160fc56f6fbSMike Snitzer } 161fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 162fc56f6fbSMike Snitzer 163c642f9e0SAdrian Bunk static struct workqueue_struct *ksnapd; 164c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work); 165ca3a931fSAlasdair G Kergon 166ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 167ccc45ea8SJonathan Brassow chunk_t chunk) 168ccc45ea8SJonathan Brassow { 169ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 170ccc45ea8SJonathan Brassow } 171ccc45ea8SJonathan Brassow 172ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 173ccc45ea8SJonathan Brassow { 174ccc45ea8SJonathan Brassow /* 175ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 176ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 
177ccc45ea8SJonathan Brassow */ 178ccc45ea8SJonathan Brassow return lhs == rhs; 179ccc45ea8SJonathan Brassow } 180ccc45ea8SJonathan Brassow 181028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1821d4989c8SJon Brassow struct dm_exception e; 1831da177e4SLinus Torvalds 1841da177e4SLinus Torvalds /* 1851da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 1861da177e4SLinus Torvalds * in a bio list 1871da177e4SLinus Torvalds */ 1881da177e4SLinus Torvalds struct bio_list origin_bios; 1891da177e4SLinus Torvalds struct bio_list snapshot_bios; 1901da177e4SLinus Torvalds 1911da177e4SLinus Torvalds /* Pointer back to snapshot context */ 1921da177e4SLinus Torvalds struct dm_snapshot *snap; 1931da177e4SLinus Torvalds 1941da177e4SLinus Torvalds /* 1951da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 1961da177e4SLinus Torvalds * kcopyd. 1971da177e4SLinus Torvalds */ 1981da177e4SLinus Torvalds int started; 1991da177e4SLinus Torvalds }; 2001da177e4SLinus Torvalds 2011da177e4SLinus Torvalds /* 2021da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 2031da177e4SLinus Torvalds * a lock to protect it 2041da177e4SLinus Torvalds */ 205e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 206e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 2071da177e4SLinus Torvalds 208cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 209cd45daffSMikulas Patocka struct hlist_node node; 210cd45daffSMikulas Patocka chunk_t chunk; 211cd45daffSMikulas Patocka }; 212cd45daffSMikulas Patocka 213cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache; 214cd45daffSMikulas Patocka 215cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 216cd45daffSMikulas Patocka chunk_t chunk) 217cd45daffSMikulas Patocka { 218cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 
219cd45daffSMikulas Patocka GFP_NOIO); 220cd45daffSMikulas Patocka unsigned long flags; 221cd45daffSMikulas Patocka 222cd45daffSMikulas Patocka c->chunk = chunk; 223cd45daffSMikulas Patocka 224cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 225cd45daffSMikulas Patocka hlist_add_head(&c->node, 226cd45daffSMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 227cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 228cd45daffSMikulas Patocka 229cd45daffSMikulas Patocka return c; 230cd45daffSMikulas Patocka } 231cd45daffSMikulas Patocka 232cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, 233cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c) 234cd45daffSMikulas Patocka { 235cd45daffSMikulas Patocka unsigned long flags; 236cd45daffSMikulas Patocka 237cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 238cd45daffSMikulas Patocka hlist_del(&c->node); 239cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 240cd45daffSMikulas Patocka 241cd45daffSMikulas Patocka mempool_free(c, s->tracked_chunk_pool); 242cd45daffSMikulas Patocka } 243cd45daffSMikulas Patocka 244a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 245a8d41b59SMikulas Patocka { 246a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 247a8d41b59SMikulas Patocka struct hlist_node *hn; 248a8d41b59SMikulas Patocka int found = 0; 249a8d41b59SMikulas Patocka 250a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 251a8d41b59SMikulas Patocka 252a8d41b59SMikulas Patocka hlist_for_each_entry(c, hn, 253a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 254a8d41b59SMikulas Patocka if (c->chunk == chunk) { 255a8d41b59SMikulas Patocka found = 1; 256a8d41b59SMikulas Patocka break; 257a8d41b59SMikulas Patocka } 258a8d41b59SMikulas Patocka } 259a8d41b59SMikulas Patocka 
260a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 261a8d41b59SMikulas Patocka 262a8d41b59SMikulas Patocka return found; 263a8d41b59SMikulas Patocka } 264a8d41b59SMikulas Patocka 2651da177e4SLinus Torvalds /* 266615d1eb9SMike Snitzer * This conflicting I/O is extremely improbable in the caller, 267615d1eb9SMike Snitzer * so msleep(1) is sufficient and there is no need for a wait queue. 268615d1eb9SMike Snitzer */ 269615d1eb9SMike Snitzer static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) 270615d1eb9SMike Snitzer { 271615d1eb9SMike Snitzer while (__chunk_is_tracked(s, chunk)) 272615d1eb9SMike Snitzer msleep(1); 273615d1eb9SMike Snitzer } 274615d1eb9SMike Snitzer 275615d1eb9SMike Snitzer /* 2761da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 2771da177e4SLinus Torvalds */ 2781da177e4SLinus Torvalds struct origin { 2791da177e4SLinus Torvalds /* The origin device */ 2801da177e4SLinus Torvalds struct block_device *bdev; 2811da177e4SLinus Torvalds 2821da177e4SLinus Torvalds struct list_head hash_list; 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds /* List of snapshots for this origin */ 2851da177e4SLinus Torvalds struct list_head snapshots; 2861da177e4SLinus Torvalds }; 2871da177e4SLinus Torvalds 2881da177e4SLinus Torvalds /* 2891da177e4SLinus Torvalds * Size of the hash table for origin volumes. 
If we make this 2901da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 2911da177e4SLinus Torvalds */ 2921da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 2931da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 2941da177e4SLinus Torvalds static struct list_head *_origins; 2951da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 2961da177e4SLinus Torvalds 29773dfd078SMikulas Patocka static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); 29873dfd078SMikulas Patocka static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); 29973dfd078SMikulas Patocka static uint64_t _pending_exceptions_done_count; 30073dfd078SMikulas Patocka 3011da177e4SLinus Torvalds static int init_origin_hash(void) 3021da177e4SLinus Torvalds { 3031da177e4SLinus Torvalds int i; 3041da177e4SLinus Torvalds 3051da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 3061da177e4SLinus Torvalds GFP_KERNEL); 3071da177e4SLinus Torvalds if (!_origins) { 30872d94861SAlasdair G Kergon DMERR("unable to allocate memory"); 3091da177e4SLinus Torvalds return -ENOMEM; 3101da177e4SLinus Torvalds } 3111da177e4SLinus Torvalds 3121da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 3131da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 3141da177e4SLinus Torvalds init_rwsem(&_origins_lock); 3151da177e4SLinus Torvalds 3161da177e4SLinus Torvalds return 0; 3171da177e4SLinus Torvalds } 3181da177e4SLinus Torvalds 3191da177e4SLinus Torvalds static void exit_origin_hash(void) 3201da177e4SLinus Torvalds { 3211da177e4SLinus Torvalds kfree(_origins); 3221da177e4SLinus Torvalds } 3231da177e4SLinus Torvalds 324028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 3251da177e4SLinus Torvalds { 3261da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 3271da177e4SLinus Torvalds } 3281da177e4SLinus Torvalds 3291da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 
3301da177e4SLinus Torvalds { 3311da177e4SLinus Torvalds struct list_head *ol; 3321da177e4SLinus Torvalds struct origin *o; 3331da177e4SLinus Torvalds 3341da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 3351da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 3361da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 3371da177e4SLinus Torvalds return o; 3381da177e4SLinus Torvalds 3391da177e4SLinus Torvalds return NULL; 3401da177e4SLinus Torvalds } 3411da177e4SLinus Torvalds 3421da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 3431da177e4SLinus Torvalds { 3441da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 3451da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 3461da177e4SLinus Torvalds } 3471da177e4SLinus Torvalds 3481da177e4SLinus Torvalds /* 349c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 350c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 351c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 352c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 3539d3b15c4SMikulas Patocka * snap_merge - an existing snapshot-merge target linked to the same origin. 3549d3b15c4SMikulas Patocka * There can be at most one snapshot-merge target. The parameter is optional. 355c1f0c183SMike Snitzer * 3569d3b15c4SMikulas Patocka * Possible return values and states of snap_src and snap_dest. 
*   0: NULL, NULL  - first new snapshot
 *   1: snap_src, NULL - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		/*
		 * Reported for any snapshot-merge target on this origin,
		 * whether or not it shares the cow device: this test runs
		 * before the cow comparison below.
		 */
		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
			*snap_merge = s;
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		/* Sample the active flag under the snapshot's own lock. */
		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		/* Active snapshots are sources, inactive ones destinations. */
		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}

Snitzer /* 401c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 402c1f0c183SMike Snitzer * otherwise returns 0. 403c1f0c183SMike Snitzer */ 404c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 405c1f0c183SMike Snitzer { 406c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 4079d3b15c4SMikulas Patocka struct dm_snapshot *snap_merge = NULL; 408c1f0c183SMike Snitzer 409c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 4109d3b15c4SMikulas Patocka if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, 4119d3b15c4SMikulas Patocka &snap_merge) == 2) || 412c1f0c183SMike Snitzer snap_dest) { 413c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 414c1f0c183SMike Snitzer "table handover failed"; 415c1f0c183SMike Snitzer return -EINVAL; 416c1f0c183SMike Snitzer } 417c1f0c183SMike Snitzer 418c1f0c183SMike Snitzer /* 419c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 420c1f0c183SMike Snitzer * destination. 421c1f0c183SMike Snitzer */ 422c1f0c183SMike Snitzer if (!snap_src) 423c1f0c183SMike Snitzer return 0; 424c1f0c183SMike Snitzer 4259d3b15c4SMikulas Patocka /* 4269d3b15c4SMikulas Patocka * Non-snapshot-merge handover? 4279d3b15c4SMikulas Patocka */ 4289d3b15c4SMikulas Patocka if (!dm_target_is_snapshot_merge(snap->ti)) 4299d3b15c4SMikulas Patocka return 1; 4309d3b15c4SMikulas Patocka 4319d3b15c4SMikulas Patocka /* 4329d3b15c4SMikulas Patocka * Do not allow more than one merging snapshot. 
4339d3b15c4SMikulas Patocka */ 4349d3b15c4SMikulas Patocka if (snap_merge) { 4359d3b15c4SMikulas Patocka snap->ti->error = "A snapshot is already merging."; 4369d3b15c4SMikulas Patocka return -EINVAL; 4379d3b15c4SMikulas Patocka } 4389d3b15c4SMikulas Patocka 4391e03f97eSMikulas Patocka if (!snap_src->store->type->prepare_merge || 4401e03f97eSMikulas Patocka !snap_src->store->type->commit_merge) { 4411e03f97eSMikulas Patocka snap->ti->error = "Snapshot exception store does not " 4421e03f97eSMikulas Patocka "support snapshot-merge."; 4431e03f97eSMikulas Patocka return -EINVAL; 4441e03f97eSMikulas Patocka } 4451e03f97eSMikulas Patocka 446c1f0c183SMike Snitzer return 1; 447c1f0c183SMike Snitzer } 448c1f0c183SMike Snitzer 449c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 450c1f0c183SMike Snitzer { 451c1f0c183SMike Snitzer struct dm_snapshot *l; 452c1f0c183SMike Snitzer 453c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 454c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 455c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 456c1f0c183SMike Snitzer break; 457c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 458c1f0c183SMike Snitzer } 459c1f0c183SMike Snitzer 460c1f0c183SMike Snitzer /* 4611da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 4621da177e4SLinus Torvalds * up when the origin has a write on it. 463c1f0c183SMike Snitzer * 464c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 465c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 466c1f0c183SMike Snitzer * otherwise returns 0. 
4671da177e4SLinus Torvalds */ 4681da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 4691da177e4SLinus Torvalds { 470c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 4711da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 472c1f0c183SMike Snitzer int r = 0; 4731da177e4SLinus Torvalds 47460c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 47560c856c8SMikulas Patocka if (!new_o) 47660c856c8SMikulas Patocka return -ENOMEM; 47760c856c8SMikulas Patocka 4781da177e4SLinus Torvalds down_write(&_origins_lock); 4791da177e4SLinus Torvalds 480c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 481c1f0c183SMike Snitzer if (r < 0) { 482c1f0c183SMike Snitzer kfree(new_o); 483c1f0c183SMike Snitzer goto out; 484c1f0c183SMike Snitzer } 485c1f0c183SMike Snitzer 486c1f0c183SMike Snitzer o = __lookup_origin(bdev); 48760c856c8SMikulas Patocka if (o) 48860c856c8SMikulas Patocka kfree(new_o); 48960c856c8SMikulas Patocka else { 4901da177e4SLinus Torvalds /* New origin */ 49160c856c8SMikulas Patocka o = new_o; 4921da177e4SLinus Torvalds 4931da177e4SLinus Torvalds /* Initialise the struct */ 4941da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 4951da177e4SLinus Torvalds o->bdev = bdev; 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds __insert_origin(o); 4981da177e4SLinus Torvalds } 4991da177e4SLinus Torvalds 500c1f0c183SMike Snitzer __insert_snapshot(o, snap); 501c1f0c183SMike Snitzer 502c1f0c183SMike Snitzer out: 503c1f0c183SMike Snitzer up_write(&_origins_lock); 504c1f0c183SMike Snitzer 505c1f0c183SMike Snitzer return r; 506c1f0c183SMike Snitzer } 507c1f0c183SMike Snitzer 508c1f0c183SMike Snitzer /* 509c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 
510c1f0c183SMike Snitzer */ 511c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 512c1f0c183SMike Snitzer { 513c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 514c1f0c183SMike Snitzer 515c1f0c183SMike Snitzer down_write(&_origins_lock); 516c1f0c183SMike Snitzer 517c1f0c183SMike Snitzer list_del(&s->list); 518c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 5191da177e4SLinus Torvalds 5201da177e4SLinus Torvalds up_write(&_origins_lock); 5211da177e4SLinus Torvalds } 5221da177e4SLinus Torvalds 5231da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 5241da177e4SLinus Torvalds { 5251da177e4SLinus Torvalds struct origin *o; 5261da177e4SLinus Torvalds 5271da177e4SLinus Torvalds down_write(&_origins_lock); 5281da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 5291da177e4SLinus Torvalds 5301da177e4SLinus Torvalds list_del(&s->list); 531c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 5321da177e4SLinus Torvalds list_del(&o->hash_list); 5331da177e4SLinus Torvalds kfree(o); 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds 5361da177e4SLinus Torvalds up_write(&_origins_lock); 5371da177e4SLinus Torvalds } 5381da177e4SLinus Torvalds 5391da177e4SLinus Torvalds /* 5401da177e4SLinus Torvalds * Implementation of the exception hash tables. 541d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 542d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 
5431da177e4SLinus Torvalds */ 5443510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 5453510cb94SJon Brassow uint32_t size, unsigned hash_shift) 5461da177e4SLinus Torvalds { 5471da177e4SLinus Torvalds unsigned int i; 5481da177e4SLinus Torvalds 549d74f81f8SMilan Broz et->hash_shift = hash_shift; 5501da177e4SLinus Torvalds et->hash_mask = size - 1; 5511da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 5521da177e4SLinus Torvalds if (!et->table) 5531da177e4SLinus Torvalds return -ENOMEM; 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds for (i = 0; i < size; i++) 5561da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds return 0; 5591da177e4SLinus Torvalds } 5601da177e4SLinus Torvalds 5613510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 562191437a5SJon Brassow struct kmem_cache *mem) 5631da177e4SLinus Torvalds { 5641da177e4SLinus Torvalds struct list_head *slot; 5651d4989c8SJon Brassow struct dm_exception *ex, *next; 5661da177e4SLinus Torvalds int i, size; 5671da177e4SLinus Torvalds 5681da177e4SLinus Torvalds size = et->hash_mask + 1; 5691da177e4SLinus Torvalds for (i = 0; i < size; i++) { 5701da177e4SLinus Torvalds slot = et->table + i; 5711da177e4SLinus Torvalds 5721da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 5731da177e4SLinus Torvalds kmem_cache_free(mem, ex); 5741da177e4SLinus Torvalds } 5751da177e4SLinus Torvalds 5761da177e4SLinus Torvalds vfree(et->table); 5771da177e4SLinus Torvalds } 5781da177e4SLinus Torvalds 579191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 5801da177e4SLinus Torvalds { 581d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 5821da177e4SLinus Torvalds } 5831da177e4SLinus Torvalds 5843510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 5851da177e4SLinus Torvalds { 
5861da177e4SLinus Torvalds list_del(&e->hash_list); 5871da177e4SLinus Torvalds } 5881da177e4SLinus Torvalds 5891da177e4SLinus Torvalds /* 5901da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 5911da177e4SLinus Torvalds * remapped. 5921da177e4SLinus Torvalds */ 5933510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 5941da177e4SLinus Torvalds chunk_t chunk) 5951da177e4SLinus Torvalds { 5961da177e4SLinus Torvalds struct list_head *slot; 5971d4989c8SJon Brassow struct dm_exception *e; 5981da177e4SLinus Torvalds 5991da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 6001da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 601d74f81f8SMilan Broz if (chunk >= e->old_chunk && 602d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 6031da177e4SLinus Torvalds return e; 6041da177e4SLinus Torvalds 6051da177e4SLinus Torvalds return NULL; 6061da177e4SLinus Torvalds } 6071da177e4SLinus Torvalds 6083510cb94SJon Brassow static struct dm_exception *alloc_completed_exception(void) 6091da177e4SLinus Torvalds { 6101d4989c8SJon Brassow struct dm_exception *e; 6111da177e4SLinus Torvalds 6121da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 6131da177e4SLinus Torvalds if (!e) 6141da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 6151da177e4SLinus Torvalds 6161da177e4SLinus Torvalds return e; 6171da177e4SLinus Torvalds } 6181da177e4SLinus Torvalds 6193510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 6201da177e4SLinus Torvalds { 6211da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 6221da177e4SLinus Torvalds } 6231da177e4SLinus Torvalds 62492e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 6251da177e4SLinus Torvalds { 62692e86812SMikulas Patocka struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 
62792e86812SMikulas Patocka GFP_NOIO); 62892e86812SMikulas Patocka 629879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 63092e86812SMikulas Patocka pe->snap = s; 63192e86812SMikulas Patocka 63292e86812SMikulas Patocka return pe; 6331da177e4SLinus Torvalds } 6341da177e4SLinus Torvalds 635028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 6361da177e4SLinus Torvalds { 637879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 638879129d2SMikulas Patocka 639879129d2SMikulas Patocka mempool_free(pe, s->pending_pool); 640879129d2SMikulas Patocka smp_mb__before_atomic_dec(); 641879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 6421da177e4SLinus Torvalds } 6431da177e4SLinus Torvalds 6443510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 6451d4989c8SJon Brassow struct dm_exception *new_e) 646d74f81f8SMilan Broz { 647d74f81f8SMilan Broz struct list_head *l; 6481d4989c8SJon Brassow struct dm_exception *e = NULL; 649d74f81f8SMilan Broz 650d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 651d74f81f8SMilan Broz 652d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 653d74f81f8SMilan Broz if (!eh->hash_shift) 654d74f81f8SMilan Broz goto out; 655d74f81f8SMilan Broz 656d74f81f8SMilan Broz /* List is ordered by old_chunk */ 657d74f81f8SMilan Broz list_for_each_entry_reverse(e, l, hash_list) { 658d74f81f8SMilan Broz /* Insert after an existing chunk? 
*/ 659d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 660d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 661d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 662d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 663d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 6643510cb94SJon Brassow free_completed_exception(new_e); 665d74f81f8SMilan Broz return; 666d74f81f8SMilan Broz } 667d74f81f8SMilan Broz 668d74f81f8SMilan Broz /* Insert before an existing chunk? */ 669d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 670d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 671d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 672d74f81f8SMilan Broz e->old_chunk--; 673d74f81f8SMilan Broz e->new_chunk--; 6743510cb94SJon Brassow free_completed_exception(new_e); 675d74f81f8SMilan Broz return; 676d74f81f8SMilan Broz } 677d74f81f8SMilan Broz 678d74f81f8SMilan Broz if (new_e->old_chunk > e->old_chunk) 679d74f81f8SMilan Broz break; 680d74f81f8SMilan Broz } 681d74f81f8SMilan Broz 682d74f81f8SMilan Broz out: 683d74f81f8SMilan Broz list_add(&new_e->hash_list, e ? &e->hash_list : l); 684d74f81f8SMilan Broz } 685d74f81f8SMilan Broz 686a159c1acSJonathan Brassow /* 687a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 688a159c1acSJonathan Brassow * initialising. 
689a159c1acSJonathan Brassow */ 690a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 6911da177e4SLinus Torvalds { 692a159c1acSJonathan Brassow struct dm_snapshot *s = context; 6931d4989c8SJon Brassow struct dm_exception *e; 6941da177e4SLinus Torvalds 6953510cb94SJon Brassow e = alloc_completed_exception(); 6961da177e4SLinus Torvalds if (!e) 6971da177e4SLinus Torvalds return -ENOMEM; 6981da177e4SLinus Torvalds 6991da177e4SLinus Torvalds e->old_chunk = old; 700d74f81f8SMilan Broz 701d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 7021da177e4SLinus Torvalds e->new_chunk = new; 703d74f81f8SMilan Broz 7043510cb94SJon Brassow dm_insert_exception(&s->complete, e); 705d74f81f8SMilan Broz 7061da177e4SLinus Torvalds return 0; 7071da177e4SLinus Torvalds } 7081da177e4SLinus Torvalds 7097e201b35SMikulas Patocka #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) 7107e201b35SMikulas Patocka 7117e201b35SMikulas Patocka /* 7127e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 7137e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 7147e201b35SMikulas Patocka */ 7157e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o) 7167e201b35SMikulas Patocka { 7177e201b35SMikulas Patocka struct dm_snapshot *snap; 7187e201b35SMikulas Patocka unsigned chunk_size = 0; 7197e201b35SMikulas Patocka 7207e201b35SMikulas Patocka if (o) 7217e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 7227e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 7237e201b35SMikulas Patocka snap->store->chunk_size); 7247e201b35SMikulas Patocka 7257e201b35SMikulas Patocka return chunk_size; 7267e201b35SMikulas Patocka } 7277e201b35SMikulas Patocka 7281da177e4SLinus Torvalds /* 7291da177e4SLinus Torvalds * Hard coded magic. 
7301da177e4SLinus Torvalds */ 7311da177e4SLinus Torvalds static int calc_max_buckets(void) 7321da177e4SLinus Torvalds { 7331da177e4SLinus Torvalds /* use a fixed size of 2MB */ 7341da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 7351da177e4SLinus Torvalds mem /= sizeof(struct list_head); 7361da177e4SLinus Torvalds 7371da177e4SLinus Torvalds return mem; 7381da177e4SLinus Torvalds } 7391da177e4SLinus Torvalds 7401da177e4SLinus Torvalds /* 7411da177e4SLinus Torvalds * Allocate room for a suitable hash table. 7421da177e4SLinus Torvalds */ 743fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 7441da177e4SLinus Torvalds { 7451da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 7461da177e4SLinus Torvalds 7471da177e4SLinus Torvalds /* 7481da177e4SLinus Torvalds * Calculate based on the size of the original volume or 7491da177e4SLinus Torvalds * the COW volume... 7501da177e4SLinus Torvalds */ 751fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 7521da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 7531da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 7541da177e4SLinus Torvalds 755fee1998eSJonathan Brassow hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 7561da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 7571da177e4SLinus Torvalds 7588e87b9b8SMikulas Patocka if (hash_size < 64) 7598e87b9b8SMikulas Patocka hash_size = 64; 7608defd830SRobert P. J. 
Day hash_size = rounddown_pow_of_two(hash_size); 7613510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 762d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 7631da177e4SLinus Torvalds return -ENOMEM; 7641da177e4SLinus Torvalds 7651da177e4SLinus Torvalds /* 7661da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 7671da177e4SLinus Torvalds * Make this smaller than the real hash table 7681da177e4SLinus Torvalds */ 7691da177e4SLinus Torvalds hash_size >>= 3; 7701da177e4SLinus Torvalds if (hash_size < 64) 7711da177e4SLinus Torvalds hash_size = 64; 7721da177e4SLinus Torvalds 7733510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 7743510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 7751da177e4SLinus Torvalds return -ENOMEM; 7761da177e4SLinus Torvalds } 7771da177e4SLinus Torvalds 7781da177e4SLinus Torvalds return 0; 7791da177e4SLinus Torvalds } 7801da177e4SLinus Torvalds 7811e03f97eSMikulas Patocka static void merge_shutdown(struct dm_snapshot *s) 7821e03f97eSMikulas Patocka { 7831e03f97eSMikulas Patocka clear_bit_unlock(RUNNING_MERGE, &s->state_bits); 7841e03f97eSMikulas Patocka smp_mb__after_clear_bit(); 7851e03f97eSMikulas Patocka wake_up_bit(&s->state_bits, RUNNING_MERGE); 7861e03f97eSMikulas Patocka } 7871e03f97eSMikulas Patocka 7889fe86254SMikulas Patocka static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s) 7899fe86254SMikulas Patocka { 7909fe86254SMikulas Patocka s->first_merging_chunk = 0; 7919fe86254SMikulas Patocka s->num_merging_chunks = 0; 7929fe86254SMikulas Patocka 7939fe86254SMikulas Patocka return bio_list_get(&s->bios_queued_during_merge); 7949fe86254SMikulas Patocka } 7959fe86254SMikulas Patocka 7961e03f97eSMikulas Patocka /* 7971e03f97eSMikulas Patocka * Remove one chunk from the index of completed exceptions. 
7981e03f97eSMikulas Patocka */ 7991e03f97eSMikulas Patocka static int __remove_single_exception_chunk(struct dm_snapshot *s, 8001e03f97eSMikulas Patocka chunk_t old_chunk) 8011e03f97eSMikulas Patocka { 8021e03f97eSMikulas Patocka struct dm_exception *e; 8031e03f97eSMikulas Patocka 8041e03f97eSMikulas Patocka e = dm_lookup_exception(&s->complete, old_chunk); 8051e03f97eSMikulas Patocka if (!e) { 8061e03f97eSMikulas Patocka DMERR("Corruption detected: exception for block %llu is " 8071e03f97eSMikulas Patocka "on disk but not in memory", 8081e03f97eSMikulas Patocka (unsigned long long)old_chunk); 8091e03f97eSMikulas Patocka return -EINVAL; 8101e03f97eSMikulas Patocka } 8111e03f97eSMikulas Patocka 8121e03f97eSMikulas Patocka /* 8131e03f97eSMikulas Patocka * If this is the only chunk using this exception, remove exception. 8141e03f97eSMikulas Patocka */ 8151e03f97eSMikulas Patocka if (!dm_consecutive_chunk_count(e)) { 8161e03f97eSMikulas Patocka dm_remove_exception(e); 8171e03f97eSMikulas Patocka free_completed_exception(e); 8181e03f97eSMikulas Patocka return 0; 8191e03f97eSMikulas Patocka } 8201e03f97eSMikulas Patocka 8211e03f97eSMikulas Patocka /* 8221e03f97eSMikulas Patocka * The chunk may be either at the beginning or the end of a 8231e03f97eSMikulas Patocka * group of consecutive chunks - never in the middle. We are 8241e03f97eSMikulas Patocka * removing chunks in the opposite order to that in which they 8251e03f97eSMikulas Patocka * were added, so this should always be true. 8261e03f97eSMikulas Patocka * Decrement the consecutive chunk counter and adjust the 8271e03f97eSMikulas Patocka * starting point if necessary. 
8281e03f97eSMikulas Patocka */ 8291e03f97eSMikulas Patocka if (old_chunk == e->old_chunk) { 8301e03f97eSMikulas Patocka e->old_chunk++; 8311e03f97eSMikulas Patocka e->new_chunk++; 8321e03f97eSMikulas Patocka } else if (old_chunk != e->old_chunk + 8331e03f97eSMikulas Patocka dm_consecutive_chunk_count(e)) { 8341e03f97eSMikulas Patocka DMERR("Attempt to merge block %llu from the " 8351e03f97eSMikulas Patocka "middle of a chunk range [%llu - %llu]", 8361e03f97eSMikulas Patocka (unsigned long long)old_chunk, 8371e03f97eSMikulas Patocka (unsigned long long)e->old_chunk, 8381e03f97eSMikulas Patocka (unsigned long long) 8391e03f97eSMikulas Patocka e->old_chunk + dm_consecutive_chunk_count(e)); 8401e03f97eSMikulas Patocka return -EINVAL; 8411e03f97eSMikulas Patocka } 8421e03f97eSMikulas Patocka 8431e03f97eSMikulas Patocka dm_consecutive_chunk_count_dec(e); 8441e03f97eSMikulas Patocka 8451e03f97eSMikulas Patocka return 0; 8461e03f97eSMikulas Patocka } 8471e03f97eSMikulas Patocka 8489fe86254SMikulas Patocka static void flush_bios(struct bio *bio); 8499fe86254SMikulas Patocka 8509fe86254SMikulas Patocka static int remove_single_exception_chunk(struct dm_snapshot *s) 8511e03f97eSMikulas Patocka { 8529fe86254SMikulas Patocka struct bio *b = NULL; 8539fe86254SMikulas Patocka int r; 8549fe86254SMikulas Patocka chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; 8551e03f97eSMikulas Patocka 8561e03f97eSMikulas Patocka down_write(&s->lock); 8579fe86254SMikulas Patocka 8589fe86254SMikulas Patocka /* 8599fe86254SMikulas Patocka * Process chunks (and associated exceptions) in reverse order 8609fe86254SMikulas Patocka * so that dm_consecutive_chunk_count_dec() accounting works. 
8619fe86254SMikulas Patocka */ 8629fe86254SMikulas Patocka do { 8631e03f97eSMikulas Patocka r = __remove_single_exception_chunk(s, old_chunk); 8649fe86254SMikulas Patocka if (r) 8659fe86254SMikulas Patocka goto out; 8669fe86254SMikulas Patocka } while (old_chunk-- > s->first_merging_chunk); 8679fe86254SMikulas Patocka 8689fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 8699fe86254SMikulas Patocka 8709fe86254SMikulas Patocka out: 8711e03f97eSMikulas Patocka up_write(&s->lock); 8729fe86254SMikulas Patocka if (b) 8739fe86254SMikulas Patocka flush_bios(b); 8741e03f97eSMikulas Patocka 8751e03f97eSMikulas Patocka return r; 8761e03f97eSMikulas Patocka } 8771e03f97eSMikulas Patocka 87873dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 87973dfd078SMikulas Patocka sector_t sector, unsigned chunk_size); 88073dfd078SMikulas Patocka 8811e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, 8821e03f97eSMikulas Patocka void *context); 8831e03f97eSMikulas Patocka 88473dfd078SMikulas Patocka static uint64_t read_pending_exceptions_done_count(void) 88573dfd078SMikulas Patocka { 88673dfd078SMikulas Patocka uint64_t pending_exceptions_done; 88773dfd078SMikulas Patocka 88873dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 88973dfd078SMikulas Patocka pending_exceptions_done = _pending_exceptions_done_count; 89073dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 89173dfd078SMikulas Patocka 89273dfd078SMikulas Patocka return pending_exceptions_done; 89373dfd078SMikulas Patocka } 89473dfd078SMikulas Patocka 89573dfd078SMikulas Patocka static void increment_pending_exceptions_done_count(void) 89673dfd078SMikulas Patocka { 89773dfd078SMikulas Patocka spin_lock(&_pending_exceptions_done_spinlock); 89873dfd078SMikulas Patocka _pending_exceptions_done_count++; 89973dfd078SMikulas Patocka spin_unlock(&_pending_exceptions_done_spinlock); 90073dfd078SMikulas 
Patocka 90173dfd078SMikulas Patocka wake_up_all(&_pending_exceptions_done); 90273dfd078SMikulas Patocka } 90373dfd078SMikulas Patocka 9041e03f97eSMikulas Patocka static void snapshot_merge_next_chunks(struct dm_snapshot *s) 9051e03f97eSMikulas Patocka { 9068a2d5286SMike Snitzer int i, linear_chunks; 9071e03f97eSMikulas Patocka chunk_t old_chunk, new_chunk; 9081e03f97eSMikulas Patocka struct dm_io_region src, dest; 9098a2d5286SMike Snitzer sector_t io_size; 91073dfd078SMikulas Patocka uint64_t previous_count; 9111e03f97eSMikulas Patocka 9121e03f97eSMikulas Patocka BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits)); 9131e03f97eSMikulas Patocka if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits))) 9141e03f97eSMikulas Patocka goto shut; 9151e03f97eSMikulas Patocka 9161e03f97eSMikulas Patocka /* 9171e03f97eSMikulas Patocka * valid flag never changes during merge, so no lock required. 9181e03f97eSMikulas Patocka */ 9191e03f97eSMikulas Patocka if (!s->valid) { 9201e03f97eSMikulas Patocka DMERR("Snapshot is invalid: can't merge"); 9211e03f97eSMikulas Patocka goto shut; 9221e03f97eSMikulas Patocka } 9231e03f97eSMikulas Patocka 9248a2d5286SMike Snitzer linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk, 9258a2d5286SMike Snitzer &new_chunk); 9268a2d5286SMike Snitzer if (linear_chunks <= 0) { 927d8ddb1cfSMike Snitzer if (linear_chunks < 0) { 9281e03f97eSMikulas Patocka DMERR("Read error in exception store: " 9291e03f97eSMikulas Patocka "shutting down merge"); 930d8ddb1cfSMike Snitzer down_write(&s->lock); 931d8ddb1cfSMike Snitzer s->merge_failed = 1; 932d8ddb1cfSMike Snitzer up_write(&s->lock); 933d8ddb1cfSMike Snitzer } 9341e03f97eSMikulas Patocka goto shut; 9351e03f97eSMikulas Patocka } 9361e03f97eSMikulas Patocka 9378a2d5286SMike Snitzer /* Adjust old_chunk and new_chunk to reflect start of linear region */ 9388a2d5286SMike Snitzer old_chunk = old_chunk + 1 - linear_chunks; 9398a2d5286SMike Snitzer new_chunk = new_chunk + 1 - linear_chunks; 9408a2d5286SMike 
Snitzer 9418a2d5286SMike Snitzer /* 9428a2d5286SMike Snitzer * Use one (potentially large) I/O to copy all 'linear_chunks' 9438a2d5286SMike Snitzer * from the exception store to the origin 9448a2d5286SMike Snitzer */ 9458a2d5286SMike Snitzer io_size = linear_chunks * s->store->chunk_size; 9461e03f97eSMikulas Patocka 9471e03f97eSMikulas Patocka dest.bdev = s->origin->bdev; 9481e03f97eSMikulas Patocka dest.sector = chunk_to_sector(s->store, old_chunk); 9498a2d5286SMike Snitzer dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector); 9501e03f97eSMikulas Patocka 9511e03f97eSMikulas Patocka src.bdev = s->cow->bdev; 9521e03f97eSMikulas Patocka src.sector = chunk_to_sector(s->store, new_chunk); 9531e03f97eSMikulas Patocka src.count = dest.count; 9541e03f97eSMikulas Patocka 95573dfd078SMikulas Patocka /* 95673dfd078SMikulas Patocka * Reallocate any exceptions needed in other snapshots then 95773dfd078SMikulas Patocka * wait for the pending exceptions to complete. 95873dfd078SMikulas Patocka * Each time any pending exception (globally on the system) 95973dfd078SMikulas Patocka * completes we are woken and repeat the process to find out 96073dfd078SMikulas Patocka * if we can proceed. While this may not seem a particularly 96173dfd078SMikulas Patocka * efficient algorithm, it is not expected to have any 96273dfd078SMikulas Patocka * significant impact on performance. 96373dfd078SMikulas Patocka */ 96473dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 9658a2d5286SMike Snitzer while (origin_write_extent(s, dest.sector, io_size)) { 96673dfd078SMikulas Patocka wait_event(_pending_exceptions_done, 96773dfd078SMikulas Patocka (read_pending_exceptions_done_count() != 96873dfd078SMikulas Patocka previous_count)); 96973dfd078SMikulas Patocka /* Retry after the wait, until all exceptions are done. 
*/ 97073dfd078SMikulas Patocka previous_count = read_pending_exceptions_done_count(); 97173dfd078SMikulas Patocka } 97273dfd078SMikulas Patocka 9739fe86254SMikulas Patocka down_write(&s->lock); 9749fe86254SMikulas Patocka s->first_merging_chunk = old_chunk; 9758a2d5286SMike Snitzer s->num_merging_chunks = linear_chunks; 9769fe86254SMikulas Patocka up_write(&s->lock); 9779fe86254SMikulas Patocka 9788a2d5286SMike Snitzer /* Wait until writes to all 'linear_chunks' drain */ 9798a2d5286SMike Snitzer for (i = 0; i < linear_chunks; i++) 9808a2d5286SMike Snitzer __check_for_conflicting_io(s, old_chunk + i); 9819fe86254SMikulas Patocka 9821e03f97eSMikulas Patocka dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s); 9831e03f97eSMikulas Patocka return; 9841e03f97eSMikulas Patocka 9851e03f97eSMikulas Patocka shut: 9861e03f97eSMikulas Patocka merge_shutdown(s); 9871e03f97eSMikulas Patocka } 9881e03f97eSMikulas Patocka 9899fe86254SMikulas Patocka static void error_bios(struct bio *bio); 9909fe86254SMikulas Patocka 9911e03f97eSMikulas Patocka static void merge_callback(int read_err, unsigned long write_err, void *context) 9921e03f97eSMikulas Patocka { 9931e03f97eSMikulas Patocka struct dm_snapshot *s = context; 9949fe86254SMikulas Patocka struct bio *b = NULL; 9951e03f97eSMikulas Patocka 9961e03f97eSMikulas Patocka if (read_err || write_err) { 9971e03f97eSMikulas Patocka if (read_err) 9981e03f97eSMikulas Patocka DMERR("Read error: shutting down merge."); 9991e03f97eSMikulas Patocka else 10001e03f97eSMikulas Patocka DMERR("Write error: shutting down merge."); 10011e03f97eSMikulas Patocka goto shut; 10021e03f97eSMikulas Patocka } 10031e03f97eSMikulas Patocka 10049fe86254SMikulas Patocka if (s->store->type->commit_merge(s->store, 10059fe86254SMikulas Patocka s->num_merging_chunks) < 0) { 10061e03f97eSMikulas Patocka DMERR("Write error in exception store: shutting down merge"); 10071e03f97eSMikulas Patocka goto shut; 10081e03f97eSMikulas Patocka } 
10091e03f97eSMikulas Patocka 10109fe86254SMikulas Patocka if (remove_single_exception_chunk(s) < 0) 10119fe86254SMikulas Patocka goto shut; 10129fe86254SMikulas Patocka 10131e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 10141e03f97eSMikulas Patocka 10151e03f97eSMikulas Patocka return; 10161e03f97eSMikulas Patocka 10171e03f97eSMikulas Patocka shut: 10189fe86254SMikulas Patocka down_write(&s->lock); 1019d8ddb1cfSMike Snitzer s->merge_failed = 1; 10209fe86254SMikulas Patocka b = __release_queued_bios_after_merge(s); 10219fe86254SMikulas Patocka up_write(&s->lock); 10229fe86254SMikulas Patocka error_bios(b); 10239fe86254SMikulas Patocka 10241e03f97eSMikulas Patocka merge_shutdown(s); 10251e03f97eSMikulas Patocka } 10261e03f97eSMikulas Patocka 10271e03f97eSMikulas Patocka static void start_merge(struct dm_snapshot *s) 10281e03f97eSMikulas Patocka { 10291e03f97eSMikulas Patocka if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits)) 10301e03f97eSMikulas Patocka snapshot_merge_next_chunks(s); 10311e03f97eSMikulas Patocka } 10321e03f97eSMikulas Patocka 10331e03f97eSMikulas Patocka static int wait_schedule(void *ptr) 10341e03f97eSMikulas Patocka { 10351e03f97eSMikulas Patocka schedule(); 10361e03f97eSMikulas Patocka 10371e03f97eSMikulas Patocka return 0; 10381e03f97eSMikulas Patocka } 10391e03f97eSMikulas Patocka 10401e03f97eSMikulas Patocka /* 10411e03f97eSMikulas Patocka * Stop the merging process and wait until it finishes. 
10421e03f97eSMikulas Patocka */ 10431e03f97eSMikulas Patocka static void stop_merge(struct dm_snapshot *s) 10441e03f97eSMikulas Patocka { 10451e03f97eSMikulas Patocka set_bit(SHUTDOWN_MERGE, &s->state_bits); 10461e03f97eSMikulas Patocka wait_on_bit(&s->state_bits, RUNNING_MERGE, wait_schedule, 10471e03f97eSMikulas Patocka TASK_UNINTERRUPTIBLE); 10481e03f97eSMikulas Patocka clear_bit(SHUTDOWN_MERGE, &s->state_bits); 10491e03f97eSMikulas Patocka } 10501e03f97eSMikulas Patocka 10511da177e4SLinus Torvalds /* 10521da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 10531da177e4SLinus Torvalds */ 10541da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 10551da177e4SLinus Torvalds { 10561da177e4SLinus Torvalds struct dm_snapshot *s; 1057cd45daffSMikulas Patocka int i; 10581da177e4SLinus Torvalds int r = -EINVAL; 1059fc56f6fbSMike Snitzer char *origin_path, *cow_path; 106010b8106aSMike Snitzer unsigned args_used, num_flush_requests = 1; 106110b8106aSMike Snitzer fmode_t origin_mode = FMODE_READ; 10621da177e4SLinus Torvalds 10634c7e3bf4SMark McLoughlin if (argc != 4) { 106472d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 10651da177e4SLinus Torvalds r = -EINVAL; 1066fc56f6fbSMike Snitzer goto bad; 10671da177e4SLinus Torvalds } 10681da177e4SLinus Torvalds 106910b8106aSMike Snitzer if (dm_target_is_snapshot_merge(ti)) { 107010b8106aSMike Snitzer num_flush_requests = 2; 107110b8106aSMike Snitzer origin_mode = FMODE_WRITE; 107210b8106aSMike Snitzer } 107310b8106aSMike Snitzer 10741da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 1075fee1998eSJonathan Brassow if (!s) { 10761da177e4SLinus Torvalds ti->error = "Cannot allocate snapshot context private " 10771da177e4SLinus Torvalds "structure"; 10781da177e4SLinus Torvalds r = -ENOMEM; 1079fc56f6fbSMike Snitzer goto bad; 10801da177e4SLinus Torvalds } 10811da177e4SLinus Torvalds 1082c2411045SMikulas Patocka 
origin_path = argv[0]; 1083c2411045SMikulas Patocka argv++; 1084c2411045SMikulas Patocka argc--; 1085c2411045SMikulas Patocka 1086c2411045SMikulas Patocka r = dm_get_device(ti, origin_path, origin_mode, &s->origin); 1087c2411045SMikulas Patocka if (r) { 1088c2411045SMikulas Patocka ti->error = "Cannot get origin device"; 1089c2411045SMikulas Patocka goto bad_origin; 1090c2411045SMikulas Patocka } 1091c2411045SMikulas Patocka 1092fc56f6fbSMike Snitzer cow_path = argv[0]; 1093fc56f6fbSMike Snitzer argv++; 1094fc56f6fbSMike Snitzer argc--; 1095fc56f6fbSMike Snitzer 10968215d6ecSNikanth Karthikesan r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow); 1097fc56f6fbSMike Snitzer if (r) { 1098fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 1099fc56f6fbSMike Snitzer goto bad_cow; 1100fc56f6fbSMike Snitzer } 1101fc56f6fbSMike Snitzer 1102fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 1103fc56f6fbSMike Snitzer if (r) { 1104fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 1105fc56f6fbSMike Snitzer r = -EINVAL; 1106fc56f6fbSMike Snitzer goto bad_store; 1107fc56f6fbSMike Snitzer } 1108fc56f6fbSMike Snitzer 1109fc56f6fbSMike Snitzer argv += args_used; 1110fc56f6fbSMike Snitzer argc -= args_used; 1111fc56f6fbSMike Snitzer 1112fc56f6fbSMike Snitzer s->ti = ti; 11131da177e4SLinus Torvalds s->valid = 1; 1114aa14edebSAlasdair G Kergon s->active = 0; 1115c26655caSMike Snitzer s->suspended = 0; 1116879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 11171da177e4SLinus Torvalds init_rwsem(&s->lock); 1118c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 1119ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 11201e03f97eSMikulas Patocka s->state_bits = 0; 1121d8ddb1cfSMike Snitzer s->merge_failed = 0; 11229fe86254SMikulas Patocka s->first_merging_chunk = 0; 11239fe86254SMikulas Patocka s->num_merging_chunks = 0; 11249fe86254SMikulas Patocka 
bio_list_init(&s->bios_queued_during_merge); 11251da177e4SLinus Torvalds 11261da177e4SLinus Torvalds /* Allocate hash table for COW data */ 1127fee1998eSJonathan Brassow if (init_hash_tables(s)) { 11281da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 11291da177e4SLinus Torvalds r = -ENOMEM; 1130fee1998eSJonathan Brassow goto bad_hash_tables; 11311da177e4SLinus Torvalds } 11321da177e4SLinus Torvalds 1133eb69aca5SHeinz Mauelshagen r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 11341da177e4SLinus Torvalds if (r) { 11351da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 1136fee1998eSJonathan Brassow goto bad_kcopyd; 11371da177e4SLinus Torvalds } 11381da177e4SLinus Torvalds 113992e86812SMikulas Patocka s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 114092e86812SMikulas Patocka if (!s->pending_pool) { 114192e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 1142fee1998eSJonathan Brassow goto bad_pending_pool; 114392e86812SMikulas Patocka } 114492e86812SMikulas Patocka 1145cd45daffSMikulas Patocka s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 1146cd45daffSMikulas Patocka tracked_chunk_cache); 1147cd45daffSMikulas Patocka if (!s->tracked_chunk_pool) { 1148cd45daffSMikulas Patocka ti->error = "Could not allocate tracked_chunk mempool for " 1149cd45daffSMikulas Patocka "tracking reads"; 115092e86812SMikulas Patocka goto bad_tracked_chunk_pool; 1151cd45daffSMikulas Patocka } 1152cd45daffSMikulas Patocka 1153cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1154cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 1155cd45daffSMikulas Patocka 1156cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 1157cd45daffSMikulas Patocka 1158c1f0c183SMike Snitzer bio_list_init(&s->queued_bios); 1159c1f0c183SMike Snitzer INIT_WORK(&s->queued_bios_work, flush_queued_bios); 1160c1f0c183SMike Snitzer 
1161c1f0c183SMike Snitzer ti->private = s; 116210b8106aSMike Snitzer ti->num_flush_requests = num_flush_requests; 1163c1f0c183SMike Snitzer 1164c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 1165c1f0c183SMike Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 1166c1f0c183SMike Snitzer r = register_snapshot(s); 1167c1f0c183SMike Snitzer if (r == -ENOMEM) { 1168c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 1169c1f0c183SMike Snitzer goto bad_load_and_register; 1170c1f0c183SMike Snitzer } else if (r < 0) { 1171c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 1172c1f0c183SMike Snitzer goto bad_load_and_register; 1173c1f0c183SMike Snitzer } 1174c1f0c183SMike Snitzer 1175c1f0c183SMike Snitzer /* 1176c1f0c183SMike Snitzer * Metadata must only be loaded into one table at once, so skip this 1177c1f0c183SMike Snitzer * if metadata will be handed over during resume. 1178c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 1179c1f0c183SMike Snitzer * ensure it's ignored. 
1180c1f0c183SMike Snitzer */ 1181c1f0c183SMike Snitzer if (r > 0) { 1182c1f0c183SMike Snitzer s->store->chunk_size = 0; 1183c1f0c183SMike Snitzer return 0; 1184c1f0c183SMike Snitzer } 1185c1f0c183SMike Snitzer 1186493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 1187493df71cSJonathan Brassow (void *)s); 11880764147bSMilan Broz if (r < 0) { 1189f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 1190c1f0c183SMike Snitzer goto bad_read_metadata; 11910764147bSMilan Broz } else if (r > 0) { 11920764147bSMilan Broz s->valid = 0; 11930764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 1194f9cea4f7SMark McLoughlin } 1195aa14edebSAlasdair G Kergon 11963f2412dcSMikulas Patocka if (!s->store->chunk_size) { 11973f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 1198c1f0c183SMike Snitzer goto bad_read_metadata; 11993f2412dcSMikulas Patocka } 1200d0216849SJonathan Brassow ti->split_io = s->store->chunk_size; 12011da177e4SLinus Torvalds 12021da177e4SLinus Torvalds return 0; 12031da177e4SLinus Torvalds 1204c1f0c183SMike Snitzer bad_read_metadata: 1205c1f0c183SMike Snitzer unregister_snapshot(s); 1206c1f0c183SMike Snitzer 1207cd45daffSMikulas Patocka bad_load_and_register: 1208cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 1209cd45daffSMikulas Patocka 121092e86812SMikulas Patocka bad_tracked_chunk_pool: 121192e86812SMikulas Patocka mempool_destroy(s->pending_pool); 121292e86812SMikulas Patocka 1213fee1998eSJonathan Brassow bad_pending_pool: 1214eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 12151da177e4SLinus Torvalds 1216fee1998eSJonathan Brassow bad_kcopyd: 12173510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 12183510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 12191da177e4SLinus Torvalds 1220fee1998eSJonathan Brassow bad_hash_tables: 1221fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 
1222fc56f6fbSMike Snitzer 1223fc56f6fbSMike Snitzer bad_store: 1224fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1225fc56f6fbSMike Snitzer 1226fc56f6fbSMike Snitzer bad_cow: 1227c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1228c2411045SMikulas Patocka 1229c2411045SMikulas Patocka bad_origin: 12301da177e4SLinus Torvalds kfree(s); 12311da177e4SLinus Torvalds 1232fc56f6fbSMike Snitzer bad: 12331da177e4SLinus Torvalds return r; 12341da177e4SLinus Torvalds } 12351da177e4SLinus Torvalds 123631c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 123731c93a0cSMilan Broz { 1238eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 123931c93a0cSMilan Broz s->kcopyd_client = NULL; 124031c93a0cSMilan Broz 12413510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 12423510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 124331c93a0cSMilan Broz } 124431c93a0cSMilan Broz 1245c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 1246c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 1247c1f0c183SMike Snitzer { 1248c1f0c183SMike Snitzer union { 1249c1f0c183SMike Snitzer struct dm_exception_table table_swap; 1250c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 1251c1f0c183SMike Snitzer } u; 1252c1f0c183SMike Snitzer 1253c1f0c183SMike Snitzer /* 1254c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 
1255c1f0c183SMike Snitzer */ 1256c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 1257c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 1258c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 1259c1f0c183SMike Snitzer 1260c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 1261c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 1262c1f0c183SMike Snitzer snap_src->store = u.store_swap; 1263c1f0c183SMike Snitzer 1264c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 1265c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 1266c1f0c183SMike Snitzer 1267c1f0c183SMike Snitzer snap_dest->ti->split_io = snap_dest->store->chunk_size; 1268c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 1269c1f0c183SMike Snitzer 1270c1f0c183SMike Snitzer /* 1271c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 1272c1f0c183SMike Snitzer */ 1273c1f0c183SMike Snitzer snap_src->valid = 0; 1274c1f0c183SMike Snitzer } 1275c1f0c183SMike Snitzer 12761da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 12771da177e4SLinus Torvalds { 1278cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1279cd45daffSMikulas Patocka int i; 1280cd45daffSMikulas Patocka #endif 1281028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1282c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 12831da177e4SLinus Torvalds 1284ca3a931fSAlasdair G Kergon flush_workqueue(ksnapd); 1285ca3a931fSAlasdair G Kergon 1286c1f0c183SMike Snitzer down_read(&_origins_lock); 1287c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 12889d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1289c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 1290c1f0c183SMike Snitzer down_write(&snap_dest->lock); 1291c1f0c183SMike Snitzer snap_dest->valid = 0; 1292c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1293c1f0c183SMike Snitzer 
DMERR("Cancelling snapshot handover."); 1294c1f0c183SMike Snitzer } 1295c1f0c183SMike Snitzer up_read(&_origins_lock); 1296c1f0c183SMike Snitzer 12971e03f97eSMikulas Patocka if (dm_target_is_snapshot_merge(ti)) 12981e03f97eSMikulas Patocka stop_merge(s); 12991e03f97eSMikulas Patocka 1300138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 1301138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 13021da177e4SLinus Torvalds unregister_snapshot(s); 13031da177e4SLinus Torvalds 1304879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 130590fa1527SMikulas Patocka msleep(1); 1306879129d2SMikulas Patocka /* 1307879129d2SMikulas Patocka * Ensure instructions in mempool_destroy aren't reordered 1308879129d2SMikulas Patocka * before atomic_read. 1309879129d2SMikulas Patocka */ 1310879129d2SMikulas Patocka smp_mb(); 1311879129d2SMikulas Patocka 1312cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 1313cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 1314cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 1315cd45daffSMikulas Patocka #endif 1316cd45daffSMikulas Patocka 1317cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 1318cd45daffSMikulas Patocka 131931c93a0cSMilan Broz __free_exceptions(s); 13201da177e4SLinus Torvalds 132192e86812SMikulas Patocka mempool_destroy(s->pending_pool); 132292e86812SMikulas Patocka 1323fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 1324138728dcSAlasdair G Kergon 1325fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 1326fc56f6fbSMike Snitzer 1327c2411045SMikulas Patocka dm_put_device(ti, s->origin); 1328c2411045SMikulas Patocka 13291da177e4SLinus Torvalds kfree(s); 13301da177e4SLinus Torvalds } 13311da177e4SLinus Torvalds 13321da177e4SLinus Torvalds /* 13331da177e4SLinus Torvalds * Flush a list of buffers. 
13341da177e4SLinus Torvalds */ 13351da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 13361da177e4SLinus Torvalds { 13371da177e4SLinus Torvalds struct bio *n; 13381da177e4SLinus Torvalds 13391da177e4SLinus Torvalds while (bio) { 13401da177e4SLinus Torvalds n = bio->bi_next; 13411da177e4SLinus Torvalds bio->bi_next = NULL; 13421da177e4SLinus Torvalds generic_make_request(bio); 13431da177e4SLinus Torvalds bio = n; 13441da177e4SLinus Torvalds } 13451da177e4SLinus Torvalds } 13461da177e4SLinus Torvalds 1347c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work) 1348ca3a931fSAlasdair G Kergon { 1349c4028958SDavid Howells struct dm_snapshot *s = 1350c4028958SDavid Howells container_of(work, struct dm_snapshot, queued_bios_work); 1351ca3a931fSAlasdair G Kergon struct bio *queued_bios; 1352ca3a931fSAlasdair G Kergon unsigned long flags; 1353ca3a931fSAlasdair G Kergon 1354ca3a931fSAlasdair G Kergon spin_lock_irqsave(&s->pe_lock, flags); 1355ca3a931fSAlasdair G Kergon queued_bios = bio_list_get(&s->queued_bios); 1356ca3a931fSAlasdair G Kergon spin_unlock_irqrestore(&s->pe_lock, flags); 1357ca3a931fSAlasdair G Kergon 1358ca3a931fSAlasdair G Kergon flush_bios(queued_bios); 1359ca3a931fSAlasdair G Kergon } 1360ca3a931fSAlasdair G Kergon 1361515ad66cSMikulas Patocka static int do_origin(struct dm_dev *origin, struct bio *bio); 1362515ad66cSMikulas Patocka 1363515ad66cSMikulas Patocka /* 1364515ad66cSMikulas Patocka * Flush a list of buffers. 
1365515ad66cSMikulas Patocka */ 1366515ad66cSMikulas Patocka static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) 1367515ad66cSMikulas Patocka { 1368515ad66cSMikulas Patocka struct bio *n; 1369515ad66cSMikulas Patocka int r; 1370515ad66cSMikulas Patocka 1371515ad66cSMikulas Patocka while (bio) { 1372515ad66cSMikulas Patocka n = bio->bi_next; 1373515ad66cSMikulas Patocka bio->bi_next = NULL; 1374515ad66cSMikulas Patocka r = do_origin(s->origin, bio); 1375515ad66cSMikulas Patocka if (r == DM_MAPIO_REMAPPED) 1376515ad66cSMikulas Patocka generic_make_request(bio); 1377515ad66cSMikulas Patocka bio = n; 1378515ad66cSMikulas Patocka } 1379515ad66cSMikulas Patocka } 1380515ad66cSMikulas Patocka 13811da177e4SLinus Torvalds /* 13821da177e4SLinus Torvalds * Error a list of buffers. 13831da177e4SLinus Torvalds */ 13841da177e4SLinus Torvalds static void error_bios(struct bio *bio) 13851da177e4SLinus Torvalds { 13861da177e4SLinus Torvalds struct bio *n; 13871da177e4SLinus Torvalds 13881da177e4SLinus Torvalds while (bio) { 13891da177e4SLinus Torvalds n = bio->bi_next; 13901da177e4SLinus Torvalds bio->bi_next = NULL; 13916712ecf8SNeilBrown bio_io_error(bio); 13921da177e4SLinus Torvalds bio = n; 13931da177e4SLinus Torvalds } 13941da177e4SLinus Torvalds } 13951da177e4SLinus Torvalds 1396695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err) 139776df1c65SAlasdair G Kergon { 139876df1c65SAlasdair G Kergon if (!s->valid) 139976df1c65SAlasdair G Kergon return; 140076df1c65SAlasdair G Kergon 140176df1c65SAlasdair G Kergon if (err == -EIO) 140276df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Error reading/writing."); 140376df1c65SAlasdair G Kergon else if (err == -ENOMEM) 140476df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Unable to allocate exception."); 140576df1c65SAlasdair G Kergon 1406493df71cSJonathan Brassow if (s->store->type->drop_snapshot) 1407493df71cSJonathan Brassow 
s->store->type->drop_snapshot(s->store); 140876df1c65SAlasdair G Kergon 140976df1c65SAlasdair G Kergon s->valid = 0; 141076df1c65SAlasdair G Kergon 1411fc56f6fbSMike Snitzer dm_table_event(s->ti->table); 141276df1c65SAlasdair G Kergon } 141376df1c65SAlasdair G Kergon 1414028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success) 14151da177e4SLinus Torvalds { 14161d4989c8SJon Brassow struct dm_exception *e; 14171da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 14189d493fa8SAlasdair G Kergon struct bio *origin_bios = NULL; 14199d493fa8SAlasdair G Kergon struct bio *snapshot_bios = NULL; 14209d493fa8SAlasdair G Kergon int error = 0; 14211da177e4SLinus Torvalds 142276df1c65SAlasdair G Kergon if (!success) { 142376df1c65SAlasdair G Kergon /* Read/write error - snapshot is unusable */ 14241da177e4SLinus Torvalds down_write(&s->lock); 1425695368acSAlasdair G Kergon __invalidate_snapshot(s, -EIO); 14269d493fa8SAlasdair G Kergon error = 1; 142776df1c65SAlasdair G Kergon goto out; 142876df1c65SAlasdair G Kergon } 142976df1c65SAlasdair G Kergon 14303510cb94SJon Brassow e = alloc_completed_exception(); 143176df1c65SAlasdair G Kergon if (!e) { 143276df1c65SAlasdair G Kergon down_write(&s->lock); 1433695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 14349d493fa8SAlasdair G Kergon error = 1; 14351da177e4SLinus Torvalds goto out; 14361da177e4SLinus Torvalds } 14371da177e4SLinus Torvalds *e = pe->e; 14381da177e4SLinus Torvalds 14399d493fa8SAlasdair G Kergon down_write(&s->lock); 14409d493fa8SAlasdair G Kergon if (!s->valid) { 14413510cb94SJon Brassow free_completed_exception(e); 14429d493fa8SAlasdair G Kergon error = 1; 14439d493fa8SAlasdair G Kergon goto out; 14449d493fa8SAlasdair G Kergon } 14459d493fa8SAlasdair G Kergon 1446615d1eb9SMike Snitzer /* Check for conflicting reads */ 1447615d1eb9SMike Snitzer __check_for_conflicting_io(s, pe->e.old_chunk); 1448a8d41b59SMikulas Patocka 1449a8d41b59SMikulas Patocka /* 
14501da177e4SLinus Torvalds * Add a proper exception, and remove the 14511da177e4SLinus Torvalds * in-flight exception from the list. 14521da177e4SLinus Torvalds */ 14533510cb94SJon Brassow dm_insert_exception(&s->complete, e); 14541da177e4SLinus Torvalds 14551da177e4SLinus Torvalds out: 14563510cb94SJon Brassow dm_remove_exception(&pe->e); 14579d493fa8SAlasdair G Kergon snapshot_bios = bio_list_get(&pe->snapshot_bios); 1458515ad66cSMikulas Patocka origin_bios = bio_list_get(&pe->origin_bios); 1459515ad66cSMikulas Patocka free_pending_exception(pe); 1460b4b610f6SAlasdair G Kergon 146173dfd078SMikulas Patocka increment_pending_exceptions_done_count(); 146273dfd078SMikulas Patocka 14639d493fa8SAlasdair G Kergon up_write(&s->lock); 14649d493fa8SAlasdair G Kergon 14659d493fa8SAlasdair G Kergon /* Submit any pending write bios */ 14669d493fa8SAlasdair G Kergon if (error) 14679d493fa8SAlasdair G Kergon error_bios(snapshot_bios); 14689d493fa8SAlasdair G Kergon else 14699d493fa8SAlasdair G Kergon flush_bios(snapshot_bios); 14709d493fa8SAlasdair G Kergon 1471515ad66cSMikulas Patocka retry_origin_bios(s, origin_bios); 14721da177e4SLinus Torvalds } 14731da177e4SLinus Torvalds 14741da177e4SLinus Torvalds static void commit_callback(void *context, int success) 14751da177e4SLinus Torvalds { 1476028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 1477028867acSAlasdair G Kergon 14781da177e4SLinus Torvalds pending_complete(pe, success); 14791da177e4SLinus Torvalds } 14801da177e4SLinus Torvalds 14811da177e4SLinus Torvalds /* 14821da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 14831da177e4SLinus Torvalds * this code so don't block. 
14841da177e4SLinus Torvalds */ 14854cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 14861da177e4SLinus Torvalds { 1487028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 14881da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 14891da177e4SLinus Torvalds 14901da177e4SLinus Torvalds if (read_err || write_err) 14911da177e4SLinus Torvalds pending_complete(pe, 0); 14921da177e4SLinus Torvalds 14931da177e4SLinus Torvalds else 14941da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 1495493df71cSJonathan Brassow s->store->type->commit_exception(s->store, &pe->e, 1496b2a11465SJonathan Brassow commit_callback, pe); 14971da177e4SLinus Torvalds } 14981da177e4SLinus Torvalds 14991da177e4SLinus Torvalds /* 15001da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 15011da177e4SLinus Torvalds */ 1502028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 15031da177e4SLinus Torvalds { 15041da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 150522a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 15061da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 15071da177e4SLinus Torvalds sector_t dev_size; 15081da177e4SLinus Torvalds 15091da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 15101da177e4SLinus Torvalds 15111da177e4SLinus Torvalds src.bdev = bdev; 151271fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1513df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 15141da177e4SLinus Torvalds 1515fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 151671fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 15171da177e4SLinus Torvalds dest.count = src.count; 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds /* Hand over to kcopyd */ 1520eb69aca5SHeinz Mauelshagen dm_kcopyd_copy(s->kcopyd_client, 
15211da177e4SLinus Torvalds &src, 1, &dest, 0, copy_callback, pe); 15221da177e4SLinus Torvalds } 15231da177e4SLinus Torvalds 15242913808eSMikulas Patocka static struct dm_snap_pending_exception * 15252913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 15262913808eSMikulas Patocka { 15273510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 15282913808eSMikulas Patocka 15292913808eSMikulas Patocka if (!e) 15302913808eSMikulas Patocka return NULL; 15312913808eSMikulas Patocka 15322913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 15332913808eSMikulas Patocka } 15342913808eSMikulas Patocka 15351da177e4SLinus Torvalds /* 15361da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 15371da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 15381da177e4SLinus Torvalds * it into the pending table. 15391da177e4SLinus Torvalds * 15401da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 15411da177e4SLinus Torvalds * this. 
15421da177e4SLinus Torvalds */ 1543028867acSAlasdair G Kergon static struct dm_snap_pending_exception * 1544c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s, 1545c6621392SMikulas Patocka struct dm_snap_pending_exception *pe, chunk_t chunk) 15461da177e4SLinus Torvalds { 1547c6621392SMikulas Patocka struct dm_snap_pending_exception *pe2; 154876df1c65SAlasdair G Kergon 15492913808eSMikulas Patocka pe2 = __lookup_pending_exception(s, chunk); 15502913808eSMikulas Patocka if (pe2) { 15511da177e4SLinus Torvalds free_pending_exception(pe); 15522913808eSMikulas Patocka return pe2; 155376df1c65SAlasdair G Kergon } 155476df1c65SAlasdair G Kergon 15551da177e4SLinus Torvalds pe->e.old_chunk = chunk; 15561da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 15571da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 15581da177e4SLinus Torvalds pe->started = 0; 15591da177e4SLinus Torvalds 1560493df71cSJonathan Brassow if (s->store->type->prepare_exception(s->store, &pe->e)) { 15611da177e4SLinus Torvalds free_pending_exception(pe); 15621da177e4SLinus Torvalds return NULL; 15631da177e4SLinus Torvalds } 15641da177e4SLinus Torvalds 15653510cb94SJon Brassow dm_insert_exception(&s->pending, &pe->e); 15661da177e4SLinus Torvalds 15671da177e4SLinus Torvalds return pe; 15681da177e4SLinus Torvalds } 15691da177e4SLinus Torvalds 15701d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, 1571d74f81f8SMilan Broz struct bio *bio, chunk_t chunk) 15721da177e4SLinus Torvalds { 1573fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 157471fab00aSJonathan Brassow bio->bi_sector = chunk_to_sector(s->store, 157571fab00aSJonathan Brassow dm_chunk_number(e->new_chunk) + 1576d74f81f8SMilan Broz (chunk - e->old_chunk)) + 157771fab00aSJonathan Brassow (bio->bi_sector & 157871fab00aSJonathan Brassow s->store->chunk_mask); 15791da177e4SLinus Torvalds } 15801da177e4SLinus Torvalds 15811da177e4SLinus Torvalds static int snapshot_map(struct 
dm_target *ti, struct bio *bio, 15821da177e4SLinus Torvalds union map_info *map_context) 15831da177e4SLinus Torvalds { 15841d4989c8SJon Brassow struct dm_exception *e; 1585028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1586d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 15871da177e4SLinus Torvalds chunk_t chunk; 1588028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = NULL; 15891da177e4SLinus Torvalds 1590494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) { 1591fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 1592494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1593494b3ee7SMikulas Patocka } 1594494b3ee7SMikulas Patocka 159571fab00aSJonathan Brassow chunk = sector_to_chunk(s->store, bio->bi_sector); 15961da177e4SLinus Torvalds 15971da177e4SLinus Torvalds /* Full snapshots are not usable */ 159876df1c65SAlasdair G Kergon /* To get here the table must be live so s->active is always set. */ 15991da177e4SLinus Torvalds if (!s->valid) 1600f6a80ea8SAlasdair G Kergon return -EIO; 16011da177e4SLinus Torvalds 16021da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 16031da177e4SLinus Torvalds * to copy an exception */ 16041da177e4SLinus Torvalds down_write(&s->lock); 16051da177e4SLinus Torvalds 160676df1c65SAlasdair G Kergon if (!s->valid) { 160776df1c65SAlasdair G Kergon r = -EIO; 160876df1c65SAlasdair G Kergon goto out_unlock; 160976df1c65SAlasdair G Kergon } 161076df1c65SAlasdair G Kergon 16111da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 16123510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 16131da177e4SLinus Torvalds if (e) { 1614d74f81f8SMilan Broz remap_exception(s, e, bio, chunk); 161576df1c65SAlasdair G Kergon goto out_unlock; 161676df1c65SAlasdair G Kergon } 16171da177e4SLinus Torvalds 1618ba40a2aaSAlasdair G Kergon /* 1619ba40a2aaSAlasdair G Kergon * Write to snapshot - higher level takes care of RW/RO 1620ba40a2aaSAlasdair G Kergon * 
flags so we should only get this if we are 1621ba40a2aaSAlasdair G Kergon * writeable. 1622ba40a2aaSAlasdair G Kergon */ 1623ba40a2aaSAlasdair G Kergon if (bio_rw(bio) == WRITE) { 16242913808eSMikulas Patocka pe = __lookup_pending_exception(s, chunk); 16252913808eSMikulas Patocka if (!pe) { 1626c6621392SMikulas Patocka up_write(&s->lock); 1627c6621392SMikulas Patocka pe = alloc_pending_exception(s); 1628c6621392SMikulas Patocka down_write(&s->lock); 1629c6621392SMikulas Patocka 1630c6621392SMikulas Patocka if (!s->valid) { 1631c6621392SMikulas Patocka free_pending_exception(pe); 1632c6621392SMikulas Patocka r = -EIO; 1633c6621392SMikulas Patocka goto out_unlock; 1634c6621392SMikulas Patocka } 1635c6621392SMikulas Patocka 16363510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 163735bf659bSMikulas Patocka if (e) { 163835bf659bSMikulas Patocka free_pending_exception(pe); 163935bf659bSMikulas Patocka remap_exception(s, e, bio, chunk); 164035bf659bSMikulas Patocka goto out_unlock; 164135bf659bSMikulas Patocka } 164235bf659bSMikulas Patocka 1643c6621392SMikulas Patocka pe = __find_pending_exception(s, pe, chunk); 16441da177e4SLinus Torvalds if (!pe) { 1645695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 16461da177e4SLinus Torvalds r = -EIO; 164776df1c65SAlasdair G Kergon goto out_unlock; 164876df1c65SAlasdair G Kergon } 16492913808eSMikulas Patocka } 165076df1c65SAlasdair G Kergon 1651d74f81f8SMilan Broz remap_exception(s, &pe->e, bio, chunk); 16521da177e4SLinus Torvalds bio_list_add(&pe->snapshot_bios, bio); 16531da177e4SLinus Torvalds 1654d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1655ba40a2aaSAlasdair G Kergon 16561da177e4SLinus Torvalds if (!pe->started) { 16571da177e4SLinus Torvalds /* this is protected by snap->lock */ 16581da177e4SLinus Torvalds pe->started = 1; 165976df1c65SAlasdair G Kergon up_write(&s->lock); 166076df1c65SAlasdair G Kergon start_copy(pe); 1661ba40a2aaSAlasdair G Kergon goto out; 1662ba40a2aaSAlasdair G Kergon } 
1663cd45daffSMikulas Patocka } else { 16641da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 1665cd45daffSMikulas Patocka map_context->ptr = track_chunk(s, chunk); 1666cd45daffSMikulas Patocka } 16671da177e4SLinus Torvalds 1668ba40a2aaSAlasdair G Kergon out_unlock: 1669ba40a2aaSAlasdair G Kergon up_write(&s->lock); 1670ba40a2aaSAlasdair G Kergon out: 16711da177e4SLinus Torvalds return r; 16721da177e4SLinus Torvalds } 16731da177e4SLinus Torvalds 16743452c2a1SMikulas Patocka /* 16753452c2a1SMikulas Patocka * A snapshot-merge target behaves like a combination of a snapshot 16763452c2a1SMikulas Patocka * target and a snapshot-origin target. It only generates new 16773452c2a1SMikulas Patocka * exceptions in other snapshots and not in the one that is being 16783452c2a1SMikulas Patocka * merged. 16793452c2a1SMikulas Patocka * 16803452c2a1SMikulas Patocka * For each chunk, if there is an existing exception, it is used to 16813452c2a1SMikulas Patocka * redirect I/O to the cow device. Otherwise I/O is sent to the origin, 16823452c2a1SMikulas Patocka * which in turn might generate exceptions in other snapshots. 16839fe86254SMikulas Patocka * If merging is currently taking place on the chunk in question, the 16849fe86254SMikulas Patocka * I/O is deferred by adding it to s->bios_queued_during_merge. 
16853452c2a1SMikulas Patocka */ 16863452c2a1SMikulas Patocka static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, 16873452c2a1SMikulas Patocka union map_info *map_context) 16883452c2a1SMikulas Patocka { 16893452c2a1SMikulas Patocka struct dm_exception *e; 16903452c2a1SMikulas Patocka struct dm_snapshot *s = ti->private; 16913452c2a1SMikulas Patocka int r = DM_MAPIO_REMAPPED; 16923452c2a1SMikulas Patocka chunk_t chunk; 16933452c2a1SMikulas Patocka 169410b8106aSMike Snitzer if (unlikely(bio_empty_barrier(bio))) { 169510b8106aSMike Snitzer if (!map_context->flush_request) 169610b8106aSMike Snitzer bio->bi_bdev = s->origin->bdev; 169710b8106aSMike Snitzer else 169810b8106aSMike Snitzer bio->bi_bdev = s->cow->bdev; 169910b8106aSMike Snitzer map_context->ptr = NULL; 170010b8106aSMike Snitzer return DM_MAPIO_REMAPPED; 170110b8106aSMike Snitzer } 170210b8106aSMike Snitzer 17033452c2a1SMikulas Patocka chunk = sector_to_chunk(s->store, bio->bi_sector); 17043452c2a1SMikulas Patocka 17059fe86254SMikulas Patocka down_write(&s->lock); 17063452c2a1SMikulas Patocka 1707d2fdb776SMikulas Patocka /* Full merging snapshots are redirected to the origin */ 1708d2fdb776SMikulas Patocka if (!s->valid) 1709d2fdb776SMikulas Patocka goto redirect_to_origin; 17103452c2a1SMikulas Patocka 17113452c2a1SMikulas Patocka /* If the block is already remapped - use that */ 17123452c2a1SMikulas Patocka e = dm_lookup_exception(&s->complete, chunk); 17133452c2a1SMikulas Patocka if (e) { 17149fe86254SMikulas Patocka /* Queue writes overlapping with chunks being merged */ 17159fe86254SMikulas Patocka if (bio_rw(bio) == WRITE && 17169fe86254SMikulas Patocka chunk >= s->first_merging_chunk && 17179fe86254SMikulas Patocka chunk < (s->first_merging_chunk + 17189fe86254SMikulas Patocka s->num_merging_chunks)) { 17199fe86254SMikulas Patocka bio->bi_bdev = s->origin->bdev; 17209fe86254SMikulas Patocka bio_list_add(&s->bios_queued_during_merge, bio); 17219fe86254SMikulas Patocka r = 
DM_MAPIO_SUBMITTED; 17229fe86254SMikulas Patocka goto out_unlock; 17239fe86254SMikulas Patocka } 172417aa0332SMikulas Patocka 17253452c2a1SMikulas Patocka remap_exception(s, e, bio, chunk); 172617aa0332SMikulas Patocka 172717aa0332SMikulas Patocka if (bio_rw(bio) == WRITE) 172817aa0332SMikulas Patocka map_context->ptr = track_chunk(s, chunk); 17293452c2a1SMikulas Patocka goto out_unlock; 17303452c2a1SMikulas Patocka } 17313452c2a1SMikulas Patocka 1732d2fdb776SMikulas Patocka redirect_to_origin: 17333452c2a1SMikulas Patocka bio->bi_bdev = s->origin->bdev; 17343452c2a1SMikulas Patocka 17353452c2a1SMikulas Patocka if (bio_rw(bio) == WRITE) { 17369fe86254SMikulas Patocka up_write(&s->lock); 17373452c2a1SMikulas Patocka return do_origin(s->origin, bio); 17383452c2a1SMikulas Patocka } 17393452c2a1SMikulas Patocka 17403452c2a1SMikulas Patocka out_unlock: 17419fe86254SMikulas Patocka up_write(&s->lock); 17423452c2a1SMikulas Patocka 17433452c2a1SMikulas Patocka return r; 17443452c2a1SMikulas Patocka } 17453452c2a1SMikulas Patocka 1746cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 1747cd45daffSMikulas Patocka int error, union map_info *map_context) 1748cd45daffSMikulas Patocka { 1749cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 1750cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = map_context->ptr; 1751cd45daffSMikulas Patocka 1752cd45daffSMikulas Patocka if (c) 1753cd45daffSMikulas Patocka stop_tracking_chunk(s, c); 1754cd45daffSMikulas Patocka 1755cd45daffSMikulas Patocka return 0; 1756cd45daffSMikulas Patocka } 1757cd45daffSMikulas Patocka 17581e03f97eSMikulas Patocka static void snapshot_merge_presuspend(struct dm_target *ti) 17591e03f97eSMikulas Patocka { 17601e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 17611e03f97eSMikulas Patocka 17621e03f97eSMikulas Patocka stop_merge(s); 17631e03f97eSMikulas Patocka } 17641e03f97eSMikulas Patocka 1765c26655caSMike Snitzer static void 
snapshot_postsuspend(struct dm_target *ti) 1766c26655caSMike Snitzer { 1767c26655caSMike Snitzer struct dm_snapshot *s = ti->private; 1768c26655caSMike Snitzer 1769c26655caSMike Snitzer down_write(&s->lock); 1770c26655caSMike Snitzer s->suspended = 1; 1771c26655caSMike Snitzer up_write(&s->lock); 1772c26655caSMike Snitzer } 1773c26655caSMike Snitzer 1774c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target *ti) 1775c1f0c183SMike Snitzer { 1776c1f0c183SMike Snitzer int r = 0; 1777c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 1778c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1779c1f0c183SMike Snitzer 1780c1f0c183SMike Snitzer down_read(&_origins_lock); 17819d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1782c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1783c1f0c183SMike Snitzer down_read(&snap_src->lock); 1784c1f0c183SMike Snitzer if (s == snap_src) { 1785c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 1786c1f0c183SMike Snitzer "handover completes."); 1787c1f0c183SMike Snitzer r = -EINVAL; 1788c1f0c183SMike Snitzer } else if (!snap_src->suspended) { 1789c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 1790c1f0c183SMike Snitzer "source is suspended."); 1791c1f0c183SMike Snitzer r = -EINVAL; 1792c1f0c183SMike Snitzer } 1793c1f0c183SMike Snitzer up_read(&snap_src->lock); 1794c1f0c183SMike Snitzer } 1795c1f0c183SMike Snitzer up_read(&_origins_lock); 1796c1f0c183SMike Snitzer 1797c1f0c183SMike Snitzer return r; 1798c1f0c183SMike Snitzer } 1799c1f0c183SMike Snitzer 18001da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 18011da177e4SLinus Torvalds { 1802028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1803c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1804c1f0c183SMike Snitzer 1805c1f0c183SMike Snitzer down_read(&_origins_lock); 18069d3b15c4SMikulas 
Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1807c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1808c1f0c183SMike Snitzer down_write(&snap_src->lock); 1809c1f0c183SMike Snitzer down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 1810c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 1811c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1812c1f0c183SMike Snitzer up_write(&snap_src->lock); 1813c1f0c183SMike Snitzer } 1814c1f0c183SMike Snitzer up_read(&_origins_lock); 1815c1f0c183SMike Snitzer 1816c1f0c183SMike Snitzer /* Now we have correct chunk size, reregister */ 1817c1f0c183SMike Snitzer reregister_snapshot(s); 18181da177e4SLinus Torvalds 1819aa14edebSAlasdair G Kergon down_write(&s->lock); 1820aa14edebSAlasdair G Kergon s->active = 1; 1821c26655caSMike Snitzer s->suspended = 0; 1822aa14edebSAlasdair G Kergon up_write(&s->lock); 18231da177e4SLinus Torvalds } 18241da177e4SLinus Torvalds 18251e03f97eSMikulas Patocka static sector_t get_origin_minimum_chunksize(struct block_device *bdev) 18261e03f97eSMikulas Patocka { 18271e03f97eSMikulas Patocka sector_t min_chunksize; 18281e03f97eSMikulas Patocka 18291e03f97eSMikulas Patocka down_read(&_origins_lock); 18301e03f97eSMikulas Patocka min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); 18311e03f97eSMikulas Patocka up_read(&_origins_lock); 18321e03f97eSMikulas Patocka 18331e03f97eSMikulas Patocka return min_chunksize; 18341e03f97eSMikulas Patocka } 18351e03f97eSMikulas Patocka 18361e03f97eSMikulas Patocka static void snapshot_merge_resume(struct dm_target *ti) 18371e03f97eSMikulas Patocka { 18381e03f97eSMikulas Patocka struct dm_snapshot *s = ti->private; 18391e03f97eSMikulas Patocka 18401e03f97eSMikulas Patocka /* 18411e03f97eSMikulas Patocka * Handover exceptions from existing snapshot. 
18421e03f97eSMikulas Patocka */ 18431e03f97eSMikulas Patocka snapshot_resume(ti); 18441e03f97eSMikulas Patocka 18451e03f97eSMikulas Patocka /* 18461e03f97eSMikulas Patocka * snapshot-merge acts as an origin, so set ti->split_io 18471e03f97eSMikulas Patocka */ 18481e03f97eSMikulas Patocka ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); 18491e03f97eSMikulas Patocka 18501e03f97eSMikulas Patocka start_merge(s); 18511e03f97eSMikulas Patocka } 18521e03f97eSMikulas Patocka 18531da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type, 18541da177e4SLinus Torvalds char *result, unsigned int maxlen) 18551da177e4SLinus Torvalds { 18562e4a31dfSJonathan Brassow unsigned sz = 0; 1857028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 18581da177e4SLinus Torvalds 18591da177e4SLinus Torvalds switch (type) { 18601da177e4SLinus Torvalds case STATUSTYPE_INFO: 186194e76572SMikulas Patocka 186294e76572SMikulas Patocka down_write(&snap->lock); 186394e76572SMikulas Patocka 18641da177e4SLinus Torvalds if (!snap->valid) 18652e4a31dfSJonathan Brassow DMEMIT("Invalid"); 1866d8ddb1cfSMike Snitzer else if (snap->merge_failed) 1867d8ddb1cfSMike Snitzer DMEMIT("Merge failed"); 18681da177e4SLinus Torvalds else { 1869985903bbSMike Snitzer if (snap->store->type->usage) { 1870985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 1871985903bbSMike Snitzer metadata_sectors; 1872985903bbSMike Snitzer snap->store->type->usage(snap->store, 1873985903bbSMike Snitzer &total_sectors, 1874985903bbSMike Snitzer §ors_allocated, 1875985903bbSMike Snitzer &metadata_sectors); 1876985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 1877985903bbSMike Snitzer (unsigned long long)sectors_allocated, 1878985903bbSMike Snitzer (unsigned long long)total_sectors, 1879985903bbSMike Snitzer (unsigned long long)metadata_sectors); 18801da177e4SLinus Torvalds } 18811da177e4SLinus Torvalds else 18822e4a31dfSJonathan Brassow DMEMIT("Unknown"); 18831da177e4SLinus 
Torvalds } 188494e76572SMikulas Patocka 188594e76572SMikulas Patocka up_write(&snap->lock); 188694e76572SMikulas Patocka 18871da177e4SLinus Torvalds break; 18881da177e4SLinus Torvalds 18891da177e4SLinus Torvalds case STATUSTYPE_TABLE: 18901da177e4SLinus Torvalds /* 18911da177e4SLinus Torvalds * kdevname returns a static pointer so we need 18921da177e4SLinus Torvalds * to make private copies if the output is to 18931da177e4SLinus Torvalds * make sense. 18941da177e4SLinus Torvalds */ 1895fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 18961e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 18971e302a92SJonathan Brassow maxlen - sz); 18981da177e4SLinus Torvalds break; 18991da177e4SLinus Torvalds } 19001da177e4SLinus Torvalds 19011da177e4SLinus Torvalds return 0; 19021da177e4SLinus Torvalds } 19031da177e4SLinus Torvalds 19048811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 19058811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 19068811f46cSMike Snitzer { 19078811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 19081e5554c8SMikulas Patocka int r; 19098811f46cSMike Snitzer 19101e5554c8SMikulas Patocka r = fn(ti, snap->origin, 0, ti->len, data); 19111e5554c8SMikulas Patocka 19121e5554c8SMikulas Patocka if (!r) 19131e5554c8SMikulas Patocka r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data); 19141e5554c8SMikulas Patocka 19151e5554c8SMikulas Patocka return r; 19168811f46cSMike Snitzer } 19178811f46cSMike Snitzer 19188811f46cSMike Snitzer 19191da177e4SLinus Torvalds /*----------------------------------------------------------------- 19201da177e4SLinus Torvalds * Origin methods 19211da177e4SLinus Torvalds *---------------------------------------------------------------*/ 19229eaae8ffSMikulas Patocka 19239eaae8ffSMikulas Patocka /* 19249eaae8ffSMikulas Patocka * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any 19259eaae8ffSMikulas 
Patocka * supplied bio was ignored. The caller may submit it immediately. 19269eaae8ffSMikulas Patocka * (No remapping actually occurs as the origin is always a direct linear 19279eaae8ffSMikulas Patocka * map.) 19289eaae8ffSMikulas Patocka * 19299eaae8ffSMikulas Patocka * If further exceptions are required, DM_MAPIO_SUBMITTED is returned 19309eaae8ffSMikulas Patocka * and any supplied bio is added to a list to be submitted once all 19319eaae8ffSMikulas Patocka * the necessary exceptions exist. 19329eaae8ffSMikulas Patocka */ 19339eaae8ffSMikulas Patocka static int __origin_write(struct list_head *snapshots, sector_t sector, 19349eaae8ffSMikulas Patocka struct bio *bio) 19351da177e4SLinus Torvalds { 1936515ad66cSMikulas Patocka int r = DM_MAPIO_REMAPPED; 19371da177e4SLinus Torvalds struct dm_snapshot *snap; 19381d4989c8SJon Brassow struct dm_exception *e; 1939515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe; 1940515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_now = NULL; 1941515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_last = NULL; 19421da177e4SLinus Torvalds chunk_t chunk; 19431da177e4SLinus Torvalds 19441da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 19451da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 19463452c2a1SMikulas Patocka /* 19473452c2a1SMikulas Patocka * Don't make new exceptions in a merging snapshot 19483452c2a1SMikulas Patocka * because it has effectively been deleted 19493452c2a1SMikulas Patocka */ 19503452c2a1SMikulas Patocka if (dm_target_is_snapshot_merge(snap->ti)) 19513452c2a1SMikulas Patocka continue; 19523452c2a1SMikulas Patocka 195376df1c65SAlasdair G Kergon down_write(&snap->lock); 195476df1c65SAlasdair G Kergon 1955aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 1956aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 195776df1c65SAlasdair G Kergon goto next_snapshot; 19581da177e4SLinus Torvalds 
1959d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 19609eaae8ffSMikulas Patocka if (sector >= dm_table_get_size(snap->ti->table)) 196176df1c65SAlasdair G Kergon goto next_snapshot; 19621da177e4SLinus Torvalds 19631da177e4SLinus Torvalds /* 19641da177e4SLinus Torvalds * Remember, different snapshots can have 19651da177e4SLinus Torvalds * different chunk sizes. 19661da177e4SLinus Torvalds */ 19679eaae8ffSMikulas Patocka chunk = sector_to_chunk(snap->store, sector); 19681da177e4SLinus Torvalds 19691da177e4SLinus Torvalds /* 19701da177e4SLinus Torvalds * Check exception table to see if block 19711da177e4SLinus Torvalds * is already remapped in this snapshot 19721da177e4SLinus Torvalds * and trigger an exception if not. 19731da177e4SLinus Torvalds */ 19743510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 197576df1c65SAlasdair G Kergon if (e) 197676df1c65SAlasdair G Kergon goto next_snapshot; 197776df1c65SAlasdair G Kergon 19782913808eSMikulas Patocka pe = __lookup_pending_exception(snap, chunk); 19792913808eSMikulas Patocka if (!pe) { 1980c6621392SMikulas Patocka up_write(&snap->lock); 1981c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 1982c6621392SMikulas Patocka down_write(&snap->lock); 1983c6621392SMikulas Patocka 1984c6621392SMikulas Patocka if (!snap->valid) { 1985c6621392SMikulas Patocka free_pending_exception(pe); 1986c6621392SMikulas Patocka goto next_snapshot; 1987c6621392SMikulas Patocka } 1988c6621392SMikulas Patocka 19893510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 199035bf659bSMikulas Patocka if (e) { 199135bf659bSMikulas Patocka free_pending_exception(pe); 199235bf659bSMikulas Patocka goto next_snapshot; 199335bf659bSMikulas Patocka } 199435bf659bSMikulas Patocka 1995c6621392SMikulas Patocka pe = __find_pending_exception(snap, pe, chunk); 19961da177e4SLinus Torvalds if (!pe) { 1997695368acSAlasdair G Kergon __invalidate_snapshot(snap, -ENOMEM); 199876df1c65SAlasdair 
G Kergon goto next_snapshot; 199976df1c65SAlasdair G Kergon } 20002913808eSMikulas Patocka } 20011da177e4SLinus Torvalds 2002d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 200376df1c65SAlasdair G Kergon 2004515ad66cSMikulas Patocka /* 2005515ad66cSMikulas Patocka * If an origin bio was supplied, queue it to wait for the 2006515ad66cSMikulas Patocka * completion of this exception, and start this one last, 2007515ad66cSMikulas Patocka * at the end of the function. 2008515ad66cSMikulas Patocka */ 2009515ad66cSMikulas Patocka if (bio) { 2010515ad66cSMikulas Patocka bio_list_add(&pe->origin_bios, bio); 2011515ad66cSMikulas Patocka bio = NULL; 2012515ad66cSMikulas Patocka 2013515ad66cSMikulas Patocka if (!pe->started) { 2014515ad66cSMikulas Patocka pe->started = 1; 2015515ad66cSMikulas Patocka pe_to_start_last = pe; 2016515ad66cSMikulas Patocka } 2017b4b610f6SAlasdair G Kergon } 201876df1c65SAlasdair G Kergon 2019eccf0817SAlasdair G Kergon if (!pe->started) { 2020eccf0817SAlasdair G Kergon pe->started = 1; 2021515ad66cSMikulas Patocka pe_to_start_now = pe; 2022eccf0817SAlasdair G Kergon } 20231da177e4SLinus Torvalds 202476df1c65SAlasdair G Kergon next_snapshot: 20251da177e4SLinus Torvalds up_write(&snap->lock); 2026515ad66cSMikulas Patocka 2027515ad66cSMikulas Patocka if (pe_to_start_now) { 2028515ad66cSMikulas Patocka start_copy(pe_to_start_now); 2029515ad66cSMikulas Patocka pe_to_start_now = NULL; 20301da177e4SLinus Torvalds } 2031b4b610f6SAlasdair G Kergon } 2032b4b610f6SAlasdair G Kergon 20331da177e4SLinus Torvalds /* 2034515ad66cSMikulas Patocka * Submit the exception against which the bio is queued last, 2035515ad66cSMikulas Patocka * to give the other exceptions a head start. 
20361da177e4SLinus Torvalds */ 2037515ad66cSMikulas Patocka if (pe_to_start_last) 2038515ad66cSMikulas Patocka start_copy(pe_to_start_last); 20391da177e4SLinus Torvalds 20401da177e4SLinus Torvalds return r; 20411da177e4SLinus Torvalds } 20421da177e4SLinus Torvalds 20431da177e4SLinus Torvalds /* 20441da177e4SLinus Torvalds * Called on a write from the origin driver. 20451da177e4SLinus Torvalds */ 20461da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 20471da177e4SLinus Torvalds { 20481da177e4SLinus Torvalds struct origin *o; 2049d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 20501da177e4SLinus Torvalds 20511da177e4SLinus Torvalds down_read(&_origins_lock); 20521da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 20531da177e4SLinus Torvalds if (o) 20549eaae8ffSMikulas Patocka r = __origin_write(&o->snapshots, bio->bi_sector, bio); 20551da177e4SLinus Torvalds up_read(&_origins_lock); 20561da177e4SLinus Torvalds 20571da177e4SLinus Torvalds return r; 20581da177e4SLinus Torvalds } 20591da177e4SLinus Torvalds 20601da177e4SLinus Torvalds /* 206173dfd078SMikulas Patocka * Trigger exceptions in all non-merging snapshots. 206273dfd078SMikulas Patocka * 206373dfd078SMikulas Patocka * The chunk size of the merging snapshot may be larger than the chunk 206473dfd078SMikulas Patocka * size of some other snapshot so we may need to reallocate multiple 206573dfd078SMikulas Patocka * chunks in other snapshots. 206673dfd078SMikulas Patocka * 206773dfd078SMikulas Patocka * We scan all the overlapping exceptions in the other snapshots. 206873dfd078SMikulas Patocka * Returns 1 if anything was reallocated and must be waited for, 206973dfd078SMikulas Patocka * otherwise returns 0. 207073dfd078SMikulas Patocka * 207173dfd078SMikulas Patocka * size must be a multiple of merging_snap's chunk_size. 
207273dfd078SMikulas Patocka */ 207373dfd078SMikulas Patocka static int origin_write_extent(struct dm_snapshot *merging_snap, 207473dfd078SMikulas Patocka sector_t sector, unsigned size) 207573dfd078SMikulas Patocka { 207673dfd078SMikulas Patocka int must_wait = 0; 207773dfd078SMikulas Patocka sector_t n; 207873dfd078SMikulas Patocka struct origin *o; 207973dfd078SMikulas Patocka 208073dfd078SMikulas Patocka /* 208173dfd078SMikulas Patocka * The origin's __minimum_chunk_size() got stored in split_io 208273dfd078SMikulas Patocka * by snapshot_merge_resume(). 208373dfd078SMikulas Patocka */ 208473dfd078SMikulas Patocka down_read(&_origins_lock); 208573dfd078SMikulas Patocka o = __lookup_origin(merging_snap->origin->bdev); 208673dfd078SMikulas Patocka for (n = 0; n < size; n += merging_snap->ti->split_io) 208773dfd078SMikulas Patocka if (__origin_write(&o->snapshots, sector + n, NULL) == 208873dfd078SMikulas Patocka DM_MAPIO_SUBMITTED) 208973dfd078SMikulas Patocka must_wait = 1; 209073dfd078SMikulas Patocka up_read(&_origins_lock); 209173dfd078SMikulas Patocka 209273dfd078SMikulas Patocka return must_wait; 209373dfd078SMikulas Patocka } 209473dfd078SMikulas Patocka 209573dfd078SMikulas Patocka /* 20961da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 20971da177e4SLinus Torvalds */ 20981da177e4SLinus Torvalds 20991da177e4SLinus Torvalds /* 21001da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 21011da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 21021da177e4SLinus Torvalds * pointing to the real device. 
21031da177e4SLinus Torvalds */ 21041da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 21051da177e4SLinus Torvalds { 21061da177e4SLinus Torvalds int r; 21071da177e4SLinus Torvalds struct dm_dev *dev; 21081da177e4SLinus Torvalds 21091da177e4SLinus Torvalds if (argc != 1) { 211072d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 21111da177e4SLinus Torvalds return -EINVAL; 21121da177e4SLinus Torvalds } 21131da177e4SLinus Torvalds 21148215d6ecSNikanth Karthikesan r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); 21151da177e4SLinus Torvalds if (r) { 21161da177e4SLinus Torvalds ti->error = "Cannot get target device"; 21171da177e4SLinus Torvalds return r; 21181da177e4SLinus Torvalds } 21191da177e4SLinus Torvalds 21201da177e4SLinus Torvalds ti->private = dev; 2121494b3ee7SMikulas Patocka ti->num_flush_requests = 1; 2122494b3ee7SMikulas Patocka 21231da177e4SLinus Torvalds return 0; 21241da177e4SLinus Torvalds } 21251da177e4SLinus Torvalds 21261da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 21271da177e4SLinus Torvalds { 2128028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21291da177e4SLinus Torvalds dm_put_device(ti, dev); 21301da177e4SLinus Torvalds } 21311da177e4SLinus Torvalds 21321da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio, 21331da177e4SLinus Torvalds union map_info *map_context) 21341da177e4SLinus Torvalds { 2135028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21361da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 21371da177e4SLinus Torvalds 2138494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) 2139494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 2140494b3ee7SMikulas Patocka 21411da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 2142d2a7ad29SKiyoshi Ueda return (bio_rw(bio) == WRITE) ? 
do_origin(dev, bio) : DM_MAPIO_REMAPPED; 21431da177e4SLinus Torvalds } 21441da177e4SLinus Torvalds 21451da177e4SLinus Torvalds /* 21461da177e4SLinus Torvalds * Set the target "split_io" field to the minimum of all the snapshots' 21471da177e4SLinus Torvalds * chunk sizes. 21481da177e4SLinus Torvalds */ 21491da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 21501da177e4SLinus Torvalds { 2151028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21521da177e4SLinus Torvalds 21531e03f97eSMikulas Patocka ti->split_io = get_origin_minimum_chunksize(dev->bdev); 21541da177e4SLinus Torvalds } 21551da177e4SLinus Torvalds 21561da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result, 21571da177e4SLinus Torvalds unsigned int maxlen) 21581da177e4SLinus Torvalds { 2159028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 21601da177e4SLinus Torvalds 21611da177e4SLinus Torvalds switch (type) { 21621da177e4SLinus Torvalds case STATUSTYPE_INFO: 21631da177e4SLinus Torvalds result[0] = '\0'; 21641da177e4SLinus Torvalds break; 21651da177e4SLinus Torvalds 21661da177e4SLinus Torvalds case STATUSTYPE_TABLE: 21671da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 21681da177e4SLinus Torvalds break; 21691da177e4SLinus Torvalds } 21701da177e4SLinus Torvalds 21711da177e4SLinus Torvalds return 0; 21721da177e4SLinus Torvalds } 21731da177e4SLinus Torvalds 21748811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 21758811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 21768811f46cSMike Snitzer { 21778811f46cSMike Snitzer struct dm_dev *dev = ti->private; 21788811f46cSMike Snitzer 21798811f46cSMike Snitzer return fn(ti, dev, 0, ti->len, data); 21808811f46cSMike Snitzer } 21818811f46cSMike Snitzer 21821da177e4SLinus Torvalds static struct target_type origin_target = { 21831da177e4SLinus Torvalds .name = "snapshot-origin", 21848811f46cSMike Snitzer .version = {1, 7, 0}, 
21851da177e4SLinus Torvalds .module = THIS_MODULE, 21861da177e4SLinus Torvalds .ctr = origin_ctr, 21871da177e4SLinus Torvalds .dtr = origin_dtr, 21881da177e4SLinus Torvalds .map = origin_map, 21891da177e4SLinus Torvalds .resume = origin_resume, 21901da177e4SLinus Torvalds .status = origin_status, 21918811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 21921da177e4SLinus Torvalds }; 21931da177e4SLinus Torvalds 21941da177e4SLinus Torvalds static struct target_type snapshot_target = { 21951da177e4SLinus Torvalds .name = "snapshot", 2196c26655caSMike Snitzer .version = {1, 9, 0}, 21971da177e4SLinus Torvalds .module = THIS_MODULE, 21981da177e4SLinus Torvalds .ctr = snapshot_ctr, 21991da177e4SLinus Torvalds .dtr = snapshot_dtr, 22001da177e4SLinus Torvalds .map = snapshot_map, 2201cd45daffSMikulas Patocka .end_io = snapshot_end_io, 2202c26655caSMike Snitzer .postsuspend = snapshot_postsuspend, 2203c1f0c183SMike Snitzer .preresume = snapshot_preresume, 22041da177e4SLinus Torvalds .resume = snapshot_resume, 22051da177e4SLinus Torvalds .status = snapshot_status, 22068811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 22071da177e4SLinus Torvalds }; 22081da177e4SLinus Torvalds 2209d698aa45SMikulas Patocka static struct target_type merge_target = { 2210d698aa45SMikulas Patocka .name = dm_snapshot_merge_target_name, 2211d698aa45SMikulas Patocka .version = {1, 0, 0}, 2212d698aa45SMikulas Patocka .module = THIS_MODULE, 2213d698aa45SMikulas Patocka .ctr = snapshot_ctr, 2214d698aa45SMikulas Patocka .dtr = snapshot_dtr, 22153452c2a1SMikulas Patocka .map = snapshot_merge_map, 2216d698aa45SMikulas Patocka .end_io = snapshot_end_io, 22171e03f97eSMikulas Patocka .presuspend = snapshot_merge_presuspend, 2218d698aa45SMikulas Patocka .postsuspend = snapshot_postsuspend, 2219d698aa45SMikulas Patocka .preresume = snapshot_preresume, 22201e03f97eSMikulas Patocka .resume = snapshot_merge_resume, 2221d698aa45SMikulas Patocka .status = snapshot_status, 
2222d698aa45SMikulas Patocka .iterate_devices = snapshot_iterate_devices, 2223d698aa45SMikulas Patocka }; 2224d698aa45SMikulas Patocka 22251da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 22261da177e4SLinus Torvalds { 22271da177e4SLinus Torvalds int r; 22281da177e4SLinus Torvalds 22294db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 22304db6bfe0SAlasdair G Kergon if (r) { 22314db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 22324db6bfe0SAlasdair G Kergon return r; 22334db6bfe0SAlasdair G Kergon } 22344db6bfe0SAlasdair G Kergon 22351da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 2236d698aa45SMikulas Patocka if (r < 0) { 22371da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 2238034a186dSJonathan Brassow goto bad_register_snapshot_target; 22391da177e4SLinus Torvalds } 22401da177e4SLinus Torvalds 22411da177e4SLinus Torvalds r = dm_register_target(&origin_target); 22421da177e4SLinus Torvalds if (r < 0) { 224372d94861SAlasdair G Kergon DMERR("Origin target register failed %d", r); 2244d698aa45SMikulas Patocka goto bad_register_origin_target; 2245d698aa45SMikulas Patocka } 2246d698aa45SMikulas Patocka 2247d698aa45SMikulas Patocka r = dm_register_target(&merge_target); 2248d698aa45SMikulas Patocka if (r < 0) { 2249d698aa45SMikulas Patocka DMERR("Merge target register failed %d", r); 2250d698aa45SMikulas Patocka goto bad_register_merge_target; 22511da177e4SLinus Torvalds } 22521da177e4SLinus Torvalds 22531da177e4SLinus Torvalds r = init_origin_hash(); 22541da177e4SLinus Torvalds if (r) { 22551da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 2256d698aa45SMikulas Patocka goto bad_origin_hash; 22571da177e4SLinus Torvalds } 22581da177e4SLinus Torvalds 22591d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 22601da177e4SLinus Torvalds if (!exception_cache) { 22611da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 22621da177e4SLinus Torvalds r = 
-ENOMEM; 2263d698aa45SMikulas Patocka goto bad_exception_cache; 22641da177e4SLinus Torvalds } 22651da177e4SLinus Torvalds 2266028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 22671da177e4SLinus Torvalds if (!pending_cache) { 22681da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 22691da177e4SLinus Torvalds r = -ENOMEM; 2270d698aa45SMikulas Patocka goto bad_pending_cache; 22711da177e4SLinus Torvalds } 22721da177e4SLinus Torvalds 2273cd45daffSMikulas Patocka tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 2274cd45daffSMikulas Patocka if (!tracked_chunk_cache) { 2275cd45daffSMikulas Patocka DMERR("Couldn't create cache to track chunks in use."); 2276cd45daffSMikulas Patocka r = -ENOMEM; 2277d698aa45SMikulas Patocka goto bad_tracked_chunk_cache; 2278cd45daffSMikulas Patocka } 2279cd45daffSMikulas Patocka 2280ca3a931fSAlasdair G Kergon ksnapd = create_singlethread_workqueue("ksnapd"); 2281ca3a931fSAlasdair G Kergon if (!ksnapd) { 2282ca3a931fSAlasdair G Kergon DMERR("Failed to create ksnapd workqueue."); 2283ca3a931fSAlasdair G Kergon r = -ENOMEM; 228492e86812SMikulas Patocka goto bad_pending_pool; 2285ca3a931fSAlasdair G Kergon } 2286ca3a931fSAlasdair G Kergon 22871da177e4SLinus Torvalds return 0; 22881da177e4SLinus Torvalds 2289cd45daffSMikulas Patocka bad_pending_pool: 2290cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 2291d698aa45SMikulas Patocka bad_tracked_chunk_cache: 22921da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 2293d698aa45SMikulas Patocka bad_pending_cache: 22941da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 2295d698aa45SMikulas Patocka bad_exception_cache: 22961da177e4SLinus Torvalds exit_origin_hash(); 2297d698aa45SMikulas Patocka bad_origin_hash: 2298d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 2299d698aa45SMikulas Patocka bad_register_merge_target: 23001da177e4SLinus Torvalds dm_unregister_target(&origin_target); 
2301d698aa45SMikulas Patocka bad_register_origin_target: 23021da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 2303034a186dSJonathan Brassow bad_register_snapshot_target: 2304034a186dSJonathan Brassow dm_exception_store_exit(); 2305d698aa45SMikulas Patocka 23061da177e4SLinus Torvalds return r; 23071da177e4SLinus Torvalds } 23081da177e4SLinus Torvalds 23091da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 23101da177e4SLinus Torvalds { 2311ca3a931fSAlasdair G Kergon destroy_workqueue(ksnapd); 2312ca3a931fSAlasdair G Kergon 231310d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 231410d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 2315d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 23161da177e4SLinus Torvalds 23171da177e4SLinus Torvalds exit_origin_hash(); 23181da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 23191da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 2320cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 23214db6bfe0SAlasdair G Kergon 23224db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 23231da177e4SLinus Torvalds } 23241da177e4SLinus Torvalds 23251da177e4SLinus Torvalds /* Module hooks */ 23261da177e4SLinus Torvalds module_init(dm_snapshot_init); 23271da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 23281da177e4SLinus Torvalds 23291da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 23301da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 23311da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 2332