11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 161da177e4SLinus Torvalds #include <linux/mempool.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/vmalloc.h> 206f3c3f0aSvignesh babu #include <linux/log2.h> 21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 22ccc45ea8SJonathan Brassow #include <linux/workqueue.h> 231da177e4SLinus Torvalds 24aea53d92SJonathan Brassow #include "dm-exception-store.h" 251da177e4SLinus Torvalds 2672d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 2772d94861SAlasdair G Kergon 28d698aa45SMikulas Patocka static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; 29d698aa45SMikulas Patocka 30d698aa45SMikulas Patocka #define dm_target_is_snapshot_merge(ti) \ 31d698aa45SMikulas Patocka ((ti)->type->name == dm_snapshot_merge_target_name) 32d698aa45SMikulas Patocka 331da177e4SLinus Torvalds /* 341da177e4SLinus Torvalds * The percentage increment we will wake up users at 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds #define WAKE_UP_PERCENT 5 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds /* 391da177e4SLinus Torvalds * kcopyd priority of snapshot operations 401da177e4SLinus Torvalds */ 411da177e4SLinus Torvalds #define SNAPSHOT_COPY_PRIORITY 2 421da177e4SLinus Torvalds 431da177e4SLinus Torvalds /* 448ee2767aSMilan Broz * Reserve 1MB for each snapshot initially (with minimum of 1 page). 451da177e4SLinus Torvalds */ 468ee2767aSMilan Broz #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 471da177e4SLinus Torvalds 48cd45daffSMikulas Patocka /* 49cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 50cd45daffSMikulas Patocka */ 51cd45daffSMikulas Patocka #define MIN_IOS 256 52cd45daffSMikulas Patocka 53ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 54ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 55ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 56ccc45ea8SJonathan Brassow 57191437a5SJon Brassow struct dm_exception_table { 58ccc45ea8SJonathan Brassow uint32_t hash_mask; 59ccc45ea8SJonathan Brassow unsigned hash_shift; 60ccc45ea8SJonathan Brassow struct list_head *table; 61ccc45ea8SJonathan Brassow }; 62ccc45ea8SJonathan Brassow 63ccc45ea8SJonathan Brassow struct dm_snapshot { 64ccc45ea8SJonathan Brassow struct rw_semaphore lock; 65ccc45ea8SJonathan Brassow 66ccc45ea8SJonathan Brassow struct dm_dev *origin; 67fc56f6fbSMike Snitzer struct dm_dev *cow; 68fc56f6fbSMike Snitzer 69fc56f6fbSMike Snitzer struct dm_target *ti; 70ccc45ea8SJonathan Brassow 71ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 72ccc45ea8SJonathan Brassow struct list_head list; 73ccc45ea8SJonathan Brassow 74ccc45ea8SJonathan Brassow /* You can't use a snapshot if this is 0 (e.g. if full) */ 75ccc45ea8SJonathan Brassow int valid; 76ccc45ea8SJonathan Brassow 77ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 78ccc45ea8SJonathan Brassow int active; 79ccc45ea8SJonathan Brassow 80c26655caSMike Snitzer /* Whether or not owning mapped_device is suspended */ 81c26655caSMike Snitzer int suspended; 82c26655caSMike Snitzer 83ccc45ea8SJonathan Brassow mempool_t *pending_pool; 84ccc45ea8SJonathan Brassow 85ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 86ccc45ea8SJonathan Brassow 87191437a5SJon Brassow struct dm_exception_table pending; 88191437a5SJon Brassow struct dm_exception_table complete; 89ccc45ea8SJonathan Brassow 90ccc45ea8SJonathan Brassow /* 91ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 92ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 93ccc45ea8SJonathan Brassow */ 94ccc45ea8SJonathan Brassow spinlock_t pe_lock; 95ccc45ea8SJonathan Brassow 96ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 97ccc45ea8SJonathan Brassow struct dm_exception_store *store; 98ccc45ea8SJonathan Brassow 99ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 100ccc45ea8SJonathan Brassow 101ccc45ea8SJonathan Brassow /* Queue of snapshot writes for ksnapd to flush */ 102ccc45ea8SJonathan Brassow struct bio_list queued_bios; 103ccc45ea8SJonathan Brassow struct work_struct queued_bios_work; 104ccc45ea8SJonathan Brassow 105ccc45ea8SJonathan Brassow /* Chunks with outstanding reads */ 106ccc45ea8SJonathan Brassow mempool_t *tracked_chunk_pool; 107ccc45ea8SJonathan Brassow spinlock_t tracked_chunk_lock; 108ccc45ea8SJonathan Brassow struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 109ccc45ea8SJonathan Brassow }; 110ccc45ea8SJonathan Brassow 111fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 112fc56f6fbSMike Snitzer { 113fc56f6fbSMike Snitzer return s->cow; 114fc56f6fbSMike Snitzer } 115fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 116fc56f6fbSMike Snitzer 117c642f9e0SAdrian Bunk static struct workqueue_struct *ksnapd; 118c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work); 119ca3a931fSAlasdair G Kergon 120ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 121ccc45ea8SJonathan Brassow chunk_t chunk) 122ccc45ea8SJonathan Brassow { 123ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 124ccc45ea8SJonathan Brassow } 125ccc45ea8SJonathan Brassow 126ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 127ccc45ea8SJonathan Brassow { 128ccc45ea8SJonathan Brassow /* 129ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 130ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 131ccc45ea8SJonathan Brassow */ 132ccc45ea8SJonathan Brassow return lhs == rhs; 133ccc45ea8SJonathan Brassow } 134ccc45ea8SJonathan Brassow 135028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1361d4989c8SJon Brassow struct dm_exception e; 1371da177e4SLinus Torvalds 1381da177e4SLinus Torvalds /* 1391da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 1401da177e4SLinus Torvalds * in a bio list 1411da177e4SLinus Torvalds */ 1421da177e4SLinus Torvalds struct bio_list origin_bios; 1431da177e4SLinus Torvalds struct bio_list snapshot_bios; 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds /* Pointer back to snapshot context */ 1461da177e4SLinus Torvalds struct dm_snapshot *snap; 1471da177e4SLinus Torvalds 1481da177e4SLinus Torvalds /* 1491da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 1501da177e4SLinus Torvalds * kcopyd. 1511da177e4SLinus Torvalds */ 1521da177e4SLinus Torvalds int started; 1531da177e4SLinus Torvalds }; 1541da177e4SLinus Torvalds 1551da177e4SLinus Torvalds /* 1561da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 1571da177e4SLinus Torvalds * a lock to protect it 1581da177e4SLinus Torvalds */ 159e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 160e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 1611da177e4SLinus Torvalds 162cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 163cd45daffSMikulas Patocka struct hlist_node node; 164cd45daffSMikulas Patocka chunk_t chunk; 165cd45daffSMikulas Patocka }; 166cd45daffSMikulas Patocka 167cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache; 168cd45daffSMikulas Patocka 169cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 170cd45daffSMikulas Patocka chunk_t chunk) 171cd45daffSMikulas Patocka { 172cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 173cd45daffSMikulas Patocka GFP_NOIO); 174cd45daffSMikulas Patocka unsigned long flags; 175cd45daffSMikulas Patocka 176cd45daffSMikulas Patocka c->chunk = chunk; 177cd45daffSMikulas Patocka 178cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 179cd45daffSMikulas Patocka hlist_add_head(&c->node, 180cd45daffSMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 181cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 182cd45daffSMikulas Patocka 183cd45daffSMikulas Patocka return c; 184cd45daffSMikulas Patocka } 185cd45daffSMikulas Patocka 186cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, 187cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c) 188cd45daffSMikulas Patocka { 189cd45daffSMikulas Patocka unsigned long flags; 190cd45daffSMikulas Patocka 191cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 192cd45daffSMikulas Patocka hlist_del(&c->node); 193cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 194cd45daffSMikulas Patocka 195cd45daffSMikulas Patocka mempool_free(c, s->tracked_chunk_pool); 196cd45daffSMikulas Patocka } 197cd45daffSMikulas Patocka 198a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 199a8d41b59SMikulas Patocka { 200a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 201a8d41b59SMikulas Patocka struct hlist_node *hn; 202a8d41b59SMikulas Patocka int found = 0; 203a8d41b59SMikulas Patocka 204a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 205a8d41b59SMikulas Patocka 206a8d41b59SMikulas Patocka hlist_for_each_entry(c, hn, 207a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 208a8d41b59SMikulas Patocka if (c->chunk == chunk) { 209a8d41b59SMikulas Patocka found = 1; 210a8d41b59SMikulas Patocka break; 211a8d41b59SMikulas Patocka } 212a8d41b59SMikulas Patocka } 213a8d41b59SMikulas Patocka 214a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 215a8d41b59SMikulas Patocka 216a8d41b59SMikulas Patocka return found; 217a8d41b59SMikulas Patocka } 218a8d41b59SMikulas Patocka 2191da177e4SLinus Torvalds /* 220615d1eb9SMike Snitzer * This conflicting I/O is extremely improbable in the caller, 221615d1eb9SMike Snitzer * so msleep(1) is sufficient and there is no need for a wait queue. 222615d1eb9SMike Snitzer */ 223615d1eb9SMike Snitzer static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk) 224615d1eb9SMike Snitzer { 225615d1eb9SMike Snitzer while (__chunk_is_tracked(s, chunk)) 226615d1eb9SMike Snitzer msleep(1); 227615d1eb9SMike Snitzer } 228615d1eb9SMike Snitzer 229615d1eb9SMike Snitzer /* 2301da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 2311da177e4SLinus Torvalds */ 2321da177e4SLinus Torvalds struct origin { 2331da177e4SLinus Torvalds /* The origin device */ 2341da177e4SLinus Torvalds struct block_device *bdev; 2351da177e4SLinus Torvalds 2361da177e4SLinus Torvalds struct list_head hash_list; 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds /* List of snapshots for this origin */ 2391da177e4SLinus Torvalds struct list_head snapshots; 2401da177e4SLinus Torvalds }; 2411da177e4SLinus Torvalds 2421da177e4SLinus Torvalds /* 2431da177e4SLinus Torvalds * Size of the hash table for origin volumes. If we make this 2441da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 2451da177e4SLinus Torvalds */ 2461da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 2471da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 2481da177e4SLinus Torvalds static struct list_head *_origins; 2491da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds static int init_origin_hash(void) 2521da177e4SLinus Torvalds { 2531da177e4SLinus Torvalds int i; 2541da177e4SLinus Torvalds 2551da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 2561da177e4SLinus Torvalds GFP_KERNEL); 2571da177e4SLinus Torvalds if (!_origins) { 25872d94861SAlasdair G Kergon DMERR("unable to allocate memory"); 2591da177e4SLinus Torvalds return -ENOMEM; 2601da177e4SLinus Torvalds } 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 2631da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 2641da177e4SLinus Torvalds init_rwsem(&_origins_lock); 2651da177e4SLinus Torvalds 2661da177e4SLinus Torvalds return 0; 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds static void exit_origin_hash(void) 2701da177e4SLinus Torvalds { 2711da177e4SLinus Torvalds kfree(_origins); 2721da177e4SLinus Torvalds } 2731da177e4SLinus Torvalds 274028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 2751da177e4SLinus Torvalds { 2761da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 2801da177e4SLinus Torvalds { 2811da177e4SLinus Torvalds struct list_head *ol; 2821da177e4SLinus Torvalds struct origin *o; 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 2851da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 2861da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 2871da177e4SLinus Torvalds return o; 2881da177e4SLinus Torvalds 2891da177e4SLinus Torvalds return NULL; 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds 2921da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 2931da177e4SLinus Torvalds { 2941da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 2951da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 2961da177e4SLinus Torvalds } 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds /* 299c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 300c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 301c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 302c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 3039d3b15c4SMikulas Patocka * snap_merge - an existing snapshot-merge target linked to the same origin. 3049d3b15c4SMikulas Patocka * There can be at most one snapshot-merge target. The parameter is optional. 305c1f0c183SMike Snitzer * 3069d3b15c4SMikulas Patocka * Possible return values and states of snap_src and snap_dest. 307c1f0c183SMike Snitzer * 0: NULL, NULL - first new snapshot 308c1f0c183SMike Snitzer * 1: snap_src, NULL - normal snapshot 309c1f0c183SMike Snitzer * 2: snap_src, snap_dest - waiting for handover 310c1f0c183SMike Snitzer * 2: snap_src, NULL - handed over, waiting for old to be deleted 311c1f0c183SMike Snitzer * 1: NULL, snap_dest - source got destroyed without handover 312c1f0c183SMike Snitzer */ 313c1f0c183SMike Snitzer static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, 314c1f0c183SMike Snitzer struct dm_snapshot **snap_src, 3159d3b15c4SMikulas Patocka struct dm_snapshot **snap_dest, 3169d3b15c4SMikulas Patocka struct dm_snapshot **snap_merge) 317c1f0c183SMike Snitzer { 318c1f0c183SMike Snitzer struct dm_snapshot *s; 319c1f0c183SMike Snitzer struct origin *o; 320c1f0c183SMike Snitzer int count = 0; 321c1f0c183SMike Snitzer int active; 322c1f0c183SMike Snitzer 323c1f0c183SMike Snitzer o = __lookup_origin(snap->origin->bdev); 324c1f0c183SMike Snitzer if (!o) 325c1f0c183SMike Snitzer goto out; 326c1f0c183SMike Snitzer 327c1f0c183SMike Snitzer list_for_each_entry(s, &o->snapshots, list) { 3289d3b15c4SMikulas Patocka if (dm_target_is_snapshot_merge(s->ti) && snap_merge) 3299d3b15c4SMikulas Patocka *snap_merge = s; 330c1f0c183SMike Snitzer if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) 331c1f0c183SMike Snitzer continue; 332c1f0c183SMike Snitzer 333c1f0c183SMike Snitzer down_read(&s->lock); 334c1f0c183SMike Snitzer active = s->active; 335c1f0c183SMike Snitzer up_read(&s->lock); 336c1f0c183SMike Snitzer 337c1f0c183SMike Snitzer if (active) { 338c1f0c183SMike Snitzer if (snap_src) 339c1f0c183SMike Snitzer *snap_src = s; 340c1f0c183SMike Snitzer } else if (snap_dest) 341c1f0c183SMike Snitzer *snap_dest = s; 342c1f0c183SMike Snitzer 343c1f0c183SMike Snitzer count++; 344c1f0c183SMike Snitzer } 345c1f0c183SMike Snitzer 346c1f0c183SMike Snitzer out: 347c1f0c183SMike Snitzer return count; 348c1f0c183SMike Snitzer } 349c1f0c183SMike Snitzer 350c1f0c183SMike Snitzer /* 351c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 352c1f0c183SMike Snitzer * otherwise returns 0. 353c1f0c183SMike Snitzer */ 354c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 355c1f0c183SMike Snitzer { 356c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 3579d3b15c4SMikulas Patocka struct dm_snapshot *snap_merge = NULL; 358c1f0c183SMike Snitzer 359c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 3609d3b15c4SMikulas Patocka if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, 3619d3b15c4SMikulas Patocka &snap_merge) == 2) || 362c1f0c183SMike Snitzer snap_dest) { 363c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 364c1f0c183SMike Snitzer "table handover failed"; 365c1f0c183SMike Snitzer return -EINVAL; 366c1f0c183SMike Snitzer } 367c1f0c183SMike Snitzer 368c1f0c183SMike Snitzer /* 369c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 370c1f0c183SMike Snitzer * destination. 371c1f0c183SMike Snitzer */ 372c1f0c183SMike Snitzer if (!snap_src) 373c1f0c183SMike Snitzer return 0; 374c1f0c183SMike Snitzer 3759d3b15c4SMikulas Patocka /* 3769d3b15c4SMikulas Patocka * Non-snapshot-merge handover? 3779d3b15c4SMikulas Patocka */ 3789d3b15c4SMikulas Patocka if (!dm_target_is_snapshot_merge(snap->ti)) 3799d3b15c4SMikulas Patocka return 1; 3809d3b15c4SMikulas Patocka 3819d3b15c4SMikulas Patocka /* 3829d3b15c4SMikulas Patocka * Do not allow more than one merging snapshot. 3839d3b15c4SMikulas Patocka */ 3849d3b15c4SMikulas Patocka if (snap_merge) { 3859d3b15c4SMikulas Patocka snap->ti->error = "A snapshot is already merging."; 3869d3b15c4SMikulas Patocka return -EINVAL; 3879d3b15c4SMikulas Patocka } 3889d3b15c4SMikulas Patocka 389c1f0c183SMike Snitzer return 1; 390c1f0c183SMike Snitzer } 391c1f0c183SMike Snitzer 392c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 393c1f0c183SMike Snitzer { 394c1f0c183SMike Snitzer struct dm_snapshot *l; 395c1f0c183SMike Snitzer 396c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 397c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 398c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 399c1f0c183SMike Snitzer break; 400c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 401c1f0c183SMike Snitzer } 402c1f0c183SMike Snitzer 403c1f0c183SMike Snitzer /* 4041da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 4051da177e4SLinus Torvalds * up when the origin has a write on it. 406c1f0c183SMike Snitzer * 407c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 408c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 409c1f0c183SMike Snitzer * otherwise returns 0. 4101da177e4SLinus Torvalds */ 4111da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 4121da177e4SLinus Torvalds { 413c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 4141da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 415c1f0c183SMike Snitzer int r = 0; 4161da177e4SLinus Torvalds 41760c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 41860c856c8SMikulas Patocka if (!new_o) 41960c856c8SMikulas Patocka return -ENOMEM; 42060c856c8SMikulas Patocka 4211da177e4SLinus Torvalds down_write(&_origins_lock); 4221da177e4SLinus Torvalds 423c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 424c1f0c183SMike Snitzer if (r < 0) { 425c1f0c183SMike Snitzer kfree(new_o); 426c1f0c183SMike Snitzer goto out; 427c1f0c183SMike Snitzer } 428c1f0c183SMike Snitzer 429c1f0c183SMike Snitzer o = __lookup_origin(bdev); 43060c856c8SMikulas Patocka if (o) 43160c856c8SMikulas Patocka kfree(new_o); 43260c856c8SMikulas Patocka else { 4331da177e4SLinus Torvalds /* New origin */ 43460c856c8SMikulas Patocka o = new_o; 4351da177e4SLinus Torvalds 4361da177e4SLinus Torvalds /* Initialise the struct */ 4371da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 4381da177e4SLinus Torvalds o->bdev = bdev; 4391da177e4SLinus Torvalds 4401da177e4SLinus Torvalds __insert_origin(o); 4411da177e4SLinus Torvalds } 4421da177e4SLinus Torvalds 443c1f0c183SMike Snitzer __insert_snapshot(o, snap); 444c1f0c183SMike Snitzer 445c1f0c183SMike Snitzer out: 446c1f0c183SMike Snitzer up_write(&_origins_lock); 447c1f0c183SMike Snitzer 448c1f0c183SMike Snitzer return r; 449c1f0c183SMike Snitzer } 450c1f0c183SMike Snitzer 451c1f0c183SMike Snitzer /* 452c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 453c1f0c183SMike Snitzer */ 454c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 455c1f0c183SMike Snitzer { 456c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 457c1f0c183SMike Snitzer 458c1f0c183SMike Snitzer down_write(&_origins_lock); 459c1f0c183SMike Snitzer 460c1f0c183SMike Snitzer list_del(&s->list); 461c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 4621da177e4SLinus Torvalds 4631da177e4SLinus Torvalds up_write(&_origins_lock); 4641da177e4SLinus Torvalds } 4651da177e4SLinus Torvalds 4661da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 4671da177e4SLinus Torvalds { 4681da177e4SLinus Torvalds struct origin *o; 4691da177e4SLinus Torvalds 4701da177e4SLinus Torvalds down_write(&_origins_lock); 4711da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 4721da177e4SLinus Torvalds 4731da177e4SLinus Torvalds list_del(&s->list); 474c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 4751da177e4SLinus Torvalds list_del(&o->hash_list); 4761da177e4SLinus Torvalds kfree(o); 4771da177e4SLinus Torvalds } 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds up_write(&_origins_lock); 4801da177e4SLinus Torvalds } 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds /* 4831da177e4SLinus Torvalds * Implementation of the exception hash tables. 484d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 485d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 4861da177e4SLinus Torvalds */ 4873510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 4883510cb94SJon Brassow uint32_t size, unsigned hash_shift) 4891da177e4SLinus Torvalds { 4901da177e4SLinus Torvalds unsigned int i; 4911da177e4SLinus Torvalds 492d74f81f8SMilan Broz et->hash_shift = hash_shift; 4931da177e4SLinus Torvalds et->hash_mask = size - 1; 4941da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 4951da177e4SLinus Torvalds if (!et->table) 4961da177e4SLinus Torvalds return -ENOMEM; 4971da177e4SLinus Torvalds 4981da177e4SLinus Torvalds for (i = 0; i < size; i++) 4991da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 5001da177e4SLinus Torvalds 5011da177e4SLinus Torvalds return 0; 5021da177e4SLinus Torvalds } 5031da177e4SLinus Torvalds 5043510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 505191437a5SJon Brassow struct kmem_cache *mem) 5061da177e4SLinus Torvalds { 5071da177e4SLinus Torvalds struct list_head *slot; 5081d4989c8SJon Brassow struct dm_exception *ex, *next; 5091da177e4SLinus Torvalds int i, size; 5101da177e4SLinus Torvalds 5111da177e4SLinus Torvalds size = et->hash_mask + 1; 5121da177e4SLinus Torvalds for (i = 0; i < size; i++) { 5131da177e4SLinus Torvalds slot = et->table + i; 5141da177e4SLinus Torvalds 5151da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 5161da177e4SLinus Torvalds kmem_cache_free(mem, ex); 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds 5191da177e4SLinus Torvalds vfree(et->table); 5201da177e4SLinus Torvalds } 5211da177e4SLinus Torvalds 522191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 5231da177e4SLinus Torvalds { 524d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 5251da177e4SLinus Torvalds } 5261da177e4SLinus Torvalds 5273510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 5281da177e4SLinus Torvalds { 5291da177e4SLinus Torvalds list_del(&e->hash_list); 5301da177e4SLinus Torvalds } 5311da177e4SLinus Torvalds 5321da177e4SLinus Torvalds /* 5331da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 5341da177e4SLinus Torvalds * remapped. 5351da177e4SLinus Torvalds */ 5363510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 5371da177e4SLinus Torvalds chunk_t chunk) 5381da177e4SLinus Torvalds { 5391da177e4SLinus Torvalds struct list_head *slot; 5401d4989c8SJon Brassow struct dm_exception *e; 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 5431da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 544d74f81f8SMilan Broz if (chunk >= e->old_chunk && 545d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 5461da177e4SLinus Torvalds return e; 5471da177e4SLinus Torvalds 5481da177e4SLinus Torvalds return NULL; 5491da177e4SLinus Torvalds } 5501da177e4SLinus Torvalds 5513510cb94SJon Brassow static struct dm_exception *alloc_completed_exception(void) 5521da177e4SLinus Torvalds { 5531d4989c8SJon Brassow struct dm_exception *e; 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 5561da177e4SLinus Torvalds if (!e) 5571da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 5581da177e4SLinus Torvalds 5591da177e4SLinus Torvalds return e; 5601da177e4SLinus Torvalds } 5611da177e4SLinus Torvalds 5623510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 5631da177e4SLinus Torvalds { 5641da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 5651da177e4SLinus Torvalds } 5661da177e4SLinus Torvalds 56792e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 5681da177e4SLinus Torvalds { 56992e86812SMikulas Patocka struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 57092e86812SMikulas Patocka GFP_NOIO); 57192e86812SMikulas Patocka 572879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 57392e86812SMikulas Patocka pe->snap = s; 57492e86812SMikulas Patocka 57592e86812SMikulas Patocka return pe; 5761da177e4SLinus Torvalds } 5771da177e4SLinus Torvalds 578028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 5791da177e4SLinus Torvalds { 580879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 581879129d2SMikulas Patocka 582879129d2SMikulas Patocka mempool_free(pe, s->pending_pool); 583879129d2SMikulas Patocka smp_mb__before_atomic_dec(); 584879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 5851da177e4SLinus Torvalds } 5861da177e4SLinus Torvalds 5873510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 5881d4989c8SJon Brassow struct dm_exception *new_e) 589d74f81f8SMilan Broz { 590d74f81f8SMilan Broz struct list_head *l; 5911d4989c8SJon Brassow struct dm_exception *e = NULL; 592d74f81f8SMilan Broz 593d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 594d74f81f8SMilan Broz 595d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 596d74f81f8SMilan Broz if (!eh->hash_shift) 597d74f81f8SMilan Broz goto out; 598d74f81f8SMilan Broz 599d74f81f8SMilan Broz /* List is ordered by old_chunk */ 600d74f81f8SMilan Broz list_for_each_entry_reverse(e, l, hash_list) { 601d74f81f8SMilan Broz /* Insert after an existing chunk? */ 602d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 603d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 604d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 605d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 606d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 6073510cb94SJon Brassow free_completed_exception(new_e); 608d74f81f8SMilan Broz return; 609d74f81f8SMilan Broz } 610d74f81f8SMilan Broz 611d74f81f8SMilan Broz /* Insert before an existing chunk? */ 612d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 613d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 614d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 615d74f81f8SMilan Broz e->old_chunk--; 616d74f81f8SMilan Broz e->new_chunk--; 6173510cb94SJon Brassow free_completed_exception(new_e); 618d74f81f8SMilan Broz return; 619d74f81f8SMilan Broz } 620d74f81f8SMilan Broz 621d74f81f8SMilan Broz if (new_e->old_chunk > e->old_chunk) 622d74f81f8SMilan Broz break; 623d74f81f8SMilan Broz } 624d74f81f8SMilan Broz 625d74f81f8SMilan Broz out: 626d74f81f8SMilan Broz list_add(&new_e->hash_list, e ? &e->hash_list : l); 627d74f81f8SMilan Broz } 628d74f81f8SMilan Broz 629a159c1acSJonathan Brassow /* 630a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 631a159c1acSJonathan Brassow * initialising. 632a159c1acSJonathan Brassow */ 633a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 6341da177e4SLinus Torvalds { 635a159c1acSJonathan Brassow struct dm_snapshot *s = context; 6361d4989c8SJon Brassow struct dm_exception *e; 6371da177e4SLinus Torvalds 6383510cb94SJon Brassow e = alloc_completed_exception(); 6391da177e4SLinus Torvalds if (!e) 6401da177e4SLinus Torvalds return -ENOMEM; 6411da177e4SLinus Torvalds 6421da177e4SLinus Torvalds e->old_chunk = old; 643d74f81f8SMilan Broz 644d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 6451da177e4SLinus Torvalds e->new_chunk = new; 646d74f81f8SMilan Broz 6473510cb94SJon Brassow dm_insert_exception(&s->complete, e); 648d74f81f8SMilan Broz 6491da177e4SLinus Torvalds return 0; 6501da177e4SLinus Torvalds } 6511da177e4SLinus Torvalds 6527e201b35SMikulas Patocka #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) 6537e201b35SMikulas Patocka 6547e201b35SMikulas Patocka /* 6557e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 6567e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 6577e201b35SMikulas Patocka */ 6587e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o) 6597e201b35SMikulas Patocka { 6607e201b35SMikulas Patocka struct dm_snapshot *snap; 6617e201b35SMikulas Patocka unsigned chunk_size = 0; 6627e201b35SMikulas Patocka 6637e201b35SMikulas Patocka if (o) 6647e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 6657e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 6667e201b35SMikulas Patocka snap->store->chunk_size); 6677e201b35SMikulas Patocka 6687e201b35SMikulas Patocka return chunk_size; 6697e201b35SMikulas Patocka } 6707e201b35SMikulas Patocka 6711da177e4SLinus Torvalds /* 6721da177e4SLinus Torvalds * Hard coded magic. 6731da177e4SLinus Torvalds */ 6741da177e4SLinus Torvalds static int calc_max_buckets(void) 6751da177e4SLinus Torvalds { 6761da177e4SLinus Torvalds /* use a fixed size of 2MB */ 6771da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 6781da177e4SLinus Torvalds mem /= sizeof(struct list_head); 6791da177e4SLinus Torvalds 6801da177e4SLinus Torvalds return mem; 6811da177e4SLinus Torvalds } 6821da177e4SLinus Torvalds 6831da177e4SLinus Torvalds /* 6841da177e4SLinus Torvalds * Allocate room for a suitable hash table. 6851da177e4SLinus Torvalds */ 686fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 6871da177e4SLinus Torvalds { 6881da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 6891da177e4SLinus Torvalds 6901da177e4SLinus Torvalds /* 6911da177e4SLinus Torvalds * Calculate based on the size of the original volume or 6921da177e4SLinus Torvalds * the COW volume... 6931da177e4SLinus Torvalds */ 694fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 6951da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 6961da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 6971da177e4SLinus Torvalds 698fee1998eSJonathan Brassow hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 6991da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 7001da177e4SLinus Torvalds 7018e87b9b8SMikulas Patocka if (hash_size < 64) 7028e87b9b8SMikulas Patocka hash_size = 64; 7038defd830SRobert P. J. Day hash_size = rounddown_pow_of_two(hash_size); 7043510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 705d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 7061da177e4SLinus Torvalds return -ENOMEM; 7071da177e4SLinus Torvalds 7081da177e4SLinus Torvalds /* 7091da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 7101da177e4SLinus Torvalds * Make this smaller than the real hash table 7111da177e4SLinus Torvalds */ 7121da177e4SLinus Torvalds hash_size >>= 3; 7131da177e4SLinus Torvalds if (hash_size < 64) 7141da177e4SLinus Torvalds hash_size = 64; 7151da177e4SLinus Torvalds 7163510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 7173510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 7181da177e4SLinus Torvalds return -ENOMEM; 7191da177e4SLinus Torvalds } 7201da177e4SLinus Torvalds 7211da177e4SLinus Torvalds return 0; 7221da177e4SLinus Torvalds } 7231da177e4SLinus Torvalds 7241da177e4SLinus Torvalds /* 7251da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 7261da177e4SLinus Torvalds */ 7271da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 7281da177e4SLinus Torvalds { 7291da177e4SLinus Torvalds struct dm_snapshot *s; 730cd45daffSMikulas Patocka int i; 7311da177e4SLinus Torvalds int r = -EINVAL; 732fc56f6fbSMike Snitzer char *origin_path, *cow_path; 73310b8106aSMike Snitzer unsigned args_used, num_flush_requests = 1; 73410b8106aSMike Snitzer fmode_t origin_mode = FMODE_READ; 7351da177e4SLinus Torvalds 7364c7e3bf4SMark McLoughlin if (argc != 4) { 73772d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 7381da177e4SLinus Torvalds r = -EINVAL; 739fc56f6fbSMike Snitzer goto bad; 7401da177e4SLinus Torvalds } 7411da177e4SLinus Torvalds 74210b8106aSMike Snitzer if (dm_target_is_snapshot_merge(ti)) { 74310b8106aSMike Snitzer num_flush_requests = 2; 74410b8106aSMike Snitzer origin_mode = FMODE_WRITE; 74510b8106aSMike Snitzer } 74610b8106aSMike Snitzer 7471da177e4SLinus Torvalds origin_path = argv[0]; 748fee1998eSJonathan Brassow argv++; 749fee1998eSJonathan Brassow argc--; 7501da177e4SLinus Torvalds 7511da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 752fee1998eSJonathan Brassow if (!s) { 7531da177e4SLinus Torvalds ti->error = "Cannot allocate snapshot context private " 7541da177e4SLinus Torvalds "structure"; 7551da177e4SLinus Torvalds r = -ENOMEM; 756fc56f6fbSMike Snitzer goto bad; 7571da177e4SLinus Torvalds } 7581da177e4SLinus Torvalds 759fc56f6fbSMike Snitzer cow_path = argv[0]; 760fc56f6fbSMike Snitzer argv++; 761fc56f6fbSMike Snitzer argc--; 762fc56f6fbSMike Snitzer 763fc56f6fbSMike Snitzer r = dm_get_device(ti, cow_path, 0, 0, 764fc56f6fbSMike Snitzer FMODE_READ | FMODE_WRITE, &s->cow); 765fc56f6fbSMike Snitzer if (r) { 766fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 767fc56f6fbSMike Snitzer goto bad_cow; 768fc56f6fbSMike Snitzer } 769fc56f6fbSMike Snitzer 770fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 771fc56f6fbSMike Snitzer if (r) { 772fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 773fc56f6fbSMike Snitzer r = -EINVAL; 774fc56f6fbSMike Snitzer goto bad_store; 775fc56f6fbSMike Snitzer } 776fc56f6fbSMike Snitzer 777fc56f6fbSMike Snitzer argv += args_used; 778fc56f6fbSMike Snitzer argc -= args_used; 779fc56f6fbSMike Snitzer 78010b8106aSMike Snitzer r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin); 7811da177e4SLinus Torvalds if (r) { 7821da177e4SLinus Torvalds ti->error = "Cannot get origin device"; 783fee1998eSJonathan Brassow goto bad_origin; 7841da177e4SLinus Torvalds } 7851da177e4SLinus Torvalds 786fc56f6fbSMike Snitzer s->ti = ti; 7871da177e4SLinus Torvalds s->valid = 1; 788aa14edebSAlasdair G Kergon s->active = 0; 789c26655caSMike Snitzer s->suspended = 0; 790879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 7911da177e4SLinus Torvalds init_rwsem(&s->lock); 792c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 793ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 7941da177e4SLinus Torvalds 7951da177e4SLinus Torvalds /* Allocate hash table for COW data */ 796fee1998eSJonathan Brassow if (init_hash_tables(s)) { 7971da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 7981da177e4SLinus Torvalds r = -ENOMEM; 799fee1998eSJonathan Brassow goto bad_hash_tables; 8001da177e4SLinus Torvalds } 8011da177e4SLinus Torvalds 802eb69aca5SHeinz Mauelshagen r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 8031da177e4SLinus Torvalds if (r) { 8041da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 805fee1998eSJonathan Brassow goto bad_kcopyd; 8061da177e4SLinus Torvalds } 8071da177e4SLinus Torvalds 80892e86812SMikulas Patocka s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 80992e86812SMikulas Patocka if (!s->pending_pool) { 81092e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 811fee1998eSJonathan Brassow goto bad_pending_pool; 81292e86812SMikulas Patocka } 81392e86812SMikulas Patocka 814cd45daffSMikulas Patocka s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 815cd45daffSMikulas Patocka tracked_chunk_cache); 816cd45daffSMikulas Patocka if (!s->tracked_chunk_pool) { 817cd45daffSMikulas Patocka ti->error = "Could not allocate tracked_chunk mempool for " 818cd45daffSMikulas Patocka "tracking reads"; 81992e86812SMikulas Patocka goto bad_tracked_chunk_pool; 820cd45daffSMikulas Patocka } 821cd45daffSMikulas Patocka 822cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 823cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 824cd45daffSMikulas Patocka 825cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 826cd45daffSMikulas Patocka 827c1f0c183SMike Snitzer bio_list_init(&s->queued_bios); 828c1f0c183SMike Snitzer INIT_WORK(&s->queued_bios_work, flush_queued_bios); 829c1f0c183SMike Snitzer 830c1f0c183SMike Snitzer ti->private = s; 83110b8106aSMike Snitzer ti->num_flush_requests = num_flush_requests; 832c1f0c183SMike Snitzer 833c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 834c1f0c183SMike Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 835c1f0c183SMike Snitzer r = register_snapshot(s); 836c1f0c183SMike Snitzer if (r == -ENOMEM) { 837c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 838c1f0c183SMike Snitzer goto bad_load_and_register; 839c1f0c183SMike Snitzer } else if (r < 0) { 840c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 841c1f0c183SMike Snitzer goto bad_load_and_register; 842c1f0c183SMike Snitzer } 843c1f0c183SMike Snitzer 844c1f0c183SMike Snitzer /* 845c1f0c183SMike Snitzer * Metadata must only be loaded into one table at once, so skip this 846c1f0c183SMike Snitzer * if metadata will be handed over during resume. 847c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 848c1f0c183SMike Snitzer * ensure it's ignored. 849c1f0c183SMike Snitzer */ 850c1f0c183SMike Snitzer if (r > 0) { 851c1f0c183SMike Snitzer s->store->chunk_size = 0; 852c1f0c183SMike Snitzer return 0; 853c1f0c183SMike Snitzer } 854c1f0c183SMike Snitzer 855493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 856493df71cSJonathan Brassow (void *)s); 8570764147bSMilan Broz if (r < 0) { 858f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 859c1f0c183SMike Snitzer goto bad_read_metadata; 8600764147bSMilan Broz } else if (r > 0) { 8610764147bSMilan Broz s->valid = 0; 8620764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 863f9cea4f7SMark McLoughlin } 864aa14edebSAlasdair G Kergon 8653f2412dcSMikulas Patocka if (!s->store->chunk_size) { 8663f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 867c1f0c183SMike Snitzer goto bad_read_metadata; 8683f2412dcSMikulas Patocka } 869d0216849SJonathan Brassow ti->split_io = s->store->chunk_size; 8701da177e4SLinus Torvalds 8711da177e4SLinus Torvalds return 0; 8721da177e4SLinus Torvalds 873c1f0c183SMike Snitzer bad_read_metadata: 874c1f0c183SMike Snitzer unregister_snapshot(s); 875c1f0c183SMike Snitzer 876cd45daffSMikulas Patocka bad_load_and_register: 877cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 878cd45daffSMikulas Patocka 87992e86812SMikulas Patocka bad_tracked_chunk_pool: 88092e86812SMikulas Patocka mempool_destroy(s->pending_pool); 88192e86812SMikulas Patocka 882fee1998eSJonathan Brassow bad_pending_pool: 883eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 8841da177e4SLinus Torvalds 885fee1998eSJonathan Brassow bad_kcopyd: 8863510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 8873510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 8881da177e4SLinus Torvalds 889fee1998eSJonathan Brassow bad_hash_tables: 8901da177e4SLinus Torvalds dm_put_device(ti, s->origin); 8911da177e4SLinus Torvalds 892fee1998eSJonathan Brassow bad_origin: 893fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 894fc56f6fbSMike Snitzer 895fc56f6fbSMike Snitzer bad_store: 896fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 897fc56f6fbSMike Snitzer 898fc56f6fbSMike Snitzer bad_cow: 8991da177e4SLinus Torvalds kfree(s); 9001da177e4SLinus Torvalds 901fc56f6fbSMike Snitzer bad: 9021da177e4SLinus Torvalds return r; 9031da177e4SLinus Torvalds } 9041da177e4SLinus Torvalds 90531c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 90631c93a0cSMilan Broz { 907eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 90831c93a0cSMilan Broz s->kcopyd_client = NULL; 90931c93a0cSMilan Broz 9103510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 9113510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 91231c93a0cSMilan Broz } 91331c93a0cSMilan Broz 914c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 915c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 916c1f0c183SMike Snitzer { 917c1f0c183SMike Snitzer union { 918c1f0c183SMike Snitzer struct dm_exception_table table_swap; 919c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 920c1f0c183SMike Snitzer } u; 921c1f0c183SMike Snitzer 922c1f0c183SMike Snitzer /* 923c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 924c1f0c183SMike Snitzer */ 925c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 926c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 927c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 928c1f0c183SMike Snitzer 929c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 930c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 931c1f0c183SMike Snitzer snap_src->store = u.store_swap; 932c1f0c183SMike Snitzer 933c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 934c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 935c1f0c183SMike Snitzer 936c1f0c183SMike Snitzer snap_dest->ti->split_io = snap_dest->store->chunk_size; 937c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 938c1f0c183SMike Snitzer 939c1f0c183SMike Snitzer /* 940c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 941c1f0c183SMike Snitzer */ 942c1f0c183SMike Snitzer snap_src->valid = 0; 943c1f0c183SMike Snitzer } 944c1f0c183SMike Snitzer 9451da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 9461da177e4SLinus Torvalds { 947cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 948cd45daffSMikulas Patocka int i; 949cd45daffSMikulas Patocka #endif 950028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 951c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 9521da177e4SLinus Torvalds 953ca3a931fSAlasdair G Kergon flush_workqueue(ksnapd); 954ca3a931fSAlasdair G Kergon 955c1f0c183SMike Snitzer down_read(&_origins_lock); 956c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 9579d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 958c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 959c1f0c183SMike Snitzer down_write(&snap_dest->lock); 960c1f0c183SMike Snitzer snap_dest->valid = 0; 961c1f0c183SMike Snitzer up_write(&snap_dest->lock); 962c1f0c183SMike Snitzer DMERR("Cancelling snapshot handover."); 963c1f0c183SMike Snitzer } 964c1f0c183SMike Snitzer up_read(&_origins_lock); 965c1f0c183SMike Snitzer 966138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 967138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 9681da177e4SLinus Torvalds unregister_snapshot(s); 9691da177e4SLinus Torvalds 970879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 97190fa1527SMikulas Patocka msleep(1); 972879129d2SMikulas Patocka /* 973879129d2SMikulas Patocka * Ensure instructions in mempool_destroy aren't reordered 974879129d2SMikulas Patocka * before atomic_read. 975879129d2SMikulas Patocka */ 976879129d2SMikulas Patocka smp_mb(); 977879129d2SMikulas Patocka 978cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 979cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 980cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 981cd45daffSMikulas Patocka #endif 982cd45daffSMikulas Patocka 983cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 984cd45daffSMikulas Patocka 98531c93a0cSMilan Broz __free_exceptions(s); 9861da177e4SLinus Torvalds 98792e86812SMikulas Patocka mempool_destroy(s->pending_pool); 98892e86812SMikulas Patocka 9891da177e4SLinus Torvalds dm_put_device(ti, s->origin); 990fee1998eSJonathan Brassow 991fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 992138728dcSAlasdair G Kergon 993fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 994fc56f6fbSMike Snitzer 9951da177e4SLinus Torvalds kfree(s); 9961da177e4SLinus Torvalds } 9971da177e4SLinus Torvalds 9981da177e4SLinus Torvalds /* 9991da177e4SLinus Torvalds * Flush a list of buffers. 10001da177e4SLinus Torvalds */ 10011da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 10021da177e4SLinus Torvalds { 10031da177e4SLinus Torvalds struct bio *n; 10041da177e4SLinus Torvalds 10051da177e4SLinus Torvalds while (bio) { 10061da177e4SLinus Torvalds n = bio->bi_next; 10071da177e4SLinus Torvalds bio->bi_next = NULL; 10081da177e4SLinus Torvalds generic_make_request(bio); 10091da177e4SLinus Torvalds bio = n; 10101da177e4SLinus Torvalds } 10111da177e4SLinus Torvalds } 10121da177e4SLinus Torvalds 1013c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work) 1014ca3a931fSAlasdair G Kergon { 1015c4028958SDavid Howells struct dm_snapshot *s = 1016c4028958SDavid Howells container_of(work, struct dm_snapshot, queued_bios_work); 1017ca3a931fSAlasdair G Kergon struct bio *queued_bios; 1018ca3a931fSAlasdair G Kergon unsigned long flags; 1019ca3a931fSAlasdair G Kergon 1020ca3a931fSAlasdair G Kergon spin_lock_irqsave(&s->pe_lock, flags); 1021ca3a931fSAlasdair G Kergon queued_bios = bio_list_get(&s->queued_bios); 1022ca3a931fSAlasdair G Kergon spin_unlock_irqrestore(&s->pe_lock, flags); 1023ca3a931fSAlasdair G Kergon 1024ca3a931fSAlasdair G Kergon flush_bios(queued_bios); 1025ca3a931fSAlasdair G Kergon } 1026ca3a931fSAlasdair G Kergon 1027515ad66cSMikulas Patocka static int do_origin(struct dm_dev *origin, struct bio *bio); 1028515ad66cSMikulas Patocka 1029515ad66cSMikulas Patocka /* 1030515ad66cSMikulas Patocka * Flush a list of buffers. 1031515ad66cSMikulas Patocka */ 1032515ad66cSMikulas Patocka static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) 1033515ad66cSMikulas Patocka { 1034515ad66cSMikulas Patocka struct bio *n; 1035515ad66cSMikulas Patocka int r; 1036515ad66cSMikulas Patocka 1037515ad66cSMikulas Patocka while (bio) { 1038515ad66cSMikulas Patocka n = bio->bi_next; 1039515ad66cSMikulas Patocka bio->bi_next = NULL; 1040515ad66cSMikulas Patocka r = do_origin(s->origin, bio); 1041515ad66cSMikulas Patocka if (r == DM_MAPIO_REMAPPED) 1042515ad66cSMikulas Patocka generic_make_request(bio); 1043515ad66cSMikulas Patocka bio = n; 1044515ad66cSMikulas Patocka } 1045515ad66cSMikulas Patocka } 1046515ad66cSMikulas Patocka 10471da177e4SLinus Torvalds /* 10481da177e4SLinus Torvalds * Error a list of buffers. 10491da177e4SLinus Torvalds */ 10501da177e4SLinus Torvalds static void error_bios(struct bio *bio) 10511da177e4SLinus Torvalds { 10521da177e4SLinus Torvalds struct bio *n; 10531da177e4SLinus Torvalds 10541da177e4SLinus Torvalds while (bio) { 10551da177e4SLinus Torvalds n = bio->bi_next; 10561da177e4SLinus Torvalds bio->bi_next = NULL; 10576712ecf8SNeilBrown bio_io_error(bio); 10581da177e4SLinus Torvalds bio = n; 10591da177e4SLinus Torvalds } 10601da177e4SLinus Torvalds } 10611da177e4SLinus Torvalds 1062695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err) 106376df1c65SAlasdair G Kergon { 106476df1c65SAlasdair G Kergon if (!s->valid) 106576df1c65SAlasdair G Kergon return; 106676df1c65SAlasdair G Kergon 106776df1c65SAlasdair G Kergon if (err == -EIO) 106876df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Error reading/writing."); 106976df1c65SAlasdair G Kergon else if (err == -ENOMEM) 107076df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Unable to allocate exception."); 107176df1c65SAlasdair G Kergon 1072493df71cSJonathan Brassow if (s->store->type->drop_snapshot) 1073493df71cSJonathan Brassow s->store->type->drop_snapshot(s->store); 107476df1c65SAlasdair G Kergon 107576df1c65SAlasdair G Kergon s->valid = 0; 107676df1c65SAlasdair G Kergon 1077fc56f6fbSMike Snitzer dm_table_event(s->ti->table); 107876df1c65SAlasdair G Kergon } 107976df1c65SAlasdair G Kergon 1080028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success) 10811da177e4SLinus Torvalds { 10821d4989c8SJon Brassow struct dm_exception *e; 10831da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 10849d493fa8SAlasdair G Kergon struct bio *origin_bios = NULL; 10859d493fa8SAlasdair G Kergon struct bio *snapshot_bios = NULL; 10869d493fa8SAlasdair G Kergon int error = 0; 10871da177e4SLinus Torvalds 108876df1c65SAlasdair G Kergon if (!success) { 108976df1c65SAlasdair G Kergon /* Read/write error - snapshot is unusable */ 10901da177e4SLinus Torvalds down_write(&s->lock); 1091695368acSAlasdair G Kergon __invalidate_snapshot(s, -EIO); 10929d493fa8SAlasdair G Kergon error = 1; 109376df1c65SAlasdair G Kergon goto out; 109476df1c65SAlasdair G Kergon } 109576df1c65SAlasdair G Kergon 10963510cb94SJon Brassow e = alloc_completed_exception(); 109776df1c65SAlasdair G Kergon if (!e) { 109876df1c65SAlasdair G Kergon down_write(&s->lock); 1099695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 11009d493fa8SAlasdair G Kergon error = 1; 11011da177e4SLinus Torvalds goto out; 11021da177e4SLinus Torvalds } 11031da177e4SLinus Torvalds *e = pe->e; 11041da177e4SLinus Torvalds 11059d493fa8SAlasdair G Kergon down_write(&s->lock); 11069d493fa8SAlasdair G Kergon if (!s->valid) { 11073510cb94SJon Brassow free_completed_exception(e); 11089d493fa8SAlasdair G Kergon error = 1; 11099d493fa8SAlasdair G Kergon goto out; 11109d493fa8SAlasdair G Kergon } 11119d493fa8SAlasdair G Kergon 1112615d1eb9SMike Snitzer /* Check for conflicting reads */ 1113615d1eb9SMike Snitzer __check_for_conflicting_io(s, pe->e.old_chunk); 1114a8d41b59SMikulas Patocka 1115a8d41b59SMikulas Patocka /* 11161da177e4SLinus Torvalds * Add a proper exception, and remove the 11171da177e4SLinus Torvalds * in-flight exception from the list. 11181da177e4SLinus Torvalds */ 11193510cb94SJon Brassow dm_insert_exception(&s->complete, e); 11201da177e4SLinus Torvalds 11211da177e4SLinus Torvalds out: 11223510cb94SJon Brassow dm_remove_exception(&pe->e); 11239d493fa8SAlasdair G Kergon snapshot_bios = bio_list_get(&pe->snapshot_bios); 1124515ad66cSMikulas Patocka origin_bios = bio_list_get(&pe->origin_bios); 1125515ad66cSMikulas Patocka free_pending_exception(pe); 1126b4b610f6SAlasdair G Kergon 11279d493fa8SAlasdair G Kergon up_write(&s->lock); 11289d493fa8SAlasdair G Kergon 11299d493fa8SAlasdair G Kergon /* Submit any pending write bios */ 11309d493fa8SAlasdair G Kergon if (error) 11319d493fa8SAlasdair G Kergon error_bios(snapshot_bios); 11329d493fa8SAlasdair G Kergon else 11339d493fa8SAlasdair G Kergon flush_bios(snapshot_bios); 11349d493fa8SAlasdair G Kergon 1135515ad66cSMikulas Patocka retry_origin_bios(s, origin_bios); 11361da177e4SLinus Torvalds } 11371da177e4SLinus Torvalds 11381da177e4SLinus Torvalds static void commit_callback(void *context, int success) 11391da177e4SLinus Torvalds { 1140028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 1141028867acSAlasdair G Kergon 11421da177e4SLinus Torvalds pending_complete(pe, success); 11431da177e4SLinus Torvalds } 11441da177e4SLinus Torvalds 11451da177e4SLinus Torvalds /* 11461da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 11471da177e4SLinus Torvalds * this code so don't block. 11481da177e4SLinus Torvalds */ 11494cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 11501da177e4SLinus Torvalds { 1151028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 11521da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 11531da177e4SLinus Torvalds 11541da177e4SLinus Torvalds if (read_err || write_err) 11551da177e4SLinus Torvalds pending_complete(pe, 0); 11561da177e4SLinus Torvalds 11571da177e4SLinus Torvalds else 11581da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 1159493df71cSJonathan Brassow s->store->type->commit_exception(s->store, &pe->e, 1160b2a11465SJonathan Brassow commit_callback, pe); 11611da177e4SLinus Torvalds } 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds /* 11641da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 11651da177e4SLinus Torvalds */ 1166028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 11671da177e4SLinus Torvalds { 11681da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 116922a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 11701da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 11711da177e4SLinus Torvalds sector_t dev_size; 11721da177e4SLinus Torvalds 11731da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds src.bdev = bdev; 117671fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1177df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 11781da177e4SLinus Torvalds 1179fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 118071fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 11811da177e4SLinus Torvalds dest.count = src.count; 11821da177e4SLinus Torvalds 11831da177e4SLinus Torvalds /* Hand over to kcopyd */ 1184eb69aca5SHeinz Mauelshagen dm_kcopyd_copy(s->kcopyd_client, 11851da177e4SLinus Torvalds &src, 1, &dest, 0, copy_callback, pe); 11861da177e4SLinus Torvalds } 11871da177e4SLinus Torvalds 11882913808eSMikulas Patocka static struct dm_snap_pending_exception * 11892913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 11902913808eSMikulas Patocka { 11913510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 11922913808eSMikulas Patocka 11932913808eSMikulas Patocka if (!e) 11942913808eSMikulas Patocka return NULL; 11952913808eSMikulas Patocka 11962913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 11972913808eSMikulas Patocka } 11982913808eSMikulas Patocka 11991da177e4SLinus Torvalds /* 12001da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 12011da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 12021da177e4SLinus Torvalds * it into the pending table. 12031da177e4SLinus Torvalds * 12041da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 12051da177e4SLinus Torvalds * this. 12061da177e4SLinus Torvalds */ 1207028867acSAlasdair G Kergon static struct dm_snap_pending_exception * 1208c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s, 1209c6621392SMikulas Patocka struct dm_snap_pending_exception *pe, chunk_t chunk) 12101da177e4SLinus Torvalds { 1211c6621392SMikulas Patocka struct dm_snap_pending_exception *pe2; 121276df1c65SAlasdair G Kergon 12132913808eSMikulas Patocka pe2 = __lookup_pending_exception(s, chunk); 12142913808eSMikulas Patocka if (pe2) { 12151da177e4SLinus Torvalds free_pending_exception(pe); 12162913808eSMikulas Patocka return pe2; 121776df1c65SAlasdair G Kergon } 121876df1c65SAlasdair G Kergon 12191da177e4SLinus Torvalds pe->e.old_chunk = chunk; 12201da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 12211da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 12221da177e4SLinus Torvalds pe->started = 0; 12231da177e4SLinus Torvalds 1224493df71cSJonathan Brassow if (s->store->type->prepare_exception(s->store, &pe->e)) { 12251da177e4SLinus Torvalds free_pending_exception(pe); 12261da177e4SLinus Torvalds return NULL; 12271da177e4SLinus Torvalds } 12281da177e4SLinus Torvalds 12293510cb94SJon Brassow dm_insert_exception(&s->pending, &pe->e); 12301da177e4SLinus Torvalds 12311da177e4SLinus Torvalds return pe; 12321da177e4SLinus Torvalds } 12331da177e4SLinus Torvalds 12341d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, 1235d74f81f8SMilan Broz struct bio *bio, chunk_t chunk) 12361da177e4SLinus Torvalds { 1237fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 123871fab00aSJonathan Brassow bio->bi_sector = chunk_to_sector(s->store, 123971fab00aSJonathan Brassow dm_chunk_number(e->new_chunk) + 1240d74f81f8SMilan Broz (chunk - e->old_chunk)) + 124171fab00aSJonathan Brassow (bio->bi_sector & 124271fab00aSJonathan Brassow s->store->chunk_mask); 12431da177e4SLinus Torvalds } 12441da177e4SLinus Torvalds 12451da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio, 12461da177e4SLinus Torvalds union map_info *map_context) 12471da177e4SLinus Torvalds { 12481d4989c8SJon Brassow struct dm_exception *e; 1249028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1250d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 12511da177e4SLinus Torvalds chunk_t chunk; 1252028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = NULL; 12531da177e4SLinus Torvalds 1254494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) { 1255fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 1256494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1257494b3ee7SMikulas Patocka } 1258494b3ee7SMikulas Patocka 125971fab00aSJonathan Brassow chunk = sector_to_chunk(s->store, bio->bi_sector); 12601da177e4SLinus Torvalds 12611da177e4SLinus Torvalds /* Full snapshots are not usable */ 126276df1c65SAlasdair G Kergon /* To get here the table must be live so s->active is always set. */ 12631da177e4SLinus Torvalds if (!s->valid) 1264f6a80ea8SAlasdair G Kergon return -EIO; 12651da177e4SLinus Torvalds 12661da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 12671da177e4SLinus Torvalds * to copy an exception */ 12681da177e4SLinus Torvalds down_write(&s->lock); 12691da177e4SLinus Torvalds 127076df1c65SAlasdair G Kergon if (!s->valid) { 127176df1c65SAlasdair G Kergon r = -EIO; 127276df1c65SAlasdair G Kergon goto out_unlock; 127376df1c65SAlasdair G Kergon } 127476df1c65SAlasdair G Kergon 12751da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 12763510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 12771da177e4SLinus Torvalds if (e) { 1278d74f81f8SMilan Broz remap_exception(s, e, bio, chunk); 127976df1c65SAlasdair G Kergon goto out_unlock; 128076df1c65SAlasdair G Kergon } 12811da177e4SLinus Torvalds 1282ba40a2aaSAlasdair G Kergon /* 1283ba40a2aaSAlasdair G Kergon * Write to snapshot - higher level takes care of RW/RO 1284ba40a2aaSAlasdair G Kergon * flags so we should only get this if we are 1285ba40a2aaSAlasdair G Kergon * writeable. 1286ba40a2aaSAlasdair G Kergon */ 1287ba40a2aaSAlasdair G Kergon if (bio_rw(bio) == WRITE) { 12882913808eSMikulas Patocka pe = __lookup_pending_exception(s, chunk); 12892913808eSMikulas Patocka if (!pe) { 1290c6621392SMikulas Patocka up_write(&s->lock); 1291c6621392SMikulas Patocka pe = alloc_pending_exception(s); 1292c6621392SMikulas Patocka down_write(&s->lock); 1293c6621392SMikulas Patocka 1294c6621392SMikulas Patocka if (!s->valid) { 1295c6621392SMikulas Patocka free_pending_exception(pe); 1296c6621392SMikulas Patocka r = -EIO; 1297c6621392SMikulas Patocka goto out_unlock; 1298c6621392SMikulas Patocka } 1299c6621392SMikulas Patocka 13003510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 130135bf659bSMikulas Patocka if (e) { 130235bf659bSMikulas Patocka free_pending_exception(pe); 130335bf659bSMikulas Patocka remap_exception(s, e, bio, chunk); 130435bf659bSMikulas Patocka goto out_unlock; 130535bf659bSMikulas Patocka } 130635bf659bSMikulas Patocka 1307c6621392SMikulas Patocka pe = __find_pending_exception(s, pe, chunk); 13081da177e4SLinus Torvalds if (!pe) { 1309695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 13101da177e4SLinus Torvalds r = -EIO; 131176df1c65SAlasdair G Kergon goto out_unlock; 131276df1c65SAlasdair G Kergon } 13132913808eSMikulas Patocka } 131476df1c65SAlasdair G Kergon 1315d74f81f8SMilan Broz remap_exception(s, &pe->e, bio, chunk); 13161da177e4SLinus Torvalds bio_list_add(&pe->snapshot_bios, bio); 13171da177e4SLinus Torvalds 1318d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1319ba40a2aaSAlasdair G Kergon 13201da177e4SLinus Torvalds if (!pe->started) { 13211da177e4SLinus Torvalds /* this is protected by snap->lock */ 13221da177e4SLinus Torvalds pe->started = 1; 132376df1c65SAlasdair G Kergon up_write(&s->lock); 132476df1c65SAlasdair G Kergon start_copy(pe); 1325ba40a2aaSAlasdair G Kergon goto out; 1326ba40a2aaSAlasdair G Kergon } 1327cd45daffSMikulas Patocka } else { 13281da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 1329cd45daffSMikulas Patocka map_context->ptr = track_chunk(s, chunk); 1330cd45daffSMikulas Patocka } 13311da177e4SLinus Torvalds 1332ba40a2aaSAlasdair G Kergon out_unlock: 1333ba40a2aaSAlasdair G Kergon up_write(&s->lock); 1334ba40a2aaSAlasdair G Kergon out: 13351da177e4SLinus Torvalds return r; 13361da177e4SLinus Torvalds } 13371da177e4SLinus Torvalds 13383452c2a1SMikulas Patocka /* 13393452c2a1SMikulas Patocka * A snapshot-merge target behaves like a combination of a snapshot 13403452c2a1SMikulas Patocka * target and a snapshot-origin target. It only generates new 13413452c2a1SMikulas Patocka * exceptions in other snapshots and not in the one that is being 13423452c2a1SMikulas Patocka * merged. 13433452c2a1SMikulas Patocka * 13443452c2a1SMikulas Patocka * For each chunk, if there is an existing exception, it is used to 13453452c2a1SMikulas Patocka * redirect I/O to the cow device. Otherwise I/O is sent to the origin, 13463452c2a1SMikulas Patocka * which in turn might generate exceptions in other snapshots. 13473452c2a1SMikulas Patocka */ 13483452c2a1SMikulas Patocka static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, 13493452c2a1SMikulas Patocka union map_info *map_context) 13503452c2a1SMikulas Patocka { 13513452c2a1SMikulas Patocka struct dm_exception *e; 13523452c2a1SMikulas Patocka struct dm_snapshot *s = ti->private; 13533452c2a1SMikulas Patocka int r = DM_MAPIO_REMAPPED; 13543452c2a1SMikulas Patocka chunk_t chunk; 13553452c2a1SMikulas Patocka 135610b8106aSMike Snitzer if (unlikely(bio_empty_barrier(bio))) { 135710b8106aSMike Snitzer if (!map_context->flush_request) 135810b8106aSMike Snitzer bio->bi_bdev = s->origin->bdev; 135910b8106aSMike Snitzer else 136010b8106aSMike Snitzer bio->bi_bdev = s->cow->bdev; 136110b8106aSMike Snitzer map_context->ptr = NULL; 136210b8106aSMike Snitzer return DM_MAPIO_REMAPPED; 136310b8106aSMike Snitzer } 136410b8106aSMike Snitzer 13653452c2a1SMikulas Patocka chunk = sector_to_chunk(s->store, bio->bi_sector); 13663452c2a1SMikulas Patocka 13673452c2a1SMikulas Patocka down_read(&s->lock); 13683452c2a1SMikulas Patocka 13693452c2a1SMikulas Patocka /* Full snapshots are not usable */ 13703452c2a1SMikulas Patocka if (!s->valid) { 13713452c2a1SMikulas Patocka r = -EIO; 13723452c2a1SMikulas Patocka goto out_unlock; 13733452c2a1SMikulas Patocka } 13743452c2a1SMikulas Patocka 13753452c2a1SMikulas Patocka /* If the block is already remapped - use that */ 13763452c2a1SMikulas Patocka e = dm_lookup_exception(&s->complete, chunk); 13773452c2a1SMikulas Patocka if (e) { 13783452c2a1SMikulas Patocka remap_exception(s, e, bio, chunk); 13793452c2a1SMikulas Patocka goto out_unlock; 13803452c2a1SMikulas Patocka } 13813452c2a1SMikulas Patocka 13823452c2a1SMikulas Patocka bio->bi_bdev = s->origin->bdev; 13833452c2a1SMikulas Patocka 13843452c2a1SMikulas Patocka if (bio_rw(bio) == WRITE) { 13853452c2a1SMikulas Patocka up_read(&s->lock); 13863452c2a1SMikulas Patocka return do_origin(s->origin, bio); 13873452c2a1SMikulas Patocka } 13883452c2a1SMikulas Patocka 13893452c2a1SMikulas Patocka out_unlock: 13903452c2a1SMikulas Patocka up_read(&s->lock); 13913452c2a1SMikulas Patocka 13923452c2a1SMikulas Patocka return r; 13933452c2a1SMikulas Patocka } 13943452c2a1SMikulas Patocka 1395cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 1396cd45daffSMikulas Patocka int error, union map_info *map_context) 1397cd45daffSMikulas Patocka { 1398cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 1399cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = map_context->ptr; 1400cd45daffSMikulas Patocka 1401cd45daffSMikulas Patocka if (c) 1402cd45daffSMikulas Patocka stop_tracking_chunk(s, c); 1403cd45daffSMikulas Patocka 1404cd45daffSMikulas Patocka return 0; 1405cd45daffSMikulas Patocka } 1406cd45daffSMikulas Patocka 1407c26655caSMike Snitzer static void snapshot_postsuspend(struct dm_target *ti) 1408c26655caSMike Snitzer { 1409c26655caSMike Snitzer struct dm_snapshot *s = ti->private; 1410c26655caSMike Snitzer 1411c26655caSMike Snitzer down_write(&s->lock); 1412c26655caSMike Snitzer s->suspended = 1; 1413c26655caSMike Snitzer up_write(&s->lock); 1414c26655caSMike Snitzer } 1415c26655caSMike Snitzer 1416c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target *ti) 1417c1f0c183SMike Snitzer { 1418c1f0c183SMike Snitzer int r = 0; 1419c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 1420c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1421c1f0c183SMike Snitzer 1422c1f0c183SMike Snitzer down_read(&_origins_lock); 14239d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1424c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1425c1f0c183SMike Snitzer down_read(&snap_src->lock); 1426c1f0c183SMike Snitzer if (s == snap_src) { 1427c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 1428c1f0c183SMike Snitzer "handover completes."); 1429c1f0c183SMike Snitzer r = -EINVAL; 1430c1f0c183SMike Snitzer } else if (!snap_src->suspended) { 1431c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 1432c1f0c183SMike Snitzer "source is suspended."); 1433c1f0c183SMike Snitzer r = -EINVAL; 1434c1f0c183SMike Snitzer } 1435c1f0c183SMike Snitzer up_read(&snap_src->lock); 1436c1f0c183SMike Snitzer } 1437c1f0c183SMike Snitzer up_read(&_origins_lock); 1438c1f0c183SMike Snitzer 1439c1f0c183SMike Snitzer return r; 1440c1f0c183SMike Snitzer } 1441c1f0c183SMike Snitzer 14421da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 14431da177e4SLinus Torvalds { 1444028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1445c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1446c1f0c183SMike Snitzer 1447c1f0c183SMike Snitzer down_read(&_origins_lock); 14489d3b15c4SMikulas Patocka (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); 1449c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1450c1f0c183SMike Snitzer down_write(&snap_src->lock); 1451c1f0c183SMike Snitzer down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 1452c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 1453c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1454c1f0c183SMike Snitzer up_write(&snap_src->lock); 1455c1f0c183SMike Snitzer } 1456c1f0c183SMike Snitzer up_read(&_origins_lock); 1457c1f0c183SMike Snitzer 1458c1f0c183SMike Snitzer /* Now we have correct chunk size, reregister */ 1459c1f0c183SMike Snitzer reregister_snapshot(s); 14601da177e4SLinus Torvalds 1461aa14edebSAlasdair G Kergon down_write(&s->lock); 1462aa14edebSAlasdair G Kergon s->active = 1; 1463c26655caSMike Snitzer s->suspended = 0; 1464aa14edebSAlasdair G Kergon up_write(&s->lock); 14651da177e4SLinus Torvalds } 14661da177e4SLinus Torvalds 14671da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type, 14681da177e4SLinus Torvalds char *result, unsigned int maxlen) 14691da177e4SLinus Torvalds { 14702e4a31dfSJonathan Brassow unsigned sz = 0; 1471028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 14721da177e4SLinus Torvalds 14731da177e4SLinus Torvalds switch (type) { 14741da177e4SLinus Torvalds case STATUSTYPE_INFO: 147594e76572SMikulas Patocka 147694e76572SMikulas Patocka down_write(&snap->lock); 147794e76572SMikulas Patocka 14781da177e4SLinus Torvalds if (!snap->valid) 14792e4a31dfSJonathan Brassow DMEMIT("Invalid"); 14801da177e4SLinus Torvalds else { 1481985903bbSMike Snitzer if (snap->store->type->usage) { 1482985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 1483985903bbSMike Snitzer metadata_sectors; 1484985903bbSMike Snitzer snap->store->type->usage(snap->store, 1485985903bbSMike Snitzer &total_sectors, 1486985903bbSMike Snitzer §ors_allocated, 1487985903bbSMike Snitzer &metadata_sectors); 1488985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 1489985903bbSMike Snitzer (unsigned long long)sectors_allocated, 1490985903bbSMike Snitzer (unsigned long long)total_sectors, 1491985903bbSMike Snitzer (unsigned long long)metadata_sectors); 14921da177e4SLinus Torvalds } 14931da177e4SLinus Torvalds else 14942e4a31dfSJonathan Brassow DMEMIT("Unknown"); 14951da177e4SLinus Torvalds } 149694e76572SMikulas Patocka 149794e76572SMikulas Patocka up_write(&snap->lock); 149894e76572SMikulas Patocka 14991da177e4SLinus Torvalds break; 15001da177e4SLinus Torvalds 15011da177e4SLinus Torvalds case STATUSTYPE_TABLE: 15021da177e4SLinus Torvalds /* 15031da177e4SLinus Torvalds * kdevname returns a static pointer so we need 15041da177e4SLinus Torvalds * to make private copies if the output is to 15051da177e4SLinus Torvalds * make sense. 15061da177e4SLinus Torvalds */ 1507fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 15081e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 15091e302a92SJonathan Brassow maxlen - sz); 15101da177e4SLinus Torvalds break; 15111da177e4SLinus Torvalds } 15121da177e4SLinus Torvalds 15131da177e4SLinus Torvalds return 0; 15141da177e4SLinus Torvalds } 15151da177e4SLinus Torvalds 15168811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 15178811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 15188811f46cSMike Snitzer { 15198811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 15208811f46cSMike Snitzer 15218811f46cSMike Snitzer return fn(ti, snap->origin, 0, ti->len, data); 15228811f46cSMike Snitzer } 15238811f46cSMike Snitzer 15248811f46cSMike Snitzer 15251da177e4SLinus Torvalds /*----------------------------------------------------------------- 15261da177e4SLinus Torvalds * Origin methods 15271da177e4SLinus Torvalds *---------------------------------------------------------------*/ 15289eaae8ffSMikulas Patocka 15299eaae8ffSMikulas Patocka /* 15309eaae8ffSMikulas Patocka * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any 15319eaae8ffSMikulas Patocka * supplied bio was ignored. The caller may submit it immediately. 15329eaae8ffSMikulas Patocka * (No remapping actually occurs as the origin is always a direct linear 15339eaae8ffSMikulas Patocka * map.) 15349eaae8ffSMikulas Patocka * 15359eaae8ffSMikulas Patocka * If further exceptions are required, DM_MAPIO_SUBMITTED is returned 15369eaae8ffSMikulas Patocka * and any supplied bio is added to a list to be submitted once all 15379eaae8ffSMikulas Patocka * the necessary exceptions exist. 15389eaae8ffSMikulas Patocka */ 15399eaae8ffSMikulas Patocka static int __origin_write(struct list_head *snapshots, sector_t sector, 15409eaae8ffSMikulas Patocka struct bio *bio) 15411da177e4SLinus Torvalds { 1542515ad66cSMikulas Patocka int r = DM_MAPIO_REMAPPED; 15431da177e4SLinus Torvalds struct dm_snapshot *snap; 15441d4989c8SJon Brassow struct dm_exception *e; 1545515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe; 1546515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_now = NULL; 1547515ad66cSMikulas Patocka struct dm_snap_pending_exception *pe_to_start_last = NULL; 15481da177e4SLinus Torvalds chunk_t chunk; 15491da177e4SLinus Torvalds 15501da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 15511da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 15523452c2a1SMikulas Patocka /* 15533452c2a1SMikulas Patocka * Don't make new exceptions in a merging snapshot 15543452c2a1SMikulas Patocka * because it has effectively been deleted 15553452c2a1SMikulas Patocka */ 15563452c2a1SMikulas Patocka if (dm_target_is_snapshot_merge(snap->ti)) 15573452c2a1SMikulas Patocka continue; 15583452c2a1SMikulas Patocka 155976df1c65SAlasdair G Kergon down_write(&snap->lock); 156076df1c65SAlasdair G Kergon 1561aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 1562aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 156376df1c65SAlasdair G Kergon goto next_snapshot; 15641da177e4SLinus Torvalds 1565d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 15669eaae8ffSMikulas Patocka if (sector >= dm_table_get_size(snap->ti->table)) 156776df1c65SAlasdair G Kergon goto next_snapshot; 15681da177e4SLinus Torvalds 15691da177e4SLinus Torvalds /* 15701da177e4SLinus Torvalds * Remember, different snapshots can have 15711da177e4SLinus Torvalds * different chunk sizes. 15721da177e4SLinus Torvalds */ 15739eaae8ffSMikulas Patocka chunk = sector_to_chunk(snap->store, sector); 15741da177e4SLinus Torvalds 15751da177e4SLinus Torvalds /* 15761da177e4SLinus Torvalds * Check exception table to see if block 15771da177e4SLinus Torvalds * is already remapped in this snapshot 15781da177e4SLinus Torvalds * and trigger an exception if not. 15791da177e4SLinus Torvalds */ 15803510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 158176df1c65SAlasdair G Kergon if (e) 158276df1c65SAlasdair G Kergon goto next_snapshot; 158376df1c65SAlasdair G Kergon 15842913808eSMikulas Patocka pe = __lookup_pending_exception(snap, chunk); 15852913808eSMikulas Patocka if (!pe) { 1586c6621392SMikulas Patocka up_write(&snap->lock); 1587c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 1588c6621392SMikulas Patocka down_write(&snap->lock); 1589c6621392SMikulas Patocka 1590c6621392SMikulas Patocka if (!snap->valid) { 1591c6621392SMikulas Patocka free_pending_exception(pe); 1592c6621392SMikulas Patocka goto next_snapshot; 1593c6621392SMikulas Patocka } 1594c6621392SMikulas Patocka 15953510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 159635bf659bSMikulas Patocka if (e) { 159735bf659bSMikulas Patocka free_pending_exception(pe); 159835bf659bSMikulas Patocka goto next_snapshot; 159935bf659bSMikulas Patocka } 160035bf659bSMikulas Patocka 1601c6621392SMikulas Patocka pe = __find_pending_exception(snap, pe, chunk); 16021da177e4SLinus Torvalds if (!pe) { 1603695368acSAlasdair G Kergon __invalidate_snapshot(snap, -ENOMEM); 160476df1c65SAlasdair G Kergon goto next_snapshot; 160576df1c65SAlasdair G Kergon } 16062913808eSMikulas Patocka } 16071da177e4SLinus Torvalds 1608d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 160976df1c65SAlasdair G Kergon 1610515ad66cSMikulas Patocka /* 1611515ad66cSMikulas Patocka * If an origin bio was supplied, queue it to wait for the 1612515ad66cSMikulas Patocka * completion of this exception, and start this one last, 1613515ad66cSMikulas Patocka * at the end of the function. 1614515ad66cSMikulas Patocka */ 1615515ad66cSMikulas Patocka if (bio) { 1616515ad66cSMikulas Patocka bio_list_add(&pe->origin_bios, bio); 1617515ad66cSMikulas Patocka bio = NULL; 1618515ad66cSMikulas Patocka 1619515ad66cSMikulas Patocka if (!pe->started) { 1620515ad66cSMikulas Patocka pe->started = 1; 1621515ad66cSMikulas Patocka pe_to_start_last = pe; 1622515ad66cSMikulas Patocka } 1623b4b610f6SAlasdair G Kergon } 162476df1c65SAlasdair G Kergon 1625eccf0817SAlasdair G Kergon if (!pe->started) { 1626eccf0817SAlasdair G Kergon pe->started = 1; 1627515ad66cSMikulas Patocka pe_to_start_now = pe; 1628eccf0817SAlasdair G Kergon } 16291da177e4SLinus Torvalds 163076df1c65SAlasdair G Kergon next_snapshot: 16311da177e4SLinus Torvalds up_write(&snap->lock); 1632515ad66cSMikulas Patocka 1633515ad66cSMikulas Patocka if (pe_to_start_now) { 1634515ad66cSMikulas Patocka start_copy(pe_to_start_now); 1635515ad66cSMikulas Patocka pe_to_start_now = NULL; 16361da177e4SLinus Torvalds } 1637b4b610f6SAlasdair G Kergon } 1638b4b610f6SAlasdair G Kergon 16391da177e4SLinus Torvalds /* 1640515ad66cSMikulas Patocka * Submit the exception against which the bio is queued last, 1641515ad66cSMikulas Patocka * to give the other exceptions a head start. 16421da177e4SLinus Torvalds */ 1643515ad66cSMikulas Patocka if (pe_to_start_last) 1644515ad66cSMikulas Patocka start_copy(pe_to_start_last); 16451da177e4SLinus Torvalds 16461da177e4SLinus Torvalds return r; 16471da177e4SLinus Torvalds } 16481da177e4SLinus Torvalds 16491da177e4SLinus Torvalds /* 16501da177e4SLinus Torvalds * Called on a write from the origin driver. 16511da177e4SLinus Torvalds */ 16521da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 16531da177e4SLinus Torvalds { 16541da177e4SLinus Torvalds struct origin *o; 1655d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 16561da177e4SLinus Torvalds 16571da177e4SLinus Torvalds down_read(&_origins_lock); 16581da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 16591da177e4SLinus Torvalds if (o) 16609eaae8ffSMikulas Patocka r = __origin_write(&o->snapshots, bio->bi_sector, bio); 16611da177e4SLinus Torvalds up_read(&_origins_lock); 16621da177e4SLinus Torvalds 16631da177e4SLinus Torvalds return r; 16641da177e4SLinus Torvalds } 16651da177e4SLinus Torvalds 16661da177e4SLinus Torvalds /* 16671da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 16681da177e4SLinus Torvalds */ 16691da177e4SLinus Torvalds 16701da177e4SLinus Torvalds /* 16711da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 16721da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 16731da177e4SLinus Torvalds * pointing to the real device. 16741da177e4SLinus Torvalds */ 16751da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 16761da177e4SLinus Torvalds { 16771da177e4SLinus Torvalds int r; 16781da177e4SLinus Torvalds struct dm_dev *dev; 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds if (argc != 1) { 168172d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 16821da177e4SLinus Torvalds return -EINVAL; 16831da177e4SLinus Torvalds } 16841da177e4SLinus Torvalds 16851da177e4SLinus Torvalds r = dm_get_device(ti, argv[0], 0, ti->len, 16861da177e4SLinus Torvalds dm_table_get_mode(ti->table), &dev); 16871da177e4SLinus Torvalds if (r) { 16881da177e4SLinus Torvalds ti->error = "Cannot get target device"; 16891da177e4SLinus Torvalds return r; 16901da177e4SLinus Torvalds } 16911da177e4SLinus Torvalds 16921da177e4SLinus Torvalds ti->private = dev; 1693494b3ee7SMikulas Patocka ti->num_flush_requests = 1; 1694494b3ee7SMikulas Patocka 16951da177e4SLinus Torvalds return 0; 16961da177e4SLinus Torvalds } 16971da177e4SLinus Torvalds 16981da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 16991da177e4SLinus Torvalds { 1700028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 17011da177e4SLinus Torvalds dm_put_device(ti, dev); 17021da177e4SLinus Torvalds } 17031da177e4SLinus Torvalds 17041da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio, 17051da177e4SLinus Torvalds union map_info *map_context) 17061da177e4SLinus Torvalds { 1707028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 17081da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 17091da177e4SLinus Torvalds 1710494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) 1711494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1712494b3ee7SMikulas Patocka 17131da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 1714d2a7ad29SKiyoshi Ueda return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; 17151da177e4SLinus Torvalds } 17161da177e4SLinus Torvalds 17171da177e4SLinus Torvalds /* 17181da177e4SLinus Torvalds * Set the target "split_io" field to the minimum of all the snapshots' 17191da177e4SLinus Torvalds * chunk sizes. 17201da177e4SLinus Torvalds */ 17211da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 17221da177e4SLinus Torvalds { 1723028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 17241da177e4SLinus Torvalds 17251da177e4SLinus Torvalds down_read(&_origins_lock); 17261da177e4SLinus Torvalds 17277e201b35SMikulas Patocka ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev)); 17287e201b35SMikulas Patocka 17297e201b35SMikulas Patocka up_read(&_origins_lock); 17301da177e4SLinus Torvalds } 17311da177e4SLinus Torvalds 17321da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result, 17331da177e4SLinus Torvalds unsigned int maxlen) 17341da177e4SLinus Torvalds { 1735028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 17361da177e4SLinus Torvalds 17371da177e4SLinus Torvalds switch (type) { 17381da177e4SLinus Torvalds case STATUSTYPE_INFO: 17391da177e4SLinus Torvalds result[0] = '\0'; 17401da177e4SLinus Torvalds break; 17411da177e4SLinus Torvalds 17421da177e4SLinus Torvalds case STATUSTYPE_TABLE: 17431da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 17441da177e4SLinus Torvalds break; 17451da177e4SLinus Torvalds } 17461da177e4SLinus Torvalds 17471da177e4SLinus Torvalds return 0; 17481da177e4SLinus Torvalds } 17491da177e4SLinus Torvalds 17508811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 17518811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 17528811f46cSMike Snitzer { 17538811f46cSMike Snitzer struct dm_dev *dev = ti->private; 17548811f46cSMike Snitzer 17558811f46cSMike Snitzer return fn(ti, dev, 0, ti->len, data); 17568811f46cSMike Snitzer } 17578811f46cSMike Snitzer 17581da177e4SLinus Torvalds static struct target_type origin_target = { 17591da177e4SLinus Torvalds .name = "snapshot-origin", 17608811f46cSMike Snitzer .version = {1, 7, 0}, 17611da177e4SLinus Torvalds .module = THIS_MODULE, 17621da177e4SLinus Torvalds .ctr = origin_ctr, 17631da177e4SLinus Torvalds .dtr = origin_dtr, 17641da177e4SLinus Torvalds .map = origin_map, 17651da177e4SLinus Torvalds .resume = origin_resume, 17661da177e4SLinus Torvalds .status = origin_status, 17678811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 17681da177e4SLinus Torvalds }; 17691da177e4SLinus Torvalds 17701da177e4SLinus Torvalds static struct target_type snapshot_target = { 17711da177e4SLinus Torvalds .name = "snapshot", 1772c26655caSMike Snitzer .version = {1, 9, 0}, 17731da177e4SLinus Torvalds .module = THIS_MODULE, 17741da177e4SLinus Torvalds .ctr = snapshot_ctr, 17751da177e4SLinus Torvalds .dtr = snapshot_dtr, 17761da177e4SLinus Torvalds .map = snapshot_map, 1777cd45daffSMikulas Patocka .end_io = snapshot_end_io, 1778c26655caSMike Snitzer .postsuspend = snapshot_postsuspend, 1779c1f0c183SMike Snitzer .preresume = snapshot_preresume, 17801da177e4SLinus Torvalds .resume = snapshot_resume, 17811da177e4SLinus Torvalds .status = snapshot_status, 17828811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 17831da177e4SLinus Torvalds }; 17841da177e4SLinus Torvalds 1785d698aa45SMikulas Patocka static struct target_type merge_target = { 1786d698aa45SMikulas Patocka .name = dm_snapshot_merge_target_name, 1787d698aa45SMikulas Patocka .version = {1, 0, 0}, 1788d698aa45SMikulas Patocka .module = THIS_MODULE, 1789d698aa45SMikulas Patocka .ctr = snapshot_ctr, 1790d698aa45SMikulas Patocka .dtr = snapshot_dtr, 17913452c2a1SMikulas Patocka .map = snapshot_merge_map, 1792d698aa45SMikulas Patocka .end_io = snapshot_end_io, 1793d698aa45SMikulas Patocka .postsuspend = snapshot_postsuspend, 1794d698aa45SMikulas Patocka .preresume = snapshot_preresume, 1795d698aa45SMikulas Patocka .resume = snapshot_resume, 1796d698aa45SMikulas Patocka .status = snapshot_status, 1797d698aa45SMikulas Patocka .iterate_devices = snapshot_iterate_devices, 1798d698aa45SMikulas Patocka }; 1799d698aa45SMikulas Patocka 18001da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 18011da177e4SLinus Torvalds { 18021da177e4SLinus Torvalds int r; 18031da177e4SLinus Torvalds 18044db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 18054db6bfe0SAlasdair G Kergon if (r) { 18064db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 18074db6bfe0SAlasdair G Kergon return r; 18084db6bfe0SAlasdair G Kergon } 18094db6bfe0SAlasdair G Kergon 18101da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 1811d698aa45SMikulas Patocka if (r < 0) { 18121da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 1813034a186dSJonathan Brassow goto bad_register_snapshot_target; 18141da177e4SLinus Torvalds } 18151da177e4SLinus Torvalds 18161da177e4SLinus Torvalds r = dm_register_target(&origin_target); 18171da177e4SLinus Torvalds if (r < 0) { 181872d94861SAlasdair G Kergon DMERR("Origin target register failed %d", r); 1819d698aa45SMikulas Patocka goto bad_register_origin_target; 1820d698aa45SMikulas Patocka } 1821d698aa45SMikulas Patocka 1822d698aa45SMikulas Patocka r = dm_register_target(&merge_target); 1823d698aa45SMikulas Patocka if (r < 0) { 1824d698aa45SMikulas Patocka DMERR("Merge target register failed %d", r); 1825d698aa45SMikulas Patocka goto bad_register_merge_target; 18261da177e4SLinus Torvalds } 18271da177e4SLinus Torvalds 18281da177e4SLinus Torvalds r = init_origin_hash(); 18291da177e4SLinus Torvalds if (r) { 18301da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 1831d698aa45SMikulas Patocka goto bad_origin_hash; 18321da177e4SLinus Torvalds } 18331da177e4SLinus Torvalds 18341d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 18351da177e4SLinus Torvalds if (!exception_cache) { 18361da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 18371da177e4SLinus Torvalds r = -ENOMEM; 1838d698aa45SMikulas Patocka goto bad_exception_cache; 18391da177e4SLinus Torvalds } 18401da177e4SLinus Torvalds 1841028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 18421da177e4SLinus Torvalds if (!pending_cache) { 18431da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 18441da177e4SLinus Torvalds r = -ENOMEM; 1845d698aa45SMikulas Patocka goto bad_pending_cache; 18461da177e4SLinus Torvalds } 18471da177e4SLinus Torvalds 1848cd45daffSMikulas Patocka tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 1849cd45daffSMikulas Patocka if (!tracked_chunk_cache) { 1850cd45daffSMikulas Patocka DMERR("Couldn't create cache to track chunks in use."); 1851cd45daffSMikulas Patocka r = -ENOMEM; 1852d698aa45SMikulas Patocka goto bad_tracked_chunk_cache; 1853cd45daffSMikulas Patocka } 1854cd45daffSMikulas Patocka 1855ca3a931fSAlasdair G Kergon ksnapd = create_singlethread_workqueue("ksnapd"); 1856ca3a931fSAlasdair G Kergon if (!ksnapd) { 1857ca3a931fSAlasdair G Kergon DMERR("Failed to create ksnapd workqueue."); 1858ca3a931fSAlasdair G Kergon r = -ENOMEM; 185992e86812SMikulas Patocka goto bad_pending_pool; 1860ca3a931fSAlasdair G Kergon } 1861ca3a931fSAlasdair G Kergon 18621da177e4SLinus Torvalds return 0; 18631da177e4SLinus Torvalds 1864cd45daffSMikulas Patocka bad_pending_pool: 1865cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 1866d698aa45SMikulas Patocka bad_tracked_chunk_cache: 18671da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 1868d698aa45SMikulas Patocka bad_pending_cache: 18691da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 1870d698aa45SMikulas Patocka bad_exception_cache: 18711da177e4SLinus Torvalds exit_origin_hash(); 1872d698aa45SMikulas Patocka bad_origin_hash: 1873d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 1874d698aa45SMikulas Patocka bad_register_merge_target: 18751da177e4SLinus Torvalds dm_unregister_target(&origin_target); 1876d698aa45SMikulas Patocka bad_register_origin_target: 18771da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 1878034a186dSJonathan Brassow bad_register_snapshot_target: 1879034a186dSJonathan Brassow dm_exception_store_exit(); 1880d698aa45SMikulas Patocka 18811da177e4SLinus Torvalds return r; 18821da177e4SLinus Torvalds } 18831da177e4SLinus Torvalds 18841da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 18851da177e4SLinus Torvalds { 1886ca3a931fSAlasdair G Kergon destroy_workqueue(ksnapd); 1887ca3a931fSAlasdair G Kergon 188810d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 188910d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 1890d698aa45SMikulas Patocka dm_unregister_target(&merge_target); 18911da177e4SLinus Torvalds 18921da177e4SLinus Torvalds exit_origin_hash(); 18931da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 18941da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 1895cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 18964db6bfe0SAlasdair G Kergon 18974db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 18981da177e4SLinus Torvalds } 18991da177e4SLinus Torvalds 19001da177e4SLinus Torvalds /* Module hooks */ 19011da177e4SLinus Torvalds module_init(dm_snapshot_init); 19021da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 19031da177e4SLinus Torvalds 19041da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 19051da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 19061da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1907