11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/device-mapper.h> 1190fa1527SMikulas Patocka #include <linux/delay.h> 121da177e4SLinus Torvalds #include <linux/fs.h> 131da177e4SLinus Torvalds #include <linux/init.h> 141da177e4SLinus Torvalds #include <linux/kdev_t.h> 151da177e4SLinus Torvalds #include <linux/list.h> 161da177e4SLinus Torvalds #include <linux/mempool.h> 171da177e4SLinus Torvalds #include <linux/module.h> 181da177e4SLinus Torvalds #include <linux/slab.h> 191da177e4SLinus Torvalds #include <linux/vmalloc.h> 206f3c3f0aSvignesh babu #include <linux/log2.h> 21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h> 22ccc45ea8SJonathan Brassow #include <linux/workqueue.h> 231da177e4SLinus Torvalds 24aea53d92SJonathan Brassow #include "dm-exception-store.h" 251da177e4SLinus Torvalds 2672d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots" 2772d94861SAlasdair G Kergon 281da177e4SLinus Torvalds /* 291da177e4SLinus Torvalds * The percentage increment we will wake up users at 301da177e4SLinus Torvalds */ 311da177e4SLinus Torvalds #define WAKE_UP_PERCENT 5 321da177e4SLinus Torvalds 331da177e4SLinus Torvalds /* 341da177e4SLinus Torvalds * kcopyd priority of snapshot operations 351da177e4SLinus Torvalds */ 361da177e4SLinus Torvalds #define SNAPSHOT_COPY_PRIORITY 2 371da177e4SLinus Torvalds 381da177e4SLinus Torvalds /* 398ee2767aSMilan Broz * Reserve 1MB for each snapshot initially (with minimum of 1 page). 401da177e4SLinus Torvalds */ 418ee2767aSMilan Broz #define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) 421da177e4SLinus Torvalds 43cd45daffSMikulas Patocka /* 44cd45daffSMikulas Patocka * The size of the mempool used to track chunks in use. 45cd45daffSMikulas Patocka */ 46cd45daffSMikulas Patocka #define MIN_IOS 256 47cd45daffSMikulas Patocka 48ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH_SIZE 16 49ccc45ea8SJonathan Brassow #define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ 50ccc45ea8SJonathan Brassow (DM_TRACKED_CHUNK_HASH_SIZE - 1)) 51ccc45ea8SJonathan Brassow 52191437a5SJon Brassow struct dm_exception_table { 53ccc45ea8SJonathan Brassow uint32_t hash_mask; 54ccc45ea8SJonathan Brassow unsigned hash_shift; 55ccc45ea8SJonathan Brassow struct list_head *table; 56ccc45ea8SJonathan Brassow }; 57ccc45ea8SJonathan Brassow 58ccc45ea8SJonathan Brassow struct dm_snapshot { 59ccc45ea8SJonathan Brassow struct rw_semaphore lock; 60ccc45ea8SJonathan Brassow 61ccc45ea8SJonathan Brassow struct dm_dev *origin; 62fc56f6fbSMike Snitzer struct dm_dev *cow; 63fc56f6fbSMike Snitzer 64fc56f6fbSMike Snitzer struct dm_target *ti; 65ccc45ea8SJonathan Brassow 66ccc45ea8SJonathan Brassow /* List of snapshots per Origin */ 67ccc45ea8SJonathan Brassow struct list_head list; 68ccc45ea8SJonathan Brassow 69ccc45ea8SJonathan Brassow /* You can't use a snapshot if this is 0 (e.g. if full) */ 70ccc45ea8SJonathan Brassow int valid; 71ccc45ea8SJonathan Brassow 72ccc45ea8SJonathan Brassow /* Origin writes don't trigger exceptions until this is set */ 73ccc45ea8SJonathan Brassow int active; 74ccc45ea8SJonathan Brassow 75c26655caSMike Snitzer /* Whether or not owning mapped_device is suspended */ 76c26655caSMike Snitzer int suspended; 77c26655caSMike Snitzer 78ccc45ea8SJonathan Brassow mempool_t *pending_pool; 79ccc45ea8SJonathan Brassow 80ccc45ea8SJonathan Brassow atomic_t pending_exceptions_count; 81ccc45ea8SJonathan Brassow 82191437a5SJon Brassow struct dm_exception_table pending; 83191437a5SJon Brassow struct dm_exception_table complete; 84ccc45ea8SJonathan Brassow 85ccc45ea8SJonathan Brassow /* 86ccc45ea8SJonathan Brassow * pe_lock protects all pending_exception operations and access 87ccc45ea8SJonathan Brassow * as well as the snapshot_bios list. 88ccc45ea8SJonathan Brassow */ 89ccc45ea8SJonathan Brassow spinlock_t pe_lock; 90ccc45ea8SJonathan Brassow 91ccc45ea8SJonathan Brassow /* The on disk metadata handler */ 92ccc45ea8SJonathan Brassow struct dm_exception_store *store; 93ccc45ea8SJonathan Brassow 94ccc45ea8SJonathan Brassow struct dm_kcopyd_client *kcopyd_client; 95ccc45ea8SJonathan Brassow 96ccc45ea8SJonathan Brassow /* Queue of snapshot writes for ksnapd to flush */ 97ccc45ea8SJonathan Brassow struct bio_list queued_bios; 98ccc45ea8SJonathan Brassow struct work_struct queued_bios_work; 99ccc45ea8SJonathan Brassow 100ccc45ea8SJonathan Brassow /* Chunks with outstanding reads */ 101ccc45ea8SJonathan Brassow mempool_t *tracked_chunk_pool; 102ccc45ea8SJonathan Brassow spinlock_t tracked_chunk_lock; 103ccc45ea8SJonathan Brassow struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 104ccc45ea8SJonathan Brassow }; 105ccc45ea8SJonathan Brassow 106fc56f6fbSMike Snitzer struct dm_dev *dm_snap_cow(struct dm_snapshot *s) 107fc56f6fbSMike Snitzer { 108fc56f6fbSMike Snitzer return s->cow; 109fc56f6fbSMike Snitzer } 110fc56f6fbSMike Snitzer EXPORT_SYMBOL(dm_snap_cow); 111fc56f6fbSMike Snitzer 112c642f9e0SAdrian Bunk static struct workqueue_struct *ksnapd; 113c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work); 114ca3a931fSAlasdair G Kergon 115ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store, 116ccc45ea8SJonathan Brassow chunk_t chunk) 117ccc45ea8SJonathan Brassow { 118ccc45ea8SJonathan Brassow return chunk << store->chunk_shift; 119ccc45ea8SJonathan Brassow } 120ccc45ea8SJonathan Brassow 121ccc45ea8SJonathan Brassow static int bdev_equal(struct block_device *lhs, struct block_device *rhs) 122ccc45ea8SJonathan Brassow { 123ccc45ea8SJonathan Brassow /* 124ccc45ea8SJonathan Brassow * There is only ever one instance of a particular block 125ccc45ea8SJonathan Brassow * device so we can compare pointers safely. 126ccc45ea8SJonathan Brassow */ 127ccc45ea8SJonathan Brassow return lhs == rhs; 128ccc45ea8SJonathan Brassow } 129ccc45ea8SJonathan Brassow 130028867acSAlasdair G Kergon struct dm_snap_pending_exception { 1311d4989c8SJon Brassow struct dm_exception e; 1321da177e4SLinus Torvalds 1331da177e4SLinus Torvalds /* 1341da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 1351da177e4SLinus Torvalds * in a bio list 1361da177e4SLinus Torvalds */ 1371da177e4SLinus Torvalds struct bio_list origin_bios; 1381da177e4SLinus Torvalds struct bio_list snapshot_bios; 1391da177e4SLinus Torvalds 1401da177e4SLinus Torvalds /* 141eccf0817SAlasdair G Kergon * Short-term queue of pending exceptions prior to submission. 142eccf0817SAlasdair G Kergon */ 143eccf0817SAlasdair G Kergon struct list_head list; 144eccf0817SAlasdair G Kergon 145eccf0817SAlasdair G Kergon /* 146b4b610f6SAlasdair G Kergon * The primary pending_exception is the one that holds 1474b832e8dSAlasdair G Kergon * the ref_count and the list of origin_bios for a 148b4b610f6SAlasdair G Kergon * group of pending_exceptions. It is always last to get freed. 149b4b610f6SAlasdair G Kergon * These fields get set up when writing to the origin. 1501da177e4SLinus Torvalds */ 151028867acSAlasdair G Kergon struct dm_snap_pending_exception *primary_pe; 152b4b610f6SAlasdair G Kergon 153b4b610f6SAlasdair G Kergon /* 154b4b610f6SAlasdair G Kergon * Number of pending_exceptions processing this chunk. 155b4b610f6SAlasdair G Kergon * When this drops to zero we must complete the origin bios. 156b4b610f6SAlasdair G Kergon * If incrementing or decrementing this, hold pe->snap->lock for 157b4b610f6SAlasdair G Kergon * the sibling concerned and not pe->primary_pe->snap->lock unless 158b4b610f6SAlasdair G Kergon * they are the same. 159b4b610f6SAlasdair G Kergon */ 1604b832e8dSAlasdair G Kergon atomic_t ref_count; 1611da177e4SLinus Torvalds 1621da177e4SLinus Torvalds /* Pointer back to snapshot context */ 1631da177e4SLinus Torvalds struct dm_snapshot *snap; 1641da177e4SLinus Torvalds 1651da177e4SLinus Torvalds /* 1661da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 1671da177e4SLinus Torvalds * kcopyd. 1681da177e4SLinus Torvalds */ 1691da177e4SLinus Torvalds int started; 1701da177e4SLinus Torvalds }; 1711da177e4SLinus Torvalds 1721da177e4SLinus Torvalds /* 1731da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 1741da177e4SLinus Torvalds * a lock to protect it 1751da177e4SLinus Torvalds */ 176e18b890bSChristoph Lameter static struct kmem_cache *exception_cache; 177e18b890bSChristoph Lameter static struct kmem_cache *pending_cache; 1781da177e4SLinus Torvalds 179cd45daffSMikulas Patocka struct dm_snap_tracked_chunk { 180cd45daffSMikulas Patocka struct hlist_node node; 181cd45daffSMikulas Patocka chunk_t chunk; 182cd45daffSMikulas Patocka }; 183cd45daffSMikulas Patocka 184cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache; 185cd45daffSMikulas Patocka 186cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, 187cd45daffSMikulas Patocka chunk_t chunk) 188cd45daffSMikulas Patocka { 189cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, 190cd45daffSMikulas Patocka GFP_NOIO); 191cd45daffSMikulas Patocka unsigned long flags; 192cd45daffSMikulas Patocka 193cd45daffSMikulas Patocka c->chunk = chunk; 194cd45daffSMikulas Patocka 195cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 196cd45daffSMikulas Patocka hlist_add_head(&c->node, 197cd45daffSMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); 198cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 199cd45daffSMikulas Patocka 200cd45daffSMikulas Patocka return c; 201cd45daffSMikulas Patocka } 202cd45daffSMikulas Patocka 203cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s, 204cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c) 205cd45daffSMikulas Patocka { 206cd45daffSMikulas Patocka unsigned long flags; 207cd45daffSMikulas Patocka 208cd45daffSMikulas Patocka spin_lock_irqsave(&s->tracked_chunk_lock, flags); 209cd45daffSMikulas Patocka hlist_del(&c->node); 210cd45daffSMikulas Patocka spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); 211cd45daffSMikulas Patocka 212cd45daffSMikulas Patocka mempool_free(c, s->tracked_chunk_pool); 213cd45daffSMikulas Patocka } 214cd45daffSMikulas Patocka 215a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) 216a8d41b59SMikulas Patocka { 217a8d41b59SMikulas Patocka struct dm_snap_tracked_chunk *c; 218a8d41b59SMikulas Patocka struct hlist_node *hn; 219a8d41b59SMikulas Patocka int found = 0; 220a8d41b59SMikulas Patocka 221a8d41b59SMikulas Patocka spin_lock_irq(&s->tracked_chunk_lock); 222a8d41b59SMikulas Patocka 223a8d41b59SMikulas Patocka hlist_for_each_entry(c, hn, 224a8d41b59SMikulas Patocka &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) { 225a8d41b59SMikulas Patocka if (c->chunk == chunk) { 226a8d41b59SMikulas Patocka found = 1; 227a8d41b59SMikulas Patocka break; 228a8d41b59SMikulas Patocka } 229a8d41b59SMikulas Patocka } 230a8d41b59SMikulas Patocka 231a8d41b59SMikulas Patocka spin_unlock_irq(&s->tracked_chunk_lock); 232a8d41b59SMikulas Patocka 233a8d41b59SMikulas Patocka return found; 234a8d41b59SMikulas Patocka } 235a8d41b59SMikulas Patocka 2361da177e4SLinus Torvalds /* 2371da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 2381da177e4SLinus Torvalds */ 2391da177e4SLinus Torvalds struct origin { 2401da177e4SLinus Torvalds /* The origin device */ 2411da177e4SLinus Torvalds struct block_device *bdev; 2421da177e4SLinus Torvalds 2431da177e4SLinus Torvalds struct list_head hash_list; 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds /* List of snapshots for this origin */ 2461da177e4SLinus Torvalds struct list_head snapshots; 2471da177e4SLinus Torvalds }; 2481da177e4SLinus Torvalds 2491da177e4SLinus Torvalds /* 2501da177e4SLinus Torvalds * Size of the hash table for origin volumes. If we make this 2511da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 2521da177e4SLinus Torvalds */ 2531da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 2541da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 2551da177e4SLinus Torvalds static struct list_head *_origins; 2561da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 2571da177e4SLinus Torvalds 2581da177e4SLinus Torvalds static int init_origin_hash(void) 2591da177e4SLinus Torvalds { 2601da177e4SLinus Torvalds int i; 2611da177e4SLinus Torvalds 2621da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 2631da177e4SLinus Torvalds GFP_KERNEL); 2641da177e4SLinus Torvalds if (!_origins) { 26572d94861SAlasdair G Kergon DMERR("unable to allocate memory"); 2661da177e4SLinus Torvalds return -ENOMEM; 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 2701da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 2711da177e4SLinus Torvalds init_rwsem(&_origins_lock); 2721da177e4SLinus Torvalds 2731da177e4SLinus Torvalds return 0; 2741da177e4SLinus Torvalds } 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds static void exit_origin_hash(void) 2771da177e4SLinus Torvalds { 2781da177e4SLinus Torvalds kfree(_origins); 2791da177e4SLinus Torvalds } 2801da177e4SLinus Torvalds 281028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev) 2821da177e4SLinus Torvalds { 2831da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 2841da177e4SLinus Torvalds } 2851da177e4SLinus Torvalds 2861da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 2871da177e4SLinus Torvalds { 2881da177e4SLinus Torvalds struct list_head *ol; 2891da177e4SLinus Torvalds struct origin *o; 2901da177e4SLinus Torvalds 2911da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 2921da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 2931da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 2941da177e4SLinus Torvalds return o; 2951da177e4SLinus Torvalds 2961da177e4SLinus Torvalds return NULL; 2971da177e4SLinus Torvalds } 2981da177e4SLinus Torvalds 2991da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 3001da177e4SLinus Torvalds { 3011da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 3021da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 3031da177e4SLinus Torvalds } 3041da177e4SLinus Torvalds 3051da177e4SLinus Torvalds /* 306c1f0c183SMike Snitzer * _origins_lock must be held when calling this function. 307c1f0c183SMike Snitzer * Returns number of snapshots registered using the supplied cow device, plus: 308c1f0c183SMike Snitzer * snap_src - a snapshot suitable for use as a source of exception handover 309c1f0c183SMike Snitzer * snap_dest - a snapshot capable of receiving exception handover. 310c1f0c183SMike Snitzer * 311c1f0c183SMike Snitzer * Possible return values and states: 312c1f0c183SMike Snitzer * 0: NULL, NULL - first new snapshot 313c1f0c183SMike Snitzer * 1: snap_src, NULL - normal snapshot 314c1f0c183SMike Snitzer * 2: snap_src, snap_dest - waiting for handover 315c1f0c183SMike Snitzer * 2: snap_src, NULL - handed over, waiting for old to be deleted 316c1f0c183SMike Snitzer * 1: NULL, snap_dest - source got destroyed without handover 317c1f0c183SMike Snitzer */ 318c1f0c183SMike Snitzer static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, 319c1f0c183SMike Snitzer struct dm_snapshot **snap_src, 320c1f0c183SMike Snitzer struct dm_snapshot **snap_dest) 321c1f0c183SMike Snitzer { 322c1f0c183SMike Snitzer struct dm_snapshot *s; 323c1f0c183SMike Snitzer struct origin *o; 324c1f0c183SMike Snitzer int count = 0; 325c1f0c183SMike Snitzer int active; 326c1f0c183SMike Snitzer 327c1f0c183SMike Snitzer o = __lookup_origin(snap->origin->bdev); 328c1f0c183SMike Snitzer if (!o) 329c1f0c183SMike Snitzer goto out; 330c1f0c183SMike Snitzer 331c1f0c183SMike Snitzer list_for_each_entry(s, &o->snapshots, list) { 332c1f0c183SMike Snitzer if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) 333c1f0c183SMike Snitzer continue; 334c1f0c183SMike Snitzer 335c1f0c183SMike Snitzer down_read(&s->lock); 336c1f0c183SMike Snitzer active = s->active; 337c1f0c183SMike Snitzer up_read(&s->lock); 338c1f0c183SMike Snitzer 339c1f0c183SMike Snitzer if (active) { 340c1f0c183SMike Snitzer if (snap_src) 341c1f0c183SMike Snitzer *snap_src = s; 342c1f0c183SMike Snitzer } else if (snap_dest) 343c1f0c183SMike Snitzer *snap_dest = s; 344c1f0c183SMike Snitzer 345c1f0c183SMike Snitzer count++; 346c1f0c183SMike Snitzer } 347c1f0c183SMike Snitzer 348c1f0c183SMike Snitzer out: 349c1f0c183SMike Snitzer return count; 350c1f0c183SMike Snitzer } 351c1f0c183SMike Snitzer 352c1f0c183SMike Snitzer /* 353c1f0c183SMike Snitzer * On success, returns 1 if this snapshot is a handover destination, 354c1f0c183SMike Snitzer * otherwise returns 0. 355c1f0c183SMike Snitzer */ 356c1f0c183SMike Snitzer static int __validate_exception_handover(struct dm_snapshot *snap) 357c1f0c183SMike Snitzer { 358c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 359c1f0c183SMike Snitzer 360c1f0c183SMike Snitzer /* Does snapshot need exceptions handed over to it? */ 361c1f0c183SMike Snitzer if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest) == 2) || 362c1f0c183SMike Snitzer snap_dest) { 363c1f0c183SMike Snitzer snap->ti->error = "Snapshot cow pairing for exception " 364c1f0c183SMike Snitzer "table handover failed"; 365c1f0c183SMike Snitzer return -EINVAL; 366c1f0c183SMike Snitzer } 367c1f0c183SMike Snitzer 368c1f0c183SMike Snitzer /* 369c1f0c183SMike Snitzer * If no snap_src was found, snap cannot become a handover 370c1f0c183SMike Snitzer * destination. 371c1f0c183SMike Snitzer */ 372c1f0c183SMike Snitzer if (!snap_src) 373c1f0c183SMike Snitzer return 0; 374c1f0c183SMike Snitzer 375c1f0c183SMike Snitzer return 1; 376c1f0c183SMike Snitzer } 377c1f0c183SMike Snitzer 378c1f0c183SMike Snitzer static void __insert_snapshot(struct origin *o, struct dm_snapshot *s) 379c1f0c183SMike Snitzer { 380c1f0c183SMike Snitzer struct dm_snapshot *l; 381c1f0c183SMike Snitzer 382c1f0c183SMike Snitzer /* Sort the list according to chunk size, largest-first smallest-last */ 383c1f0c183SMike Snitzer list_for_each_entry(l, &o->snapshots, list) 384c1f0c183SMike Snitzer if (l->store->chunk_size < s->store->chunk_size) 385c1f0c183SMike Snitzer break; 386c1f0c183SMike Snitzer list_add_tail(&s->list, &l->list); 387c1f0c183SMike Snitzer } 388c1f0c183SMike Snitzer 389c1f0c183SMike Snitzer /* 3901da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 3911da177e4SLinus Torvalds * up when the origin has a write on it. 392c1f0c183SMike Snitzer * 393c1f0c183SMike Snitzer * Also validate snapshot exception store handovers. 394c1f0c183SMike Snitzer * On success, returns 1 if this registration is a handover destination, 395c1f0c183SMike Snitzer * otherwise returns 0. 3961da177e4SLinus Torvalds */ 3971da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 3981da177e4SLinus Torvalds { 399c1f0c183SMike Snitzer struct origin *o, *new_o = NULL; 4001da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 401c1f0c183SMike Snitzer int r = 0; 4021da177e4SLinus Torvalds 40360c856c8SMikulas Patocka new_o = kmalloc(sizeof(*new_o), GFP_KERNEL); 40460c856c8SMikulas Patocka if (!new_o) 40560c856c8SMikulas Patocka return -ENOMEM; 40660c856c8SMikulas Patocka 4071da177e4SLinus Torvalds down_write(&_origins_lock); 4081da177e4SLinus Torvalds 409c1f0c183SMike Snitzer r = __validate_exception_handover(snap); 410c1f0c183SMike Snitzer if (r < 0) { 411c1f0c183SMike Snitzer kfree(new_o); 412c1f0c183SMike Snitzer goto out; 413c1f0c183SMike Snitzer } 414c1f0c183SMike Snitzer 415c1f0c183SMike Snitzer o = __lookup_origin(bdev); 41660c856c8SMikulas Patocka if (o) 41760c856c8SMikulas Patocka kfree(new_o); 41860c856c8SMikulas Patocka else { 4191da177e4SLinus Torvalds /* New origin */ 42060c856c8SMikulas Patocka o = new_o; 4211da177e4SLinus Torvalds 4221da177e4SLinus Torvalds /* Initialise the struct */ 4231da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 4241da177e4SLinus Torvalds o->bdev = bdev; 4251da177e4SLinus Torvalds 4261da177e4SLinus Torvalds __insert_origin(o); 4271da177e4SLinus Torvalds } 4281da177e4SLinus Torvalds 429c1f0c183SMike Snitzer __insert_snapshot(o, snap); 430c1f0c183SMike Snitzer 431c1f0c183SMike Snitzer out: 432c1f0c183SMike Snitzer up_write(&_origins_lock); 433c1f0c183SMike Snitzer 434c1f0c183SMike Snitzer return r; 435c1f0c183SMike Snitzer } 436c1f0c183SMike Snitzer 437c1f0c183SMike Snitzer /* 438c1f0c183SMike Snitzer * Move snapshot to correct place in list according to chunk size. 439c1f0c183SMike Snitzer */ 440c1f0c183SMike Snitzer static void reregister_snapshot(struct dm_snapshot *s) 441c1f0c183SMike Snitzer { 442c1f0c183SMike Snitzer struct block_device *bdev = s->origin->bdev; 443c1f0c183SMike Snitzer 444c1f0c183SMike Snitzer down_write(&_origins_lock); 445c1f0c183SMike Snitzer 446c1f0c183SMike Snitzer list_del(&s->list); 447c1f0c183SMike Snitzer __insert_snapshot(__lookup_origin(bdev), s); 4481da177e4SLinus Torvalds 4491da177e4SLinus Torvalds up_write(&_origins_lock); 4501da177e4SLinus Torvalds } 4511da177e4SLinus Torvalds 4521da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 4531da177e4SLinus Torvalds { 4541da177e4SLinus Torvalds struct origin *o; 4551da177e4SLinus Torvalds 4561da177e4SLinus Torvalds down_write(&_origins_lock); 4571da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 4581da177e4SLinus Torvalds 4591da177e4SLinus Torvalds list_del(&s->list); 460c1f0c183SMike Snitzer if (o && list_empty(&o->snapshots)) { 4611da177e4SLinus Torvalds list_del(&o->hash_list); 4621da177e4SLinus Torvalds kfree(o); 4631da177e4SLinus Torvalds } 4641da177e4SLinus Torvalds 4651da177e4SLinus Torvalds up_write(&_origins_lock); 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 4681da177e4SLinus Torvalds /* 4691da177e4SLinus Torvalds * Implementation of the exception hash tables. 470d74f81f8SMilan Broz * The lowest hash_shift bits of the chunk number are ignored, allowing 471d74f81f8SMilan Broz * some consecutive chunks to be grouped together. 4721da177e4SLinus Torvalds */ 4733510cb94SJon Brassow static int dm_exception_table_init(struct dm_exception_table *et, 4743510cb94SJon Brassow uint32_t size, unsigned hash_shift) 4751da177e4SLinus Torvalds { 4761da177e4SLinus Torvalds unsigned int i; 4771da177e4SLinus Torvalds 478d74f81f8SMilan Broz et->hash_shift = hash_shift; 4791da177e4SLinus Torvalds et->hash_mask = size - 1; 4801da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 4811da177e4SLinus Torvalds if (!et->table) 4821da177e4SLinus Torvalds return -ENOMEM; 4831da177e4SLinus Torvalds 4841da177e4SLinus Torvalds for (i = 0; i < size; i++) 4851da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 4861da177e4SLinus Torvalds 4871da177e4SLinus Torvalds return 0; 4881da177e4SLinus Torvalds } 4891da177e4SLinus Torvalds 4903510cb94SJon Brassow static void dm_exception_table_exit(struct dm_exception_table *et, 491191437a5SJon Brassow struct kmem_cache *mem) 4921da177e4SLinus Torvalds { 4931da177e4SLinus Torvalds struct list_head *slot; 4941d4989c8SJon Brassow struct dm_exception *ex, *next; 4951da177e4SLinus Torvalds int i, size; 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds size = et->hash_mask + 1; 4981da177e4SLinus Torvalds for (i = 0; i < size; i++) { 4991da177e4SLinus Torvalds slot = et->table + i; 5001da177e4SLinus Torvalds 5011da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 5021da177e4SLinus Torvalds kmem_cache_free(mem, ex); 5031da177e4SLinus Torvalds } 5041da177e4SLinus Torvalds 5051da177e4SLinus Torvalds vfree(et->table); 5061da177e4SLinus Torvalds } 5071da177e4SLinus Torvalds 508191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk) 5091da177e4SLinus Torvalds { 510d74f81f8SMilan Broz return (chunk >> et->hash_shift) & et->hash_mask; 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 5133510cb94SJon Brassow static void dm_remove_exception(struct dm_exception *e) 5141da177e4SLinus Torvalds { 5151da177e4SLinus Torvalds list_del(&e->hash_list); 5161da177e4SLinus Torvalds } 5171da177e4SLinus Torvalds 5181da177e4SLinus Torvalds /* 5191da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 5201da177e4SLinus Torvalds * remapped. 5211da177e4SLinus Torvalds */ 5223510cb94SJon Brassow static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et, 5231da177e4SLinus Torvalds chunk_t chunk) 5241da177e4SLinus Torvalds { 5251da177e4SLinus Torvalds struct list_head *slot; 5261d4989c8SJon Brassow struct dm_exception *e; 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 5291da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 530d74f81f8SMilan Broz if (chunk >= e->old_chunk && 531d74f81f8SMilan Broz chunk <= e->old_chunk + dm_consecutive_chunk_count(e)) 5321da177e4SLinus Torvalds return e; 5331da177e4SLinus Torvalds 5341da177e4SLinus Torvalds return NULL; 5351da177e4SLinus Torvalds } 5361da177e4SLinus Torvalds 5373510cb94SJon Brassow static struct dm_exception *alloc_completed_exception(void) 5381da177e4SLinus Torvalds { 5391d4989c8SJon Brassow struct dm_exception *e; 5401da177e4SLinus Torvalds 5411da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 5421da177e4SLinus Torvalds if (!e) 5431da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 5441da177e4SLinus Torvalds 5451da177e4SLinus Torvalds return e; 5461da177e4SLinus Torvalds } 5471da177e4SLinus Torvalds 5483510cb94SJon Brassow static void free_completed_exception(struct dm_exception *e) 5491da177e4SLinus Torvalds { 5501da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 5511da177e4SLinus Torvalds } 5521da177e4SLinus Torvalds 55392e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s) 5541da177e4SLinus Torvalds { 55592e86812SMikulas Patocka struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool, 55692e86812SMikulas Patocka GFP_NOIO); 55792e86812SMikulas Patocka 558879129d2SMikulas Patocka atomic_inc(&s->pending_exceptions_count); 55992e86812SMikulas Patocka pe->snap = s; 56092e86812SMikulas Patocka 56192e86812SMikulas Patocka return pe; 5621da177e4SLinus Torvalds } 5631da177e4SLinus Torvalds 564028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe) 5651da177e4SLinus Torvalds { 566879129d2SMikulas Patocka struct dm_snapshot *s = pe->snap; 567879129d2SMikulas Patocka 568879129d2SMikulas Patocka mempool_free(pe, s->pending_pool); 569879129d2SMikulas Patocka smp_mb__before_atomic_dec(); 570879129d2SMikulas Patocka atomic_dec(&s->pending_exceptions_count); 5711da177e4SLinus Torvalds } 5721da177e4SLinus Torvalds 5733510cb94SJon Brassow static void dm_insert_exception(struct dm_exception_table *eh, 5741d4989c8SJon Brassow struct dm_exception *new_e) 575d74f81f8SMilan Broz { 576d74f81f8SMilan Broz struct list_head *l; 5771d4989c8SJon Brassow struct dm_exception *e = NULL; 578d74f81f8SMilan Broz 579d74f81f8SMilan Broz l = &eh->table[exception_hash(eh, new_e->old_chunk)]; 580d74f81f8SMilan Broz 581d74f81f8SMilan Broz /* Add immediately if this table doesn't support consecutive chunks */ 582d74f81f8SMilan Broz if (!eh->hash_shift) 583d74f81f8SMilan Broz goto out; 584d74f81f8SMilan Broz 585d74f81f8SMilan Broz /* List is ordered by old_chunk */ 586d74f81f8SMilan Broz list_for_each_entry_reverse(e, l, hash_list) { 587d74f81f8SMilan Broz /* Insert after an existing chunk? */ 588d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk + 589d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1) && 590d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) + 591d74f81f8SMilan Broz dm_consecutive_chunk_count(e) + 1)) { 592d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 5933510cb94SJon Brassow free_completed_exception(new_e); 594d74f81f8SMilan Broz return; 595d74f81f8SMilan Broz } 596d74f81f8SMilan Broz 597d74f81f8SMilan Broz /* Insert before an existing chunk? */ 598d74f81f8SMilan Broz if (new_e->old_chunk == (e->old_chunk - 1) && 599d74f81f8SMilan Broz new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) { 600d74f81f8SMilan Broz dm_consecutive_chunk_count_inc(e); 601d74f81f8SMilan Broz e->old_chunk--; 602d74f81f8SMilan Broz e->new_chunk--; 6033510cb94SJon Brassow free_completed_exception(new_e); 604d74f81f8SMilan Broz return; 605d74f81f8SMilan Broz } 606d74f81f8SMilan Broz 607d74f81f8SMilan Broz if (new_e->old_chunk > e->old_chunk) 608d74f81f8SMilan Broz break; 609d74f81f8SMilan Broz } 610d74f81f8SMilan Broz 611d74f81f8SMilan Broz out: 612d74f81f8SMilan Broz list_add(&new_e->hash_list, e ? &e->hash_list : l); 613d74f81f8SMilan Broz } 614d74f81f8SMilan Broz 615a159c1acSJonathan Brassow /* 616a159c1acSJonathan Brassow * Callback used by the exception stores to load exceptions when 617a159c1acSJonathan Brassow * initialising. 618a159c1acSJonathan Brassow */ 619a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new) 6201da177e4SLinus Torvalds { 621a159c1acSJonathan Brassow struct dm_snapshot *s = context; 6221d4989c8SJon Brassow struct dm_exception *e; 6231da177e4SLinus Torvalds 6243510cb94SJon Brassow e = alloc_completed_exception(); 6251da177e4SLinus Torvalds if (!e) 6261da177e4SLinus Torvalds return -ENOMEM; 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds e->old_chunk = old; 629d74f81f8SMilan Broz 630d74f81f8SMilan Broz /* Consecutive_count is implicitly initialised to zero */ 6311da177e4SLinus Torvalds e->new_chunk = new; 632d74f81f8SMilan Broz 6333510cb94SJon Brassow dm_insert_exception(&s->complete, e); 634d74f81f8SMilan Broz 6351da177e4SLinus Torvalds return 0; 6361da177e4SLinus Torvalds } 6371da177e4SLinus Torvalds 6387e201b35SMikulas Patocka #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r))) 6397e201b35SMikulas Patocka 6407e201b35SMikulas Patocka /* 6417e201b35SMikulas Patocka * Return a minimum chunk size of all snapshots that have the specified origin. 6427e201b35SMikulas Patocka * Return zero if the origin has no snapshots. 6437e201b35SMikulas Patocka */ 6447e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o) 6457e201b35SMikulas Patocka { 6467e201b35SMikulas Patocka struct dm_snapshot *snap; 6477e201b35SMikulas Patocka unsigned chunk_size = 0; 6487e201b35SMikulas Patocka 6497e201b35SMikulas Patocka if (o) 6507e201b35SMikulas Patocka list_for_each_entry(snap, &o->snapshots, list) 6517e201b35SMikulas Patocka chunk_size = min_not_zero(chunk_size, 6527e201b35SMikulas Patocka snap->store->chunk_size); 6537e201b35SMikulas Patocka 6547e201b35SMikulas Patocka return chunk_size; 6557e201b35SMikulas Patocka } 6567e201b35SMikulas Patocka 6571da177e4SLinus Torvalds /* 6581da177e4SLinus Torvalds * Hard coded magic. 6591da177e4SLinus Torvalds */ 6601da177e4SLinus Torvalds static int calc_max_buckets(void) 6611da177e4SLinus Torvalds { 6621da177e4SLinus Torvalds /* use a fixed size of 2MB */ 6631da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 6641da177e4SLinus Torvalds mem /= sizeof(struct list_head); 6651da177e4SLinus Torvalds 6661da177e4SLinus Torvalds return mem; 6671da177e4SLinus Torvalds } 6681da177e4SLinus Torvalds 6691da177e4SLinus Torvalds /* 6701da177e4SLinus Torvalds * Allocate room for a suitable hash table. 6711da177e4SLinus Torvalds */ 672fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s) 6731da177e4SLinus Torvalds { 6741da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 6751da177e4SLinus Torvalds 6761da177e4SLinus Torvalds /* 6771da177e4SLinus Torvalds * Calculate based on the size of the original volume or 6781da177e4SLinus Torvalds * the COW volume... 6791da177e4SLinus Torvalds */ 680fc56f6fbSMike Snitzer cow_dev_size = get_dev_size(s->cow->bdev); 6811da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 6821da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 6831da177e4SLinus Torvalds 684fee1998eSJonathan Brassow hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; 6851da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 6861da177e4SLinus Torvalds 6878e87b9b8SMikulas Patocka if (hash_size < 64) 6888e87b9b8SMikulas Patocka hash_size = 64; 6898defd830SRobert P. J. Day hash_size = rounddown_pow_of_two(hash_size); 6903510cb94SJon Brassow if (dm_exception_table_init(&s->complete, hash_size, 691d74f81f8SMilan Broz DM_CHUNK_CONSECUTIVE_BITS)) 6921da177e4SLinus Torvalds return -ENOMEM; 6931da177e4SLinus Torvalds 6941da177e4SLinus Torvalds /* 6951da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 6961da177e4SLinus Torvalds * Make this smaller than the real hash table 6971da177e4SLinus Torvalds */ 6981da177e4SLinus Torvalds hash_size >>= 3; 6991da177e4SLinus Torvalds if (hash_size < 64) 7001da177e4SLinus Torvalds hash_size = 64; 7011da177e4SLinus Torvalds 7023510cb94SJon Brassow if (dm_exception_table_init(&s->pending, hash_size, 0)) { 7033510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 7041da177e4SLinus Torvalds return -ENOMEM; 7051da177e4SLinus Torvalds } 7061da177e4SLinus Torvalds 7071da177e4SLinus Torvalds return 0; 7081da177e4SLinus Torvalds } 7091da177e4SLinus Torvalds 7101da177e4SLinus Torvalds /* 7111da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 7121da177e4SLinus Torvalds */ 7131da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 7141da177e4SLinus Torvalds { 7151da177e4SLinus Torvalds struct dm_snapshot *s; 716cd45daffSMikulas Patocka int i; 7171da177e4SLinus Torvalds int r = -EINVAL; 718fc56f6fbSMike Snitzer char *origin_path, *cow_path; 719fee1998eSJonathan Brassow unsigned args_used; 7201da177e4SLinus Torvalds 7214c7e3bf4SMark McLoughlin if (argc != 4) { 72272d94861SAlasdair G Kergon ti->error = "requires exactly 4 arguments"; 7231da177e4SLinus Torvalds r = -EINVAL; 724fc56f6fbSMike Snitzer goto bad; 7251da177e4SLinus Torvalds } 7261da177e4SLinus Torvalds 7271da177e4SLinus Torvalds origin_path = argv[0]; 728fee1998eSJonathan Brassow argv++; 729fee1998eSJonathan Brassow argc--; 7301da177e4SLinus Torvalds 7311da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 732fee1998eSJonathan Brassow if (!s) { 7331da177e4SLinus Torvalds ti->error = "Cannot allocate snapshot context private " 7341da177e4SLinus Torvalds "structure"; 7351da177e4SLinus Torvalds r = -ENOMEM; 736fc56f6fbSMike Snitzer goto bad; 7371da177e4SLinus Torvalds } 7381da177e4SLinus Torvalds 739fc56f6fbSMike Snitzer cow_path = argv[0]; 740fc56f6fbSMike Snitzer argv++; 741fc56f6fbSMike Snitzer argc--; 742fc56f6fbSMike Snitzer 743fc56f6fbSMike Snitzer r = dm_get_device(ti, cow_path, 0, 0, 744fc56f6fbSMike Snitzer FMODE_READ | FMODE_WRITE, &s->cow); 745fc56f6fbSMike Snitzer if (r) { 746fc56f6fbSMike Snitzer ti->error = "Cannot get COW device"; 747fc56f6fbSMike Snitzer goto bad_cow; 748fc56f6fbSMike Snitzer } 749fc56f6fbSMike Snitzer 750fc56f6fbSMike Snitzer r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store); 751fc56f6fbSMike Snitzer if (r) { 752fc56f6fbSMike Snitzer ti->error = "Couldn't create exception store"; 753fc56f6fbSMike Snitzer r = -EINVAL; 754fc56f6fbSMike Snitzer goto bad_store; 755fc56f6fbSMike Snitzer } 756fc56f6fbSMike Snitzer 757fc56f6fbSMike Snitzer argv += args_used; 758fc56f6fbSMike Snitzer argc -= args_used; 759fc56f6fbSMike Snitzer 7601da177e4SLinus Torvalds r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); 7611da177e4SLinus Torvalds if (r) { 7621da177e4SLinus Torvalds ti->error = "Cannot get origin device"; 763fee1998eSJonathan Brassow goto bad_origin; 7641da177e4SLinus Torvalds } 7651da177e4SLinus Torvalds 766fc56f6fbSMike Snitzer s->ti = ti; 7671da177e4SLinus Torvalds s->valid = 1; 768aa14edebSAlasdair G Kergon s->active = 0; 769c26655caSMike Snitzer s->suspended = 0; 770879129d2SMikulas Patocka atomic_set(&s->pending_exceptions_count, 0); 7711da177e4SLinus Torvalds init_rwsem(&s->lock); 772c1f0c183SMike Snitzer INIT_LIST_HEAD(&s->list); 773ca3a931fSAlasdair G Kergon spin_lock_init(&s->pe_lock); 7741da177e4SLinus Torvalds 7751da177e4SLinus Torvalds /* Allocate hash table for COW data */ 776fee1998eSJonathan Brassow if (init_hash_tables(s)) { 7771da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 7781da177e4SLinus Torvalds r = -ENOMEM; 779fee1998eSJonathan Brassow goto bad_hash_tables; 7801da177e4SLinus Torvalds } 7811da177e4SLinus Torvalds 782eb69aca5SHeinz Mauelshagen r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 7831da177e4SLinus Torvalds if (r) { 7841da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 785fee1998eSJonathan Brassow goto bad_kcopyd; 7861da177e4SLinus Torvalds } 7871da177e4SLinus Torvalds 78892e86812SMikulas Patocka s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); 78992e86812SMikulas Patocka if (!s->pending_pool) { 79092e86812SMikulas Patocka ti->error = "Could not allocate mempool for pending exceptions"; 791fee1998eSJonathan Brassow goto bad_pending_pool; 79292e86812SMikulas Patocka } 79392e86812SMikulas Patocka 794cd45daffSMikulas Patocka s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, 795cd45daffSMikulas Patocka tracked_chunk_cache); 796cd45daffSMikulas Patocka if (!s->tracked_chunk_pool) { 797cd45daffSMikulas Patocka ti->error = "Could not allocate tracked_chunk mempool for " 798cd45daffSMikulas Patocka "tracking reads"; 79992e86812SMikulas Patocka goto bad_tracked_chunk_pool; 800cd45daffSMikulas Patocka } 801cd45daffSMikulas Patocka 802cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 803cd45daffSMikulas Patocka INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); 804cd45daffSMikulas Patocka 805cd45daffSMikulas Patocka spin_lock_init(&s->tracked_chunk_lock); 806cd45daffSMikulas Patocka 807c1f0c183SMike Snitzer bio_list_init(&s->queued_bios); 808c1f0c183SMike Snitzer INIT_WORK(&s->queued_bios_work, flush_queued_bios); 809c1f0c183SMike Snitzer 810c1f0c183SMike Snitzer ti->private = s; 811c1f0c183SMike Snitzer ti->num_flush_requests = 1; 812c1f0c183SMike Snitzer 813c1f0c183SMike Snitzer /* Add snapshot to the list of snapshots for this origin */ 814c1f0c183SMike Snitzer /* Exceptions aren't triggered till snapshot_resume() is called */ 815c1f0c183SMike Snitzer r = register_snapshot(s); 816c1f0c183SMike Snitzer if (r == -ENOMEM) { 817c1f0c183SMike Snitzer ti->error = "Snapshot origin struct allocation failed"; 818c1f0c183SMike Snitzer goto bad_load_and_register; 819c1f0c183SMike Snitzer } else if (r < 0) { 820c1f0c183SMike Snitzer /* invalid handover, register_snapshot has set ti->error */ 821c1f0c183SMike Snitzer goto bad_load_and_register; 822c1f0c183SMike Snitzer } 823c1f0c183SMike Snitzer 824c1f0c183SMike Snitzer /* 825c1f0c183SMike Snitzer * Metadata must only be loaded into one table at once, so skip this 826c1f0c183SMike Snitzer * if metadata will be handed over during resume. 827c1f0c183SMike Snitzer * Chunk size will be set during the handover - set it to zero to 828c1f0c183SMike Snitzer * ensure it's ignored. 829c1f0c183SMike Snitzer */ 830c1f0c183SMike Snitzer if (r > 0) { 831c1f0c183SMike Snitzer s->store->chunk_size = 0; 832c1f0c183SMike Snitzer return 0; 833c1f0c183SMike Snitzer } 834c1f0c183SMike Snitzer 835493df71cSJonathan Brassow r = s->store->type->read_metadata(s->store, dm_add_exception, 836493df71cSJonathan Brassow (void *)s); 8370764147bSMilan Broz if (r < 0) { 838f9cea4f7SMark McLoughlin ti->error = "Failed to read snapshot metadata"; 839c1f0c183SMike Snitzer goto bad_read_metadata; 8400764147bSMilan Broz } else if (r > 0) { 8410764147bSMilan Broz s->valid = 0; 8420764147bSMilan Broz DMWARN("Snapshot is marked invalid."); 843f9cea4f7SMark McLoughlin } 844aa14edebSAlasdair G Kergon 8453f2412dcSMikulas Patocka if (!s->store->chunk_size) { 8463f2412dcSMikulas Patocka ti->error = "Chunk size not set"; 847c1f0c183SMike Snitzer goto bad_read_metadata; 8483f2412dcSMikulas Patocka } 849d0216849SJonathan Brassow ti->split_io = s->store->chunk_size; 8501da177e4SLinus Torvalds 8511da177e4SLinus Torvalds return 0; 8521da177e4SLinus Torvalds 853c1f0c183SMike Snitzer bad_read_metadata: 854c1f0c183SMike Snitzer unregister_snapshot(s); 855c1f0c183SMike Snitzer 856cd45daffSMikulas Patocka bad_load_and_register: 857cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 858cd45daffSMikulas Patocka 85992e86812SMikulas Patocka bad_tracked_chunk_pool: 86092e86812SMikulas Patocka mempool_destroy(s->pending_pool); 86192e86812SMikulas Patocka 862fee1998eSJonathan Brassow bad_pending_pool: 863eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 8641da177e4SLinus Torvalds 865fee1998eSJonathan Brassow bad_kcopyd: 8663510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 8673510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 8681da177e4SLinus Torvalds 869fee1998eSJonathan Brassow bad_hash_tables: 8701da177e4SLinus Torvalds dm_put_device(ti, s->origin); 8711da177e4SLinus Torvalds 872fee1998eSJonathan Brassow bad_origin: 873fc56f6fbSMike Snitzer dm_exception_store_destroy(s->store); 874fc56f6fbSMike Snitzer 875fc56f6fbSMike Snitzer bad_store: 876fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 877fc56f6fbSMike Snitzer 878fc56f6fbSMike Snitzer bad_cow: 8791da177e4SLinus Torvalds kfree(s); 8801da177e4SLinus Torvalds 881fc56f6fbSMike Snitzer bad: 8821da177e4SLinus Torvalds return r; 8831da177e4SLinus Torvalds } 8841da177e4SLinus Torvalds 88531c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s) 88631c93a0cSMilan Broz { 887eb69aca5SHeinz Mauelshagen dm_kcopyd_client_destroy(s->kcopyd_client); 88831c93a0cSMilan Broz s->kcopyd_client = NULL; 88931c93a0cSMilan Broz 8903510cb94SJon Brassow dm_exception_table_exit(&s->pending, pending_cache); 8913510cb94SJon Brassow dm_exception_table_exit(&s->complete, exception_cache); 89231c93a0cSMilan Broz } 89331c93a0cSMilan Broz 894c1f0c183SMike Snitzer static void __handover_exceptions(struct dm_snapshot *snap_src, 895c1f0c183SMike Snitzer struct dm_snapshot *snap_dest) 896c1f0c183SMike Snitzer { 897c1f0c183SMike Snitzer union { 898c1f0c183SMike Snitzer struct dm_exception_table table_swap; 899c1f0c183SMike Snitzer struct dm_exception_store *store_swap; 900c1f0c183SMike Snitzer } u; 901c1f0c183SMike Snitzer 902c1f0c183SMike Snitzer /* 903c1f0c183SMike Snitzer * Swap all snapshot context information between the two instances. 904c1f0c183SMike Snitzer */ 905c1f0c183SMike Snitzer u.table_swap = snap_dest->complete; 906c1f0c183SMike Snitzer snap_dest->complete = snap_src->complete; 907c1f0c183SMike Snitzer snap_src->complete = u.table_swap; 908c1f0c183SMike Snitzer 909c1f0c183SMike Snitzer u.store_swap = snap_dest->store; 910c1f0c183SMike Snitzer snap_dest->store = snap_src->store; 911c1f0c183SMike Snitzer snap_src->store = u.store_swap; 912c1f0c183SMike Snitzer 913c1f0c183SMike Snitzer snap_dest->store->snap = snap_dest; 914c1f0c183SMike Snitzer snap_src->store->snap = snap_src; 915c1f0c183SMike Snitzer 916c1f0c183SMike Snitzer snap_dest->ti->split_io = snap_dest->store->chunk_size; 917c1f0c183SMike Snitzer snap_dest->valid = snap_src->valid; 918c1f0c183SMike Snitzer 919c1f0c183SMike Snitzer /* 920c1f0c183SMike Snitzer * Set source invalid to ensure it receives no further I/O. 921c1f0c183SMike Snitzer */ 922c1f0c183SMike Snitzer snap_src->valid = 0; 923c1f0c183SMike Snitzer } 924c1f0c183SMike Snitzer 9251da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 9261da177e4SLinus Torvalds { 927cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 928cd45daffSMikulas Patocka int i; 929cd45daffSMikulas Patocka #endif 930028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 931c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 9321da177e4SLinus Torvalds 933ca3a931fSAlasdair G Kergon flush_workqueue(ksnapd); 934ca3a931fSAlasdair G Kergon 935c1f0c183SMike Snitzer down_read(&_origins_lock); 936c1f0c183SMike Snitzer /* Check whether exception handover must be cancelled */ 937c1f0c183SMike Snitzer (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); 938c1f0c183SMike Snitzer if (snap_src && snap_dest && (s == snap_src)) { 939c1f0c183SMike Snitzer down_write(&snap_dest->lock); 940c1f0c183SMike Snitzer snap_dest->valid = 0; 941c1f0c183SMike Snitzer up_write(&snap_dest->lock); 942c1f0c183SMike Snitzer DMERR("Cancelling snapshot handover."); 943c1f0c183SMike Snitzer } 944c1f0c183SMike Snitzer up_read(&_origins_lock); 945c1f0c183SMike Snitzer 946138728dcSAlasdair G Kergon /* Prevent further origin writes from using this snapshot. */ 947138728dcSAlasdair G Kergon /* After this returns there can be no new kcopyd jobs. */ 9481da177e4SLinus Torvalds unregister_snapshot(s); 9491da177e4SLinus Torvalds 950879129d2SMikulas Patocka while (atomic_read(&s->pending_exceptions_count)) 95190fa1527SMikulas Patocka msleep(1); 952879129d2SMikulas Patocka /* 953879129d2SMikulas Patocka * Ensure instructions in mempool_destroy aren't reordered 954879129d2SMikulas Patocka * before atomic_read. 955879129d2SMikulas Patocka */ 956879129d2SMikulas Patocka smp_mb(); 957879129d2SMikulas Patocka 958cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG 959cd45daffSMikulas Patocka for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 960cd45daffSMikulas Patocka BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); 961cd45daffSMikulas Patocka #endif 962cd45daffSMikulas Patocka 963cd45daffSMikulas Patocka mempool_destroy(s->tracked_chunk_pool); 964cd45daffSMikulas Patocka 96531c93a0cSMilan Broz __free_exceptions(s); 9661da177e4SLinus Torvalds 96792e86812SMikulas Patocka mempool_destroy(s->pending_pool); 96892e86812SMikulas Patocka 9691da177e4SLinus Torvalds dm_put_device(ti, s->origin); 970fee1998eSJonathan Brassow 971fee1998eSJonathan Brassow dm_exception_store_destroy(s->store); 972138728dcSAlasdair G Kergon 973fc56f6fbSMike Snitzer dm_put_device(ti, s->cow); 974fc56f6fbSMike Snitzer 9751da177e4SLinus Torvalds kfree(s); 9761da177e4SLinus Torvalds } 9771da177e4SLinus Torvalds 9781da177e4SLinus Torvalds /* 9791da177e4SLinus Torvalds * Flush a list of buffers. 9801da177e4SLinus Torvalds */ 9811da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 9821da177e4SLinus Torvalds { 9831da177e4SLinus Torvalds struct bio *n; 9841da177e4SLinus Torvalds 9851da177e4SLinus Torvalds while (bio) { 9861da177e4SLinus Torvalds n = bio->bi_next; 9871da177e4SLinus Torvalds bio->bi_next = NULL; 9881da177e4SLinus Torvalds generic_make_request(bio); 9891da177e4SLinus Torvalds bio = n; 9901da177e4SLinus Torvalds } 9911da177e4SLinus Torvalds } 9921da177e4SLinus Torvalds 993c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work) 994ca3a931fSAlasdair G Kergon { 995c4028958SDavid Howells struct dm_snapshot *s = 996c4028958SDavid Howells container_of(work, struct dm_snapshot, queued_bios_work); 997ca3a931fSAlasdair G Kergon struct bio *queued_bios; 998ca3a931fSAlasdair G Kergon unsigned long flags; 999ca3a931fSAlasdair G Kergon 1000ca3a931fSAlasdair G Kergon spin_lock_irqsave(&s->pe_lock, flags); 1001ca3a931fSAlasdair G Kergon queued_bios = bio_list_get(&s->queued_bios); 1002ca3a931fSAlasdair G Kergon spin_unlock_irqrestore(&s->pe_lock, flags); 1003ca3a931fSAlasdair G Kergon 1004ca3a931fSAlasdair G Kergon flush_bios(queued_bios); 1005ca3a931fSAlasdair G Kergon } 1006ca3a931fSAlasdair G Kergon 10071da177e4SLinus Torvalds /* 10081da177e4SLinus Torvalds * Error a list of buffers. 10091da177e4SLinus Torvalds */ 10101da177e4SLinus Torvalds static void error_bios(struct bio *bio) 10111da177e4SLinus Torvalds { 10121da177e4SLinus Torvalds struct bio *n; 10131da177e4SLinus Torvalds 10141da177e4SLinus Torvalds while (bio) { 10151da177e4SLinus Torvalds n = bio->bi_next; 10161da177e4SLinus Torvalds bio->bi_next = NULL; 10176712ecf8SNeilBrown bio_io_error(bio); 10181da177e4SLinus Torvalds bio = n; 10191da177e4SLinus Torvalds } 10201da177e4SLinus Torvalds } 10211da177e4SLinus Torvalds 1022695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err) 102376df1c65SAlasdair G Kergon { 102476df1c65SAlasdair G Kergon if (!s->valid) 102576df1c65SAlasdair G Kergon return; 102676df1c65SAlasdair G Kergon 102776df1c65SAlasdair G Kergon if (err == -EIO) 102876df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Error reading/writing."); 102976df1c65SAlasdair G Kergon else if (err == -ENOMEM) 103076df1c65SAlasdair G Kergon DMERR("Invalidating snapshot: Unable to allocate exception."); 103176df1c65SAlasdair G Kergon 1032493df71cSJonathan Brassow if (s->store->type->drop_snapshot) 1033493df71cSJonathan Brassow s->store->type->drop_snapshot(s->store); 103476df1c65SAlasdair G Kergon 103576df1c65SAlasdair G Kergon s->valid = 0; 103676df1c65SAlasdair G Kergon 1037fc56f6fbSMike Snitzer dm_table_event(s->ti->table); 103876df1c65SAlasdair G Kergon } 103976df1c65SAlasdair G Kergon 1040028867acSAlasdair G Kergon static void get_pending_exception(struct dm_snap_pending_exception *pe) 10414b832e8dSAlasdair G Kergon { 10424b832e8dSAlasdair G Kergon atomic_inc(&pe->ref_count); 10434b832e8dSAlasdair G Kergon } 10444b832e8dSAlasdair G Kergon 1045028867acSAlasdair G Kergon static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe) 10464b832e8dSAlasdair G Kergon { 1047028867acSAlasdair G Kergon struct dm_snap_pending_exception *primary_pe; 10484b832e8dSAlasdair G Kergon struct bio *origin_bios = NULL; 10494b832e8dSAlasdair G Kergon 10504b832e8dSAlasdair G Kergon primary_pe = pe->primary_pe; 10514b832e8dSAlasdair G Kergon 10524b832e8dSAlasdair G Kergon /* 10534b832e8dSAlasdair G Kergon * If this pe is involved in a write to the origin and 10544b832e8dSAlasdair G Kergon * it is the last sibling to complete then release 10554b832e8dSAlasdair G Kergon * the bios for the original write to the origin. 10564b832e8dSAlasdair G Kergon */ 10574b832e8dSAlasdair G Kergon if (primary_pe && 10587c5f78b9SMikulas Patocka atomic_dec_and_test(&primary_pe->ref_count)) { 10594b832e8dSAlasdair G Kergon origin_bios = bio_list_get(&primary_pe->origin_bios); 10607c5f78b9SMikulas Patocka free_pending_exception(primary_pe); 10617c5f78b9SMikulas Patocka } 10624b832e8dSAlasdair G Kergon 10634b832e8dSAlasdair G Kergon /* 10644b832e8dSAlasdair G Kergon * Free the pe if it's not linked to an origin write or if 10654b832e8dSAlasdair G Kergon * it's not itself a primary pe. 10664b832e8dSAlasdair G Kergon */ 10674b832e8dSAlasdair G Kergon if (!primary_pe || primary_pe != pe) 10684b832e8dSAlasdair G Kergon free_pending_exception(pe); 10694b832e8dSAlasdair G Kergon 10704b832e8dSAlasdair G Kergon return origin_bios; 10714b832e8dSAlasdair G Kergon } 10724b832e8dSAlasdair G Kergon 1073028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success) 10741da177e4SLinus Torvalds { 10751d4989c8SJon Brassow struct dm_exception *e; 10761da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 10779d493fa8SAlasdair G Kergon struct bio *origin_bios = NULL; 10789d493fa8SAlasdair G Kergon struct bio *snapshot_bios = NULL; 10799d493fa8SAlasdair G Kergon int error = 0; 10801da177e4SLinus Torvalds 108176df1c65SAlasdair G Kergon if (!success) { 108276df1c65SAlasdair G Kergon /* Read/write error - snapshot is unusable */ 10831da177e4SLinus Torvalds down_write(&s->lock); 1084695368acSAlasdair G Kergon __invalidate_snapshot(s, -EIO); 10859d493fa8SAlasdair G Kergon error = 1; 108676df1c65SAlasdair G Kergon goto out; 108776df1c65SAlasdair G Kergon } 108876df1c65SAlasdair G Kergon 10893510cb94SJon Brassow e = alloc_completed_exception(); 109076df1c65SAlasdair G Kergon if (!e) { 109176df1c65SAlasdair G Kergon down_write(&s->lock); 1092695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 10939d493fa8SAlasdair G Kergon error = 1; 10941da177e4SLinus Torvalds goto out; 10951da177e4SLinus Torvalds } 10961da177e4SLinus Torvalds *e = pe->e; 10971da177e4SLinus Torvalds 10989d493fa8SAlasdair G Kergon down_write(&s->lock); 10999d493fa8SAlasdair G Kergon if (!s->valid) { 11003510cb94SJon Brassow free_completed_exception(e); 11019d493fa8SAlasdair G Kergon error = 1; 11029d493fa8SAlasdair G Kergon goto out; 11039d493fa8SAlasdair G Kergon } 11049d493fa8SAlasdair G Kergon 11051da177e4SLinus Torvalds /* 1106a8d41b59SMikulas Patocka * Check for conflicting reads. This is extremely improbable, 110790fa1527SMikulas Patocka * so msleep(1) is sufficient and there is no need for a wait queue. 1108a8d41b59SMikulas Patocka */ 1109a8d41b59SMikulas Patocka while (__chunk_is_tracked(s, pe->e.old_chunk)) 111090fa1527SMikulas Patocka msleep(1); 1111a8d41b59SMikulas Patocka 1112a8d41b59SMikulas Patocka /* 11131da177e4SLinus Torvalds * Add a proper exception, and remove the 11141da177e4SLinus Torvalds * in-flight exception from the list. 11151da177e4SLinus Torvalds */ 11163510cb94SJon Brassow dm_insert_exception(&s->complete, e); 11171da177e4SLinus Torvalds 11181da177e4SLinus Torvalds out: 11193510cb94SJon Brassow dm_remove_exception(&pe->e); 11209d493fa8SAlasdair G Kergon snapshot_bios = bio_list_get(&pe->snapshot_bios); 11214b832e8dSAlasdair G Kergon origin_bios = put_pending_exception(pe); 1122b4b610f6SAlasdair G Kergon 11239d493fa8SAlasdair G Kergon up_write(&s->lock); 11249d493fa8SAlasdair G Kergon 11259d493fa8SAlasdair G Kergon /* Submit any pending write bios */ 11269d493fa8SAlasdair G Kergon if (error) 11279d493fa8SAlasdair G Kergon error_bios(snapshot_bios); 11289d493fa8SAlasdair G Kergon else 11299d493fa8SAlasdair G Kergon flush_bios(snapshot_bios); 11309d493fa8SAlasdair G Kergon 11319d493fa8SAlasdair G Kergon flush_bios(origin_bios); 11321da177e4SLinus Torvalds } 11331da177e4SLinus Torvalds 11341da177e4SLinus Torvalds static void commit_callback(void *context, int success) 11351da177e4SLinus Torvalds { 1136028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 1137028867acSAlasdair G Kergon 11381da177e4SLinus Torvalds pending_complete(pe, success); 11391da177e4SLinus Torvalds } 11401da177e4SLinus Torvalds 11411da177e4SLinus Torvalds /* 11421da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 11431da177e4SLinus Torvalds * this code so don't block. 11441da177e4SLinus Torvalds */ 11454cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context) 11461da177e4SLinus Torvalds { 1147028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = context; 11481da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 11491da177e4SLinus Torvalds 11501da177e4SLinus Torvalds if (read_err || write_err) 11511da177e4SLinus Torvalds pending_complete(pe, 0); 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds else 11541da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 1155493df71cSJonathan Brassow s->store->type->commit_exception(s->store, &pe->e, 1156b2a11465SJonathan Brassow commit_callback, pe); 11571da177e4SLinus Torvalds } 11581da177e4SLinus Torvalds 11591da177e4SLinus Torvalds /* 11601da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 11611da177e4SLinus Torvalds */ 1162028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe) 11631da177e4SLinus Torvalds { 11641da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 116522a1ceb1SHeinz Mauelshagen struct dm_io_region src, dest; 11661da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 11671da177e4SLinus Torvalds sector_t dev_size; 11681da177e4SLinus Torvalds 11691da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 11701da177e4SLinus Torvalds 11711da177e4SLinus Torvalds src.bdev = bdev; 117271fab00aSJonathan Brassow src.sector = chunk_to_sector(s->store, pe->e.old_chunk); 1173df96eee6SMikulas Patocka src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); 11741da177e4SLinus Torvalds 1175fc56f6fbSMike Snitzer dest.bdev = s->cow->bdev; 117671fab00aSJonathan Brassow dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); 11771da177e4SLinus Torvalds dest.count = src.count; 11781da177e4SLinus Torvalds 11791da177e4SLinus Torvalds /* Hand over to kcopyd */ 1180eb69aca5SHeinz Mauelshagen dm_kcopyd_copy(s->kcopyd_client, 11811da177e4SLinus Torvalds &src, 1, &dest, 0, copy_callback, pe); 11821da177e4SLinus Torvalds } 11831da177e4SLinus Torvalds 11842913808eSMikulas Patocka static struct dm_snap_pending_exception * 11852913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) 11862913808eSMikulas Patocka { 11873510cb94SJon Brassow struct dm_exception *e = dm_lookup_exception(&s->pending, chunk); 11882913808eSMikulas Patocka 11892913808eSMikulas Patocka if (!e) 11902913808eSMikulas Patocka return NULL; 11912913808eSMikulas Patocka 11922913808eSMikulas Patocka return container_of(e, struct dm_snap_pending_exception, e); 11932913808eSMikulas Patocka } 11942913808eSMikulas Patocka 11951da177e4SLinus Torvalds /* 11961da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 11971da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 11981da177e4SLinus Torvalds * it into the pending table. 11991da177e4SLinus Torvalds * 12001da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 12011da177e4SLinus Torvalds * this. 12021da177e4SLinus Torvalds */ 1203028867acSAlasdair G Kergon static struct dm_snap_pending_exception * 1204c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s, 1205c6621392SMikulas Patocka struct dm_snap_pending_exception *pe, chunk_t chunk) 12061da177e4SLinus Torvalds { 1207c6621392SMikulas Patocka struct dm_snap_pending_exception *pe2; 120876df1c65SAlasdair G Kergon 12092913808eSMikulas Patocka pe2 = __lookup_pending_exception(s, chunk); 12102913808eSMikulas Patocka if (pe2) { 12111da177e4SLinus Torvalds free_pending_exception(pe); 12122913808eSMikulas Patocka return pe2; 121376df1c65SAlasdair G Kergon } 121476df1c65SAlasdair G Kergon 12151da177e4SLinus Torvalds pe->e.old_chunk = chunk; 12161da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 12171da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 1218b4b610f6SAlasdair G Kergon pe->primary_pe = NULL; 12194b832e8dSAlasdair G Kergon atomic_set(&pe->ref_count, 0); 12201da177e4SLinus Torvalds pe->started = 0; 12211da177e4SLinus Torvalds 1222493df71cSJonathan Brassow if (s->store->type->prepare_exception(s->store, &pe->e)) { 12231da177e4SLinus Torvalds free_pending_exception(pe); 12241da177e4SLinus Torvalds return NULL; 12251da177e4SLinus Torvalds } 12261da177e4SLinus Torvalds 12274b832e8dSAlasdair G Kergon get_pending_exception(pe); 12283510cb94SJon Brassow dm_insert_exception(&s->pending, &pe->e); 12291da177e4SLinus Torvalds 12301da177e4SLinus Torvalds return pe; 12311da177e4SLinus Torvalds } 12321da177e4SLinus Torvalds 12331d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, 1234d74f81f8SMilan Broz struct bio *bio, chunk_t chunk) 12351da177e4SLinus Torvalds { 1236fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 123771fab00aSJonathan Brassow bio->bi_sector = chunk_to_sector(s->store, 123871fab00aSJonathan Brassow dm_chunk_number(e->new_chunk) + 1239d74f81f8SMilan Broz (chunk - e->old_chunk)) + 124071fab00aSJonathan Brassow (bio->bi_sector & 124171fab00aSJonathan Brassow s->store->chunk_mask); 12421da177e4SLinus Torvalds } 12431da177e4SLinus Torvalds 12441da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio, 12451da177e4SLinus Torvalds union map_info *map_context) 12461da177e4SLinus Torvalds { 12471d4989c8SJon Brassow struct dm_exception *e; 1248028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1249d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 12501da177e4SLinus Torvalds chunk_t chunk; 1251028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe = NULL; 12521da177e4SLinus Torvalds 1253494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) { 1254fc56f6fbSMike Snitzer bio->bi_bdev = s->cow->bdev; 1255494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1256494b3ee7SMikulas Patocka } 1257494b3ee7SMikulas Patocka 125871fab00aSJonathan Brassow chunk = sector_to_chunk(s->store, bio->bi_sector); 12591da177e4SLinus Torvalds 12601da177e4SLinus Torvalds /* Full snapshots are not usable */ 126176df1c65SAlasdair G Kergon /* To get here the table must be live so s->active is always set. */ 12621da177e4SLinus Torvalds if (!s->valid) 1263f6a80ea8SAlasdair G Kergon return -EIO; 12641da177e4SLinus Torvalds 12651da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 12661da177e4SLinus Torvalds * to copy an exception */ 12671da177e4SLinus Torvalds down_write(&s->lock); 12681da177e4SLinus Torvalds 126976df1c65SAlasdair G Kergon if (!s->valid) { 127076df1c65SAlasdair G Kergon r = -EIO; 127176df1c65SAlasdair G Kergon goto out_unlock; 127276df1c65SAlasdair G Kergon } 127376df1c65SAlasdair G Kergon 12741da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 12753510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 12761da177e4SLinus Torvalds if (e) { 1277d74f81f8SMilan Broz remap_exception(s, e, bio, chunk); 127876df1c65SAlasdair G Kergon goto out_unlock; 127976df1c65SAlasdair G Kergon } 12801da177e4SLinus Torvalds 1281ba40a2aaSAlasdair G Kergon /* 1282ba40a2aaSAlasdair G Kergon * Write to snapshot - higher level takes care of RW/RO 1283ba40a2aaSAlasdair G Kergon * flags so we should only get this if we are 1284ba40a2aaSAlasdair G Kergon * writeable. 1285ba40a2aaSAlasdair G Kergon */ 1286ba40a2aaSAlasdair G Kergon if (bio_rw(bio) == WRITE) { 12872913808eSMikulas Patocka pe = __lookup_pending_exception(s, chunk); 12882913808eSMikulas Patocka if (!pe) { 1289c6621392SMikulas Patocka up_write(&s->lock); 1290c6621392SMikulas Patocka pe = alloc_pending_exception(s); 1291c6621392SMikulas Patocka down_write(&s->lock); 1292c6621392SMikulas Patocka 1293c6621392SMikulas Patocka if (!s->valid) { 1294c6621392SMikulas Patocka free_pending_exception(pe); 1295c6621392SMikulas Patocka r = -EIO; 1296c6621392SMikulas Patocka goto out_unlock; 1297c6621392SMikulas Patocka } 1298c6621392SMikulas Patocka 12993510cb94SJon Brassow e = dm_lookup_exception(&s->complete, chunk); 130035bf659bSMikulas Patocka if (e) { 130135bf659bSMikulas Patocka free_pending_exception(pe); 130235bf659bSMikulas Patocka remap_exception(s, e, bio, chunk); 130335bf659bSMikulas Patocka goto out_unlock; 130435bf659bSMikulas Patocka } 130535bf659bSMikulas Patocka 1306c6621392SMikulas Patocka pe = __find_pending_exception(s, pe, chunk); 13071da177e4SLinus Torvalds if (!pe) { 1308695368acSAlasdair G Kergon __invalidate_snapshot(s, -ENOMEM); 13091da177e4SLinus Torvalds r = -EIO; 131076df1c65SAlasdair G Kergon goto out_unlock; 131176df1c65SAlasdair G Kergon } 13122913808eSMikulas Patocka } 131376df1c65SAlasdair G Kergon 1314d74f81f8SMilan Broz remap_exception(s, &pe->e, bio, chunk); 13151da177e4SLinus Torvalds bio_list_add(&pe->snapshot_bios, bio); 13161da177e4SLinus Torvalds 1317d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1318ba40a2aaSAlasdair G Kergon 13191da177e4SLinus Torvalds if (!pe->started) { 13201da177e4SLinus Torvalds /* this is protected by snap->lock */ 13211da177e4SLinus Torvalds pe->started = 1; 132276df1c65SAlasdair G Kergon up_write(&s->lock); 132376df1c65SAlasdair G Kergon start_copy(pe); 1324ba40a2aaSAlasdair G Kergon goto out; 1325ba40a2aaSAlasdair G Kergon } 1326cd45daffSMikulas Patocka } else { 13271da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 1328cd45daffSMikulas Patocka map_context->ptr = track_chunk(s, chunk); 1329cd45daffSMikulas Patocka } 13301da177e4SLinus Torvalds 1331ba40a2aaSAlasdair G Kergon out_unlock: 1332ba40a2aaSAlasdair G Kergon up_write(&s->lock); 1333ba40a2aaSAlasdair G Kergon out: 13341da177e4SLinus Torvalds return r; 13351da177e4SLinus Torvalds } 13361da177e4SLinus Torvalds 1337cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio, 1338cd45daffSMikulas Patocka int error, union map_info *map_context) 1339cd45daffSMikulas Patocka { 1340cd45daffSMikulas Patocka struct dm_snapshot *s = ti->private; 1341cd45daffSMikulas Patocka struct dm_snap_tracked_chunk *c = map_context->ptr; 1342cd45daffSMikulas Patocka 1343cd45daffSMikulas Patocka if (c) 1344cd45daffSMikulas Patocka stop_tracking_chunk(s, c); 1345cd45daffSMikulas Patocka 1346cd45daffSMikulas Patocka return 0; 1347cd45daffSMikulas Patocka } 1348cd45daffSMikulas Patocka 1349c26655caSMike Snitzer static void snapshot_postsuspend(struct dm_target *ti) 1350c26655caSMike Snitzer { 1351c26655caSMike Snitzer struct dm_snapshot *s = ti->private; 1352c26655caSMike Snitzer 1353c26655caSMike Snitzer down_write(&s->lock); 1354c26655caSMike Snitzer s->suspended = 1; 1355c26655caSMike Snitzer up_write(&s->lock); 1356c26655caSMike Snitzer } 1357c26655caSMike Snitzer 1358c1f0c183SMike Snitzer static int snapshot_preresume(struct dm_target *ti) 1359c1f0c183SMike Snitzer { 1360c1f0c183SMike Snitzer int r = 0; 1361c1f0c183SMike Snitzer struct dm_snapshot *s = ti->private; 1362c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1363c1f0c183SMike Snitzer 1364c1f0c183SMike Snitzer down_read(&_origins_lock); 1365c1f0c183SMike Snitzer (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); 1366c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1367c1f0c183SMike Snitzer down_read(&snap_src->lock); 1368c1f0c183SMike Snitzer if (s == snap_src) { 1369c1f0c183SMike Snitzer DMERR("Unable to resume snapshot source until " 1370c1f0c183SMike Snitzer "handover completes."); 1371c1f0c183SMike Snitzer r = -EINVAL; 1372c1f0c183SMike Snitzer } else if (!snap_src->suspended) { 1373c1f0c183SMike Snitzer DMERR("Unable to perform snapshot handover until " 1374c1f0c183SMike Snitzer "source is suspended."); 1375c1f0c183SMike Snitzer r = -EINVAL; 1376c1f0c183SMike Snitzer } 1377c1f0c183SMike Snitzer up_read(&snap_src->lock); 1378c1f0c183SMike Snitzer } 1379c1f0c183SMike Snitzer up_read(&_origins_lock); 1380c1f0c183SMike Snitzer 1381c1f0c183SMike Snitzer return r; 1382c1f0c183SMike Snitzer } 1383c1f0c183SMike Snitzer 13841da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 13851da177e4SLinus Torvalds { 1386028867acSAlasdair G Kergon struct dm_snapshot *s = ti->private; 1387c1f0c183SMike Snitzer struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; 1388c1f0c183SMike Snitzer 1389c1f0c183SMike Snitzer down_read(&_origins_lock); 1390c1f0c183SMike Snitzer (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest); 1391c1f0c183SMike Snitzer if (snap_src && snap_dest) { 1392c1f0c183SMike Snitzer down_write(&snap_src->lock); 1393c1f0c183SMike Snitzer down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); 1394c1f0c183SMike Snitzer __handover_exceptions(snap_src, snap_dest); 1395c1f0c183SMike Snitzer up_write(&snap_dest->lock); 1396c1f0c183SMike Snitzer up_write(&snap_src->lock); 1397c1f0c183SMike Snitzer } 1398c1f0c183SMike Snitzer up_read(&_origins_lock); 1399c1f0c183SMike Snitzer 1400c1f0c183SMike Snitzer /* Now we have correct chunk size, reregister */ 1401c1f0c183SMike Snitzer reregister_snapshot(s); 14021da177e4SLinus Torvalds 1403aa14edebSAlasdair G Kergon down_write(&s->lock); 1404aa14edebSAlasdair G Kergon s->active = 1; 1405c26655caSMike Snitzer s->suspended = 0; 1406aa14edebSAlasdair G Kergon up_write(&s->lock); 14071da177e4SLinus Torvalds } 14081da177e4SLinus Torvalds 14091da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type, 14101da177e4SLinus Torvalds char *result, unsigned int maxlen) 14111da177e4SLinus Torvalds { 14122e4a31dfSJonathan Brassow unsigned sz = 0; 1413028867acSAlasdair G Kergon struct dm_snapshot *snap = ti->private; 14141da177e4SLinus Torvalds 14151da177e4SLinus Torvalds switch (type) { 14161da177e4SLinus Torvalds case STATUSTYPE_INFO: 141794e76572SMikulas Patocka 141894e76572SMikulas Patocka down_write(&snap->lock); 141994e76572SMikulas Patocka 14201da177e4SLinus Torvalds if (!snap->valid) 14212e4a31dfSJonathan Brassow DMEMIT("Invalid"); 14221da177e4SLinus Torvalds else { 1423985903bbSMike Snitzer if (snap->store->type->usage) { 1424985903bbSMike Snitzer sector_t total_sectors, sectors_allocated, 1425985903bbSMike Snitzer metadata_sectors; 1426985903bbSMike Snitzer snap->store->type->usage(snap->store, 1427985903bbSMike Snitzer &total_sectors, 1428985903bbSMike Snitzer §ors_allocated, 1429985903bbSMike Snitzer &metadata_sectors); 1430985903bbSMike Snitzer DMEMIT("%llu/%llu %llu", 1431985903bbSMike Snitzer (unsigned long long)sectors_allocated, 1432985903bbSMike Snitzer (unsigned long long)total_sectors, 1433985903bbSMike Snitzer (unsigned long long)metadata_sectors); 14341da177e4SLinus Torvalds } 14351da177e4SLinus Torvalds else 14362e4a31dfSJonathan Brassow DMEMIT("Unknown"); 14371da177e4SLinus Torvalds } 143894e76572SMikulas Patocka 143994e76572SMikulas Patocka up_write(&snap->lock); 144094e76572SMikulas Patocka 14411da177e4SLinus Torvalds break; 14421da177e4SLinus Torvalds 14431da177e4SLinus Torvalds case STATUSTYPE_TABLE: 14441da177e4SLinus Torvalds /* 14451da177e4SLinus Torvalds * kdevname returns a static pointer so we need 14461da177e4SLinus Torvalds * to make private copies if the output is to 14471da177e4SLinus Torvalds * make sense. 14481da177e4SLinus Torvalds */ 1449fc56f6fbSMike Snitzer DMEMIT("%s %s", snap->origin->name, snap->cow->name); 14501e302a92SJonathan Brassow snap->store->type->status(snap->store, type, result + sz, 14511e302a92SJonathan Brassow maxlen - sz); 14521da177e4SLinus Torvalds break; 14531da177e4SLinus Torvalds } 14541da177e4SLinus Torvalds 14551da177e4SLinus Torvalds return 0; 14561da177e4SLinus Torvalds } 14571da177e4SLinus Torvalds 14588811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti, 14598811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 14608811f46cSMike Snitzer { 14618811f46cSMike Snitzer struct dm_snapshot *snap = ti->private; 14628811f46cSMike Snitzer 14638811f46cSMike Snitzer return fn(ti, snap->origin, 0, ti->len, data); 14648811f46cSMike Snitzer } 14658811f46cSMike Snitzer 14668811f46cSMike Snitzer 14671da177e4SLinus Torvalds /*----------------------------------------------------------------- 14681da177e4SLinus Torvalds * Origin methods 14691da177e4SLinus Torvalds *---------------------------------------------------------------*/ 14701da177e4SLinus Torvalds static int __origin_write(struct list_head *snapshots, struct bio *bio) 14711da177e4SLinus Torvalds { 1472d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED, first = 0; 14731da177e4SLinus Torvalds struct dm_snapshot *snap; 14741d4989c8SJon Brassow struct dm_exception *e; 1475028867acSAlasdair G Kergon struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL; 14761da177e4SLinus Torvalds chunk_t chunk; 1477eccf0817SAlasdair G Kergon LIST_HEAD(pe_queue); 14781da177e4SLinus Torvalds 14791da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 14801da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 14811da177e4SLinus Torvalds 148276df1c65SAlasdair G Kergon down_write(&snap->lock); 148376df1c65SAlasdair G Kergon 1484aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 1485aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 148676df1c65SAlasdair G Kergon goto next_snapshot; 14871da177e4SLinus Torvalds 1488d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 1489fc56f6fbSMike Snitzer if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) 149076df1c65SAlasdair G Kergon goto next_snapshot; 14911da177e4SLinus Torvalds 14921da177e4SLinus Torvalds /* 14931da177e4SLinus Torvalds * Remember, different snapshots can have 14941da177e4SLinus Torvalds * different chunk sizes. 14951da177e4SLinus Torvalds */ 149671fab00aSJonathan Brassow chunk = sector_to_chunk(snap->store, bio->bi_sector); 14971da177e4SLinus Torvalds 14981da177e4SLinus Torvalds /* 14991da177e4SLinus Torvalds * Check exception table to see if block 15001da177e4SLinus Torvalds * is already remapped in this snapshot 15011da177e4SLinus Torvalds * and trigger an exception if not. 1502b4b610f6SAlasdair G Kergon * 15034b832e8dSAlasdair G Kergon * ref_count is initialised to 1 so pending_complete() 1504b4b610f6SAlasdair G Kergon * won't destroy the primary_pe while we're inside this loop. 15051da177e4SLinus Torvalds */ 15063510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 150776df1c65SAlasdair G Kergon if (e) 150876df1c65SAlasdair G Kergon goto next_snapshot; 150976df1c65SAlasdair G Kergon 15102913808eSMikulas Patocka pe = __lookup_pending_exception(snap, chunk); 15112913808eSMikulas Patocka if (!pe) { 1512c6621392SMikulas Patocka up_write(&snap->lock); 1513c6621392SMikulas Patocka pe = alloc_pending_exception(snap); 1514c6621392SMikulas Patocka down_write(&snap->lock); 1515c6621392SMikulas Patocka 1516c6621392SMikulas Patocka if (!snap->valid) { 1517c6621392SMikulas Patocka free_pending_exception(pe); 1518c6621392SMikulas Patocka goto next_snapshot; 1519c6621392SMikulas Patocka } 1520c6621392SMikulas Patocka 15213510cb94SJon Brassow e = dm_lookup_exception(&snap->complete, chunk); 152235bf659bSMikulas Patocka if (e) { 152335bf659bSMikulas Patocka free_pending_exception(pe); 152435bf659bSMikulas Patocka goto next_snapshot; 152535bf659bSMikulas Patocka } 152635bf659bSMikulas Patocka 1527c6621392SMikulas Patocka pe = __find_pending_exception(snap, pe, chunk); 15281da177e4SLinus Torvalds if (!pe) { 1529695368acSAlasdair G Kergon __invalidate_snapshot(snap, -ENOMEM); 153076df1c65SAlasdair G Kergon goto next_snapshot; 153176df1c65SAlasdair G Kergon } 15322913808eSMikulas Patocka } 15331da177e4SLinus Torvalds 1534b4b610f6SAlasdair G Kergon if (!primary_pe) { 1535b4b610f6SAlasdair G Kergon /* 1536b4b610f6SAlasdair G Kergon * Either every pe here has same 1537b4b610f6SAlasdair G Kergon * primary_pe or none has one yet. 1538b4b610f6SAlasdair G Kergon */ 1539b4b610f6SAlasdair G Kergon if (pe->primary_pe) 1540b4b610f6SAlasdair G Kergon primary_pe = pe->primary_pe; 1541b4b610f6SAlasdair G Kergon else { 1542b4b610f6SAlasdair G Kergon primary_pe = pe; 1543b4b610f6SAlasdair G Kergon first = 1; 1544eccf0817SAlasdair G Kergon } 1545b4b610f6SAlasdair G Kergon 154676df1c65SAlasdair G Kergon bio_list_add(&primary_pe->origin_bios, bio); 154776df1c65SAlasdair G Kergon 1548d2a7ad29SKiyoshi Ueda r = DM_MAPIO_SUBMITTED; 1549b4b610f6SAlasdair G Kergon } 155076df1c65SAlasdair G Kergon 1551b4b610f6SAlasdair G Kergon if (!pe->primary_pe) { 1552b4b610f6SAlasdair G Kergon pe->primary_pe = primary_pe; 15534b832e8dSAlasdair G Kergon get_pending_exception(primary_pe); 1554b4b610f6SAlasdair G Kergon } 155576df1c65SAlasdair G Kergon 1556eccf0817SAlasdair G Kergon if (!pe->started) { 1557eccf0817SAlasdair G Kergon pe->started = 1; 1558eccf0817SAlasdair G Kergon list_add_tail(&pe->list, &pe_queue); 1559eccf0817SAlasdair G Kergon } 15601da177e4SLinus Torvalds 156176df1c65SAlasdair G Kergon next_snapshot: 15621da177e4SLinus Torvalds up_write(&snap->lock); 15631da177e4SLinus Torvalds } 15641da177e4SLinus Torvalds 1565b4b610f6SAlasdair G Kergon if (!primary_pe) 15664b832e8dSAlasdair G Kergon return r; 1567b4b610f6SAlasdair G Kergon 1568b4b610f6SAlasdair G Kergon /* 1569b4b610f6SAlasdair G Kergon * If this is the first time we're processing this chunk and 15704b832e8dSAlasdair G Kergon * ref_count is now 1 it means all the pending exceptions 1571b4b610f6SAlasdair G Kergon * got completed while we were in the loop above, so it falls to 1572b4b610f6SAlasdair G Kergon * us here to remove the primary_pe and submit any origin_bios. 1573b4b610f6SAlasdair G Kergon */ 1574b4b610f6SAlasdair G Kergon 15754b832e8dSAlasdair G Kergon if (first && atomic_dec_and_test(&primary_pe->ref_count)) { 1576b4b610f6SAlasdair G Kergon flush_bios(bio_list_get(&primary_pe->origin_bios)); 1577b4b610f6SAlasdair G Kergon free_pending_exception(primary_pe); 1578b4b610f6SAlasdair G Kergon /* If we got here, pe_queue is necessarily empty. */ 15794b832e8dSAlasdair G Kergon return r; 1580b4b610f6SAlasdair G Kergon } 1581b4b610f6SAlasdair G Kergon 15821da177e4SLinus Torvalds /* 15831da177e4SLinus Torvalds * Now that we have a complete pe list we can start the copying. 15841da177e4SLinus Torvalds */ 1585eccf0817SAlasdair G Kergon list_for_each_entry_safe(pe, next_pe, &pe_queue, list) 15861da177e4SLinus Torvalds start_copy(pe); 15871da177e4SLinus Torvalds 15881da177e4SLinus Torvalds return r; 15891da177e4SLinus Torvalds } 15901da177e4SLinus Torvalds 15911da177e4SLinus Torvalds /* 15921da177e4SLinus Torvalds * Called on a write from the origin driver. 15931da177e4SLinus Torvalds */ 15941da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 15951da177e4SLinus Torvalds { 15961da177e4SLinus Torvalds struct origin *o; 1597d2a7ad29SKiyoshi Ueda int r = DM_MAPIO_REMAPPED; 15981da177e4SLinus Torvalds 15991da177e4SLinus Torvalds down_read(&_origins_lock); 16001da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 16011da177e4SLinus Torvalds if (o) 16021da177e4SLinus Torvalds r = __origin_write(&o->snapshots, bio); 16031da177e4SLinus Torvalds up_read(&_origins_lock); 16041da177e4SLinus Torvalds 16051da177e4SLinus Torvalds return r; 16061da177e4SLinus Torvalds } 16071da177e4SLinus Torvalds 16081da177e4SLinus Torvalds /* 16091da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 16101da177e4SLinus Torvalds */ 16111da177e4SLinus Torvalds 16121da177e4SLinus Torvalds /* 16131da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 16141da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 16151da177e4SLinus Torvalds * pointing to the real device. 16161da177e4SLinus Torvalds */ 16171da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 16181da177e4SLinus Torvalds { 16191da177e4SLinus Torvalds int r; 16201da177e4SLinus Torvalds struct dm_dev *dev; 16211da177e4SLinus Torvalds 16221da177e4SLinus Torvalds if (argc != 1) { 162372d94861SAlasdair G Kergon ti->error = "origin: incorrect number of arguments"; 16241da177e4SLinus Torvalds return -EINVAL; 16251da177e4SLinus Torvalds } 16261da177e4SLinus Torvalds 16271da177e4SLinus Torvalds r = dm_get_device(ti, argv[0], 0, ti->len, 16281da177e4SLinus Torvalds dm_table_get_mode(ti->table), &dev); 16291da177e4SLinus Torvalds if (r) { 16301da177e4SLinus Torvalds ti->error = "Cannot get target device"; 16311da177e4SLinus Torvalds return r; 16321da177e4SLinus Torvalds } 16331da177e4SLinus Torvalds 16341da177e4SLinus Torvalds ti->private = dev; 1635494b3ee7SMikulas Patocka ti->num_flush_requests = 1; 1636494b3ee7SMikulas Patocka 16371da177e4SLinus Torvalds return 0; 16381da177e4SLinus Torvalds } 16391da177e4SLinus Torvalds 16401da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 16411da177e4SLinus Torvalds { 1642028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 16431da177e4SLinus Torvalds dm_put_device(ti, dev); 16441da177e4SLinus Torvalds } 16451da177e4SLinus Torvalds 16461da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio, 16471da177e4SLinus Torvalds union map_info *map_context) 16481da177e4SLinus Torvalds { 1649028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 16501da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 16511da177e4SLinus Torvalds 1652494b3ee7SMikulas Patocka if (unlikely(bio_empty_barrier(bio))) 1653494b3ee7SMikulas Patocka return DM_MAPIO_REMAPPED; 1654494b3ee7SMikulas Patocka 16551da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 1656d2a7ad29SKiyoshi Ueda return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; 16571da177e4SLinus Torvalds } 16581da177e4SLinus Torvalds 16591da177e4SLinus Torvalds /* 16601da177e4SLinus Torvalds * Set the target "split_io" field to the minimum of all the snapshots' 16611da177e4SLinus Torvalds * chunk sizes. 16621da177e4SLinus Torvalds */ 16631da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 16641da177e4SLinus Torvalds { 1665028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 16661da177e4SLinus Torvalds 16671da177e4SLinus Torvalds down_read(&_origins_lock); 16681da177e4SLinus Torvalds 16697e201b35SMikulas Patocka ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev)); 16707e201b35SMikulas Patocka 16717e201b35SMikulas Patocka up_read(&_origins_lock); 16721da177e4SLinus Torvalds } 16731da177e4SLinus Torvalds 16741da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result, 16751da177e4SLinus Torvalds unsigned int maxlen) 16761da177e4SLinus Torvalds { 1677028867acSAlasdair G Kergon struct dm_dev *dev = ti->private; 16781da177e4SLinus Torvalds 16791da177e4SLinus Torvalds switch (type) { 16801da177e4SLinus Torvalds case STATUSTYPE_INFO: 16811da177e4SLinus Torvalds result[0] = '\0'; 16821da177e4SLinus Torvalds break; 16831da177e4SLinus Torvalds 16841da177e4SLinus Torvalds case STATUSTYPE_TABLE: 16851da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 16861da177e4SLinus Torvalds break; 16871da177e4SLinus Torvalds } 16881da177e4SLinus Torvalds 16891da177e4SLinus Torvalds return 0; 16901da177e4SLinus Torvalds } 16911da177e4SLinus Torvalds 16928811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti, 16938811f46cSMike Snitzer iterate_devices_callout_fn fn, void *data) 16948811f46cSMike Snitzer { 16958811f46cSMike Snitzer struct dm_dev *dev = ti->private; 16968811f46cSMike Snitzer 16978811f46cSMike Snitzer return fn(ti, dev, 0, ti->len, data); 16988811f46cSMike Snitzer } 16998811f46cSMike Snitzer 17001da177e4SLinus Torvalds static struct target_type origin_target = { 17011da177e4SLinus Torvalds .name = "snapshot-origin", 17028811f46cSMike Snitzer .version = {1, 7, 0}, 17031da177e4SLinus Torvalds .module = THIS_MODULE, 17041da177e4SLinus Torvalds .ctr = origin_ctr, 17051da177e4SLinus Torvalds .dtr = origin_dtr, 17061da177e4SLinus Torvalds .map = origin_map, 17071da177e4SLinus Torvalds .resume = origin_resume, 17081da177e4SLinus Torvalds .status = origin_status, 17098811f46cSMike Snitzer .iterate_devices = origin_iterate_devices, 17101da177e4SLinus Torvalds }; 17111da177e4SLinus Torvalds 17121da177e4SLinus Torvalds static struct target_type snapshot_target = { 17131da177e4SLinus Torvalds .name = "snapshot", 1714c26655caSMike Snitzer .version = {1, 9, 0}, 17151da177e4SLinus Torvalds .module = THIS_MODULE, 17161da177e4SLinus Torvalds .ctr = snapshot_ctr, 17171da177e4SLinus Torvalds .dtr = snapshot_dtr, 17181da177e4SLinus Torvalds .map = snapshot_map, 1719cd45daffSMikulas Patocka .end_io = snapshot_end_io, 1720c26655caSMike Snitzer .postsuspend = snapshot_postsuspend, 1721c1f0c183SMike Snitzer .preresume = snapshot_preresume, 17221da177e4SLinus Torvalds .resume = snapshot_resume, 17231da177e4SLinus Torvalds .status = snapshot_status, 17248811f46cSMike Snitzer .iterate_devices = snapshot_iterate_devices, 17251da177e4SLinus Torvalds }; 17261da177e4SLinus Torvalds 17271da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 17281da177e4SLinus Torvalds { 17291da177e4SLinus Torvalds int r; 17301da177e4SLinus Torvalds 17314db6bfe0SAlasdair G Kergon r = dm_exception_store_init(); 17324db6bfe0SAlasdair G Kergon if (r) { 17334db6bfe0SAlasdair G Kergon DMERR("Failed to initialize exception stores"); 17344db6bfe0SAlasdair G Kergon return r; 17354db6bfe0SAlasdair G Kergon } 17364db6bfe0SAlasdair G Kergon 17371da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 17381da177e4SLinus Torvalds if (r) { 17391da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 1740034a186dSJonathan Brassow goto bad_register_snapshot_target; 17411da177e4SLinus Torvalds } 17421da177e4SLinus Torvalds 17431da177e4SLinus Torvalds r = dm_register_target(&origin_target); 17441da177e4SLinus Torvalds if (r < 0) { 174572d94861SAlasdair G Kergon DMERR("Origin target register failed %d", r); 17461da177e4SLinus Torvalds goto bad1; 17471da177e4SLinus Torvalds } 17481da177e4SLinus Torvalds 17491da177e4SLinus Torvalds r = init_origin_hash(); 17501da177e4SLinus Torvalds if (r) { 17511da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 17521da177e4SLinus Torvalds goto bad2; 17531da177e4SLinus Torvalds } 17541da177e4SLinus Torvalds 17551d4989c8SJon Brassow exception_cache = KMEM_CACHE(dm_exception, 0); 17561da177e4SLinus Torvalds if (!exception_cache) { 17571da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 17581da177e4SLinus Torvalds r = -ENOMEM; 17591da177e4SLinus Torvalds goto bad3; 17601da177e4SLinus Torvalds } 17611da177e4SLinus Torvalds 1762028867acSAlasdair G Kergon pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0); 17631da177e4SLinus Torvalds if (!pending_cache) { 17641da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 17651da177e4SLinus Torvalds r = -ENOMEM; 17661da177e4SLinus Torvalds goto bad4; 17671da177e4SLinus Torvalds } 17681da177e4SLinus Torvalds 1769cd45daffSMikulas Patocka tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); 1770cd45daffSMikulas Patocka if (!tracked_chunk_cache) { 1771cd45daffSMikulas Patocka DMERR("Couldn't create cache to track chunks in use."); 1772cd45daffSMikulas Patocka r = -ENOMEM; 1773cd45daffSMikulas Patocka goto bad5; 1774cd45daffSMikulas Patocka } 1775cd45daffSMikulas Patocka 1776ca3a931fSAlasdair G Kergon ksnapd = create_singlethread_workqueue("ksnapd"); 1777ca3a931fSAlasdair G Kergon if (!ksnapd) { 1778ca3a931fSAlasdair G Kergon DMERR("Failed to create ksnapd workqueue."); 1779ca3a931fSAlasdair G Kergon r = -ENOMEM; 178092e86812SMikulas Patocka goto bad_pending_pool; 1781ca3a931fSAlasdair G Kergon } 1782ca3a931fSAlasdair G Kergon 17831da177e4SLinus Torvalds return 0; 17841da177e4SLinus Torvalds 1785cd45daffSMikulas Patocka bad_pending_pool: 1786cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 17871da177e4SLinus Torvalds bad5: 17881da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 17891da177e4SLinus Torvalds bad4: 17901da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 17911da177e4SLinus Torvalds bad3: 17921da177e4SLinus Torvalds exit_origin_hash(); 17931da177e4SLinus Torvalds bad2: 17941da177e4SLinus Torvalds dm_unregister_target(&origin_target); 17951da177e4SLinus Torvalds bad1: 17961da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 1797034a186dSJonathan Brassow 1798034a186dSJonathan Brassow bad_register_snapshot_target: 1799034a186dSJonathan Brassow dm_exception_store_exit(); 18001da177e4SLinus Torvalds return r; 18011da177e4SLinus Torvalds } 18021da177e4SLinus Torvalds 18031da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 18041da177e4SLinus Torvalds { 1805ca3a931fSAlasdair G Kergon destroy_workqueue(ksnapd); 1806ca3a931fSAlasdair G Kergon 180710d3bd09SMikulas Patocka dm_unregister_target(&snapshot_target); 180810d3bd09SMikulas Patocka dm_unregister_target(&origin_target); 18091da177e4SLinus Torvalds 18101da177e4SLinus Torvalds exit_origin_hash(); 18111da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 18121da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 1813cd45daffSMikulas Patocka kmem_cache_destroy(tracked_chunk_cache); 18144db6bfe0SAlasdair G Kergon 18154db6bfe0SAlasdair G Kergon dm_exception_store_exit(); 18161da177e4SLinus Torvalds } 18171da177e4SLinus Torvalds 18181da177e4SLinus Torvalds /* Module hooks */ 18191da177e4SLinus Torvalds module_init(dm_snapshot_init); 18201da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 18211da177e4SLinus Torvalds 18221da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 18231da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 18241da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1825