11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * dm-snapshot.c 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * This file is released under the GPL. 71da177e4SLinus Torvalds */ 81da177e4SLinus Torvalds 91da177e4SLinus Torvalds #include <linux/blkdev.h> 101da177e4SLinus Torvalds #include <linux/config.h> 111da177e4SLinus Torvalds #include <linux/ctype.h> 121da177e4SLinus Torvalds #include <linux/device-mapper.h> 131da177e4SLinus Torvalds #include <linux/fs.h> 141da177e4SLinus Torvalds #include <linux/init.h> 151da177e4SLinus Torvalds #include <linux/kdev_t.h> 161da177e4SLinus Torvalds #include <linux/list.h> 171da177e4SLinus Torvalds #include <linux/mempool.h> 181da177e4SLinus Torvalds #include <linux/module.h> 191da177e4SLinus Torvalds #include <linux/slab.h> 201da177e4SLinus Torvalds #include <linux/vmalloc.h> 211da177e4SLinus Torvalds 221da177e4SLinus Torvalds #include "dm-snap.h" 231da177e4SLinus Torvalds #include "dm-bio-list.h" 241da177e4SLinus Torvalds #include "kcopyd.h" 251da177e4SLinus Torvalds 261da177e4SLinus Torvalds /* 271da177e4SLinus Torvalds * The percentage increment we will wake up users at 281da177e4SLinus Torvalds */ 291da177e4SLinus Torvalds #define WAKE_UP_PERCENT 5 301da177e4SLinus Torvalds 311da177e4SLinus Torvalds /* 321da177e4SLinus Torvalds * kcopyd priority of snapshot operations 331da177e4SLinus Torvalds */ 341da177e4SLinus Torvalds #define SNAPSHOT_COPY_PRIORITY 2 351da177e4SLinus Torvalds 361da177e4SLinus Torvalds /* 371da177e4SLinus Torvalds * Each snapshot reserves this many pages for io 381da177e4SLinus Torvalds */ 391da177e4SLinus Torvalds #define SNAPSHOT_PAGES 256 401da177e4SLinus Torvalds 411da177e4SLinus Torvalds struct pending_exception { 421da177e4SLinus Torvalds struct exception e; 431da177e4SLinus Torvalds 441da177e4SLinus Torvalds /* 451da177e4SLinus Torvalds * Origin buffers waiting for this to complete are held 461da177e4SLinus Torvalds * in a bio list 471da177e4SLinus Torvalds */ 481da177e4SLinus Torvalds struct bio_list origin_bios; 491da177e4SLinus Torvalds struct bio_list snapshot_bios; 501da177e4SLinus Torvalds 511da177e4SLinus Torvalds /* 52eccf0817SAlasdair G Kergon * Short-term queue of pending exceptions prior to submission. 53eccf0817SAlasdair G Kergon */ 54eccf0817SAlasdair G Kergon struct list_head list; 55eccf0817SAlasdair G Kergon 56eccf0817SAlasdair G Kergon /* 571da177e4SLinus Torvalds * Other pending_exceptions that are processing this 581da177e4SLinus Torvalds * chunk. When this list is empty, we know we can 591da177e4SLinus Torvalds * complete the origins. 601da177e4SLinus Torvalds */ 611da177e4SLinus Torvalds struct list_head siblings; 621da177e4SLinus Torvalds 631da177e4SLinus Torvalds /* Pointer back to snapshot context */ 641da177e4SLinus Torvalds struct dm_snapshot *snap; 651da177e4SLinus Torvalds 661da177e4SLinus Torvalds /* 671da177e4SLinus Torvalds * 1 indicates the exception has already been sent to 681da177e4SLinus Torvalds * kcopyd. 691da177e4SLinus Torvalds */ 701da177e4SLinus Torvalds int started; 711da177e4SLinus Torvalds }; 721da177e4SLinus Torvalds 731da177e4SLinus Torvalds /* 741da177e4SLinus Torvalds * Hash table mapping origin volumes to lists of snapshots and 751da177e4SLinus Torvalds * a lock to protect it 761da177e4SLinus Torvalds */ 771da177e4SLinus Torvalds static kmem_cache_t *exception_cache; 781da177e4SLinus Torvalds static kmem_cache_t *pending_cache; 791da177e4SLinus Torvalds static mempool_t *pending_pool; 801da177e4SLinus Torvalds 811da177e4SLinus Torvalds /* 821da177e4SLinus Torvalds * One of these per registered origin, held in the snapshot_origins hash 831da177e4SLinus Torvalds */ 841da177e4SLinus Torvalds struct origin { 851da177e4SLinus Torvalds /* The origin device */ 861da177e4SLinus Torvalds struct block_device *bdev; 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds struct list_head hash_list; 891da177e4SLinus Torvalds 901da177e4SLinus Torvalds /* List of snapshots for this origin */ 911da177e4SLinus Torvalds struct list_head snapshots; 921da177e4SLinus Torvalds }; 931da177e4SLinus Torvalds 941da177e4SLinus Torvalds /* 951da177e4SLinus Torvalds * Size of the hash table for origin volumes. If we make this 961da177e4SLinus Torvalds * the size of the minors list then it should be nearly perfect 971da177e4SLinus Torvalds */ 981da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256 991da177e4SLinus Torvalds #define ORIGIN_MASK 0xFF 1001da177e4SLinus Torvalds static struct list_head *_origins; 1011da177e4SLinus Torvalds static struct rw_semaphore _origins_lock; 1021da177e4SLinus Torvalds 1031da177e4SLinus Torvalds static int init_origin_hash(void) 1041da177e4SLinus Torvalds { 1051da177e4SLinus Torvalds int i; 1061da177e4SLinus Torvalds 1071da177e4SLinus Torvalds _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), 1081da177e4SLinus Torvalds GFP_KERNEL); 1091da177e4SLinus Torvalds if (!_origins) { 1101da177e4SLinus Torvalds DMERR("Device mapper: Snapshot: unable to allocate memory"); 1111da177e4SLinus Torvalds return -ENOMEM; 1121da177e4SLinus Torvalds } 1131da177e4SLinus Torvalds 1141da177e4SLinus Torvalds for (i = 0; i < ORIGIN_HASH_SIZE; i++) 1151da177e4SLinus Torvalds INIT_LIST_HEAD(_origins + i); 1161da177e4SLinus Torvalds init_rwsem(&_origins_lock); 1171da177e4SLinus Torvalds 1181da177e4SLinus Torvalds return 0; 1191da177e4SLinus Torvalds } 1201da177e4SLinus Torvalds 1211da177e4SLinus Torvalds static void exit_origin_hash(void) 1221da177e4SLinus Torvalds { 1231da177e4SLinus Torvalds kfree(_origins); 1241da177e4SLinus Torvalds } 1251da177e4SLinus Torvalds 1261da177e4SLinus Torvalds static inline unsigned int origin_hash(struct block_device *bdev) 1271da177e4SLinus Torvalds { 1281da177e4SLinus Torvalds return bdev->bd_dev & ORIGIN_MASK; 1291da177e4SLinus Torvalds } 1301da177e4SLinus Torvalds 1311da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin) 1321da177e4SLinus Torvalds { 1331da177e4SLinus Torvalds struct list_head *ol; 1341da177e4SLinus Torvalds struct origin *o; 1351da177e4SLinus Torvalds 1361da177e4SLinus Torvalds ol = &_origins[origin_hash(origin)]; 1371da177e4SLinus Torvalds list_for_each_entry (o, ol, hash_list) 1381da177e4SLinus Torvalds if (bdev_equal(o->bdev, origin)) 1391da177e4SLinus Torvalds return o; 1401da177e4SLinus Torvalds 1411da177e4SLinus Torvalds return NULL; 1421da177e4SLinus Torvalds } 1431da177e4SLinus Torvalds 1441da177e4SLinus Torvalds static void __insert_origin(struct origin *o) 1451da177e4SLinus Torvalds { 1461da177e4SLinus Torvalds struct list_head *sl = &_origins[origin_hash(o->bdev)]; 1471da177e4SLinus Torvalds list_add_tail(&o->hash_list, sl); 1481da177e4SLinus Torvalds } 1491da177e4SLinus Torvalds 1501da177e4SLinus Torvalds /* 1511da177e4SLinus Torvalds * Make a note of the snapshot and its origin so we can look it 1521da177e4SLinus Torvalds * up when the origin has a write on it. 1531da177e4SLinus Torvalds */ 1541da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap) 1551da177e4SLinus Torvalds { 1561da177e4SLinus Torvalds struct origin *o; 1571da177e4SLinus Torvalds struct block_device *bdev = snap->origin->bdev; 1581da177e4SLinus Torvalds 1591da177e4SLinus Torvalds down_write(&_origins_lock); 1601da177e4SLinus Torvalds o = __lookup_origin(bdev); 1611da177e4SLinus Torvalds 1621da177e4SLinus Torvalds if (!o) { 1631da177e4SLinus Torvalds /* New origin */ 1641da177e4SLinus Torvalds o = kmalloc(sizeof(*o), GFP_KERNEL); 1651da177e4SLinus Torvalds if (!o) { 1661da177e4SLinus Torvalds up_write(&_origins_lock); 1671da177e4SLinus Torvalds return -ENOMEM; 1681da177e4SLinus Torvalds } 1691da177e4SLinus Torvalds 1701da177e4SLinus Torvalds /* Initialise the struct */ 1711da177e4SLinus Torvalds INIT_LIST_HEAD(&o->snapshots); 1721da177e4SLinus Torvalds o->bdev = bdev; 1731da177e4SLinus Torvalds 1741da177e4SLinus Torvalds __insert_origin(o); 1751da177e4SLinus Torvalds } 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds list_add_tail(&snap->list, &o->snapshots); 1781da177e4SLinus Torvalds 1791da177e4SLinus Torvalds up_write(&_origins_lock); 1801da177e4SLinus Torvalds return 0; 1811da177e4SLinus Torvalds } 1821da177e4SLinus Torvalds 1831da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s) 1841da177e4SLinus Torvalds { 1851da177e4SLinus Torvalds struct origin *o; 1861da177e4SLinus Torvalds 1871da177e4SLinus Torvalds down_write(&_origins_lock); 1881da177e4SLinus Torvalds o = __lookup_origin(s->origin->bdev); 1891da177e4SLinus Torvalds 1901da177e4SLinus Torvalds list_del(&s->list); 1911da177e4SLinus Torvalds if (list_empty(&o->snapshots)) { 1921da177e4SLinus Torvalds list_del(&o->hash_list); 1931da177e4SLinus Torvalds kfree(o); 1941da177e4SLinus Torvalds } 1951da177e4SLinus Torvalds 1961da177e4SLinus Torvalds up_write(&_origins_lock); 1971da177e4SLinus Torvalds } 1981da177e4SLinus Torvalds 1991da177e4SLinus Torvalds /* 2001da177e4SLinus Torvalds * Implementation of the exception hash tables. 2011da177e4SLinus Torvalds */ 2021da177e4SLinus Torvalds static int init_exception_table(struct exception_table *et, uint32_t size) 2031da177e4SLinus Torvalds { 2041da177e4SLinus Torvalds unsigned int i; 2051da177e4SLinus Torvalds 2061da177e4SLinus Torvalds et->hash_mask = size - 1; 2071da177e4SLinus Torvalds et->table = dm_vcalloc(size, sizeof(struct list_head)); 2081da177e4SLinus Torvalds if (!et->table) 2091da177e4SLinus Torvalds return -ENOMEM; 2101da177e4SLinus Torvalds 2111da177e4SLinus Torvalds for (i = 0; i < size; i++) 2121da177e4SLinus Torvalds INIT_LIST_HEAD(et->table + i); 2131da177e4SLinus Torvalds 2141da177e4SLinus Torvalds return 0; 2151da177e4SLinus Torvalds } 2161da177e4SLinus Torvalds 2171da177e4SLinus Torvalds static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) 2181da177e4SLinus Torvalds { 2191da177e4SLinus Torvalds struct list_head *slot; 2201da177e4SLinus Torvalds struct exception *ex, *next; 2211da177e4SLinus Torvalds int i, size; 2221da177e4SLinus Torvalds 2231da177e4SLinus Torvalds size = et->hash_mask + 1; 2241da177e4SLinus Torvalds for (i = 0; i < size; i++) { 2251da177e4SLinus Torvalds slot = et->table + i; 2261da177e4SLinus Torvalds 2271da177e4SLinus Torvalds list_for_each_entry_safe (ex, next, slot, hash_list) 2281da177e4SLinus Torvalds kmem_cache_free(mem, ex); 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds vfree(et->table); 2321da177e4SLinus Torvalds } 2331da177e4SLinus Torvalds 2341da177e4SLinus Torvalds static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) 2351da177e4SLinus Torvalds { 2361da177e4SLinus Torvalds return chunk & et->hash_mask; 2371da177e4SLinus Torvalds } 2381da177e4SLinus Torvalds 2391da177e4SLinus Torvalds static void insert_exception(struct exception_table *eh, struct exception *e) 2401da177e4SLinus Torvalds { 2411da177e4SLinus Torvalds struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; 2421da177e4SLinus Torvalds list_add(&e->hash_list, l); 2431da177e4SLinus Torvalds } 2441da177e4SLinus Torvalds 2451da177e4SLinus Torvalds static inline void remove_exception(struct exception *e) 2461da177e4SLinus Torvalds { 2471da177e4SLinus Torvalds list_del(&e->hash_list); 2481da177e4SLinus Torvalds } 2491da177e4SLinus Torvalds 2501da177e4SLinus Torvalds /* 2511da177e4SLinus Torvalds * Return the exception data for a sector, or NULL if not 2521da177e4SLinus Torvalds * remapped. 2531da177e4SLinus Torvalds */ 2541da177e4SLinus Torvalds static struct exception *lookup_exception(struct exception_table *et, 2551da177e4SLinus Torvalds chunk_t chunk) 2561da177e4SLinus Torvalds { 2571da177e4SLinus Torvalds struct list_head *slot; 2581da177e4SLinus Torvalds struct exception *e; 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds slot = &et->table[exception_hash(et, chunk)]; 2611da177e4SLinus Torvalds list_for_each_entry (e, slot, hash_list) 2621da177e4SLinus Torvalds if (e->old_chunk == chunk) 2631da177e4SLinus Torvalds return e; 2641da177e4SLinus Torvalds 2651da177e4SLinus Torvalds return NULL; 2661da177e4SLinus Torvalds } 2671da177e4SLinus Torvalds 2681da177e4SLinus Torvalds static inline struct exception *alloc_exception(void) 2691da177e4SLinus Torvalds { 2701da177e4SLinus Torvalds struct exception *e; 2711da177e4SLinus Torvalds 2721da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_NOIO); 2731da177e4SLinus Torvalds if (!e) 2741da177e4SLinus Torvalds e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); 2751da177e4SLinus Torvalds 2761da177e4SLinus Torvalds return e; 2771da177e4SLinus Torvalds } 2781da177e4SLinus Torvalds 2791da177e4SLinus Torvalds static inline void free_exception(struct exception *e) 2801da177e4SLinus Torvalds { 2811da177e4SLinus Torvalds kmem_cache_free(exception_cache, e); 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds static inline struct pending_exception *alloc_pending_exception(void) 2851da177e4SLinus Torvalds { 2861da177e4SLinus Torvalds return mempool_alloc(pending_pool, GFP_NOIO); 2871da177e4SLinus Torvalds } 2881da177e4SLinus Torvalds 2891da177e4SLinus Torvalds static inline void free_pending_exception(struct pending_exception *pe) 2901da177e4SLinus Torvalds { 2911da177e4SLinus Torvalds mempool_free(pe, pending_pool); 2921da177e4SLinus Torvalds } 2931da177e4SLinus Torvalds 2941da177e4SLinus Torvalds int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) 2951da177e4SLinus Torvalds { 2961da177e4SLinus Torvalds struct exception *e; 2971da177e4SLinus Torvalds 2981da177e4SLinus Torvalds e = alloc_exception(); 2991da177e4SLinus Torvalds if (!e) 3001da177e4SLinus Torvalds return -ENOMEM; 3011da177e4SLinus Torvalds 3021da177e4SLinus Torvalds e->old_chunk = old; 3031da177e4SLinus Torvalds e->new_chunk = new; 3041da177e4SLinus Torvalds insert_exception(&s->complete, e); 3051da177e4SLinus Torvalds return 0; 3061da177e4SLinus Torvalds } 3071da177e4SLinus Torvalds 3081da177e4SLinus Torvalds /* 3091da177e4SLinus Torvalds * Hard coded magic. 3101da177e4SLinus Torvalds */ 3111da177e4SLinus Torvalds static int calc_max_buckets(void) 3121da177e4SLinus Torvalds { 3131da177e4SLinus Torvalds /* use a fixed size of 2MB */ 3141da177e4SLinus Torvalds unsigned long mem = 2 * 1024 * 1024; 3151da177e4SLinus Torvalds mem /= sizeof(struct list_head); 3161da177e4SLinus Torvalds 3171da177e4SLinus Torvalds return mem; 3181da177e4SLinus Torvalds } 3191da177e4SLinus Torvalds 3201da177e4SLinus Torvalds /* 3211da177e4SLinus Torvalds * Rounds a number down to a power of 2. 3221da177e4SLinus Torvalds */ 3231da177e4SLinus Torvalds static inline uint32_t round_down(uint32_t n) 3241da177e4SLinus Torvalds { 3251da177e4SLinus Torvalds while (n & (n - 1)) 3261da177e4SLinus Torvalds n &= (n - 1); 3271da177e4SLinus Torvalds return n; 3281da177e4SLinus Torvalds } 3291da177e4SLinus Torvalds 3301da177e4SLinus Torvalds /* 3311da177e4SLinus Torvalds * Allocate room for a suitable hash table. 3321da177e4SLinus Torvalds */ 3331da177e4SLinus Torvalds static int init_hash_tables(struct dm_snapshot *s) 3341da177e4SLinus Torvalds { 3351da177e4SLinus Torvalds sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; 3361da177e4SLinus Torvalds 3371da177e4SLinus Torvalds /* 3381da177e4SLinus Torvalds * Calculate based on the size of the original volume or 3391da177e4SLinus Torvalds * the COW volume... 3401da177e4SLinus Torvalds */ 3411da177e4SLinus Torvalds cow_dev_size = get_dev_size(s->cow->bdev); 3421da177e4SLinus Torvalds origin_dev_size = get_dev_size(s->origin->bdev); 3431da177e4SLinus Torvalds max_buckets = calc_max_buckets(); 3441da177e4SLinus Torvalds 3451da177e4SLinus Torvalds hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift; 3461da177e4SLinus Torvalds hash_size = min(hash_size, max_buckets); 3471da177e4SLinus Torvalds 3481da177e4SLinus Torvalds /* Round it down to a power of 2 */ 3491da177e4SLinus Torvalds hash_size = round_down(hash_size); 3501da177e4SLinus Torvalds if (init_exception_table(&s->complete, hash_size)) 3511da177e4SLinus Torvalds return -ENOMEM; 3521da177e4SLinus Torvalds 3531da177e4SLinus Torvalds /* 3541da177e4SLinus Torvalds * Allocate hash table for in-flight exceptions 3551da177e4SLinus Torvalds * Make this smaller than the real hash table 3561da177e4SLinus Torvalds */ 3571da177e4SLinus Torvalds hash_size >>= 3; 3581da177e4SLinus Torvalds if (hash_size < 64) 3591da177e4SLinus Torvalds hash_size = 64; 3601da177e4SLinus Torvalds 3611da177e4SLinus Torvalds if (init_exception_table(&s->pending, hash_size)) { 3621da177e4SLinus Torvalds exit_exception_table(&s->complete, exception_cache); 3631da177e4SLinus Torvalds return -ENOMEM; 3641da177e4SLinus Torvalds } 3651da177e4SLinus Torvalds 3661da177e4SLinus Torvalds return 0; 3671da177e4SLinus Torvalds } 3681da177e4SLinus Torvalds 3691da177e4SLinus Torvalds /* 3701da177e4SLinus Torvalds * Round a number up to the nearest 'size' boundary. size must 3711da177e4SLinus Torvalds * be a power of 2. 3721da177e4SLinus Torvalds */ 3731da177e4SLinus Torvalds static inline ulong round_up(ulong n, ulong size) 3741da177e4SLinus Torvalds { 3751da177e4SLinus Torvalds size--; 3761da177e4SLinus Torvalds return (n + size) & ~size; 3771da177e4SLinus Torvalds } 3781da177e4SLinus Torvalds 3792d38fe20SAlasdair G Kergon static void read_snapshot_metadata(struct dm_snapshot *s) 3802d38fe20SAlasdair G Kergon { 3812d38fe20SAlasdair G Kergon if (s->store.read_metadata(&s->store)) { 3822d38fe20SAlasdair G Kergon down_write(&s->lock); 3832d38fe20SAlasdair G Kergon s->valid = 0; 3842d38fe20SAlasdair G Kergon up_write(&s->lock); 3852d38fe20SAlasdair G Kergon } 3862d38fe20SAlasdair G Kergon } 3872d38fe20SAlasdair G Kergon 3881da177e4SLinus Torvalds /* 3891da177e4SLinus Torvalds * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> 3901da177e4SLinus Torvalds */ 3911da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) 3921da177e4SLinus Torvalds { 3931da177e4SLinus Torvalds struct dm_snapshot *s; 3941da177e4SLinus Torvalds unsigned long chunk_size; 3951da177e4SLinus Torvalds int r = -EINVAL; 3961da177e4SLinus Torvalds char persistent; 3971da177e4SLinus Torvalds char *origin_path; 3981da177e4SLinus Torvalds char *cow_path; 3991da177e4SLinus Torvalds char *value; 4001da177e4SLinus Torvalds int blocksize; 4011da177e4SLinus Torvalds 4021da177e4SLinus Torvalds if (argc < 4) { 4031da177e4SLinus Torvalds ti->error = "dm-snapshot: requires exactly 4 arguments"; 4041da177e4SLinus Torvalds r = -EINVAL; 4051da177e4SLinus Torvalds goto bad1; 4061da177e4SLinus Torvalds } 4071da177e4SLinus Torvalds 4081da177e4SLinus Torvalds origin_path = argv[0]; 4091da177e4SLinus Torvalds cow_path = argv[1]; 4101da177e4SLinus Torvalds persistent = toupper(*argv[2]); 4111da177e4SLinus Torvalds 4121da177e4SLinus Torvalds if (persistent != 'P' && persistent != 'N') { 4131da177e4SLinus Torvalds ti->error = "Persistent flag is not P or N"; 4141da177e4SLinus Torvalds r = -EINVAL; 4151da177e4SLinus Torvalds goto bad1; 4161da177e4SLinus Torvalds } 4171da177e4SLinus Torvalds 4181da177e4SLinus Torvalds chunk_size = simple_strtoul(argv[3], &value, 10); 4191da177e4SLinus Torvalds if (chunk_size == 0 || value == NULL) { 4201da177e4SLinus Torvalds ti->error = "Invalid chunk size"; 4211da177e4SLinus Torvalds r = -EINVAL; 4221da177e4SLinus Torvalds goto bad1; 4231da177e4SLinus Torvalds } 4241da177e4SLinus Torvalds 4251da177e4SLinus Torvalds s = kmalloc(sizeof(*s), GFP_KERNEL); 4261da177e4SLinus Torvalds if (s == NULL) { 4271da177e4SLinus Torvalds ti->error = "Cannot allocate snapshot context private " 4281da177e4SLinus Torvalds "structure"; 4291da177e4SLinus Torvalds r = -ENOMEM; 4301da177e4SLinus Torvalds goto bad1; 4311da177e4SLinus Torvalds } 4321da177e4SLinus Torvalds 4331da177e4SLinus Torvalds r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); 4341da177e4SLinus Torvalds if (r) { 4351da177e4SLinus Torvalds ti->error = "Cannot get origin device"; 4361da177e4SLinus Torvalds goto bad2; 4371da177e4SLinus Torvalds } 4381da177e4SLinus Torvalds 4391da177e4SLinus Torvalds r = dm_get_device(ti, cow_path, 0, 0, 4401da177e4SLinus Torvalds FMODE_READ | FMODE_WRITE, &s->cow); 4411da177e4SLinus Torvalds if (r) { 4421da177e4SLinus Torvalds dm_put_device(ti, s->origin); 4431da177e4SLinus Torvalds ti->error = "Cannot get COW device"; 4441da177e4SLinus Torvalds goto bad2; 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4471da177e4SLinus Torvalds /* 4481da177e4SLinus Torvalds * Chunk size must be multiple of page size. Silently 4491da177e4SLinus Torvalds * round up if it's not. 4501da177e4SLinus Torvalds */ 4511da177e4SLinus Torvalds chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); 4521da177e4SLinus Torvalds 4531da177e4SLinus Torvalds /* Validate the chunk size against the device block size */ 4541da177e4SLinus Torvalds blocksize = s->cow->bdev->bd_disk->queue->hardsect_size; 4551da177e4SLinus Torvalds if (chunk_size % (blocksize >> 9)) { 4561da177e4SLinus Torvalds ti->error = "Chunk size is not a multiple of device blocksize"; 4571da177e4SLinus Torvalds r = -EINVAL; 4581da177e4SLinus Torvalds goto bad3; 4591da177e4SLinus Torvalds } 4601da177e4SLinus Torvalds 4611da177e4SLinus Torvalds /* Check chunk_size is a power of 2 */ 4621da177e4SLinus Torvalds if (chunk_size & (chunk_size - 1)) { 4631da177e4SLinus Torvalds ti->error = "Chunk size is not a power of 2"; 4641da177e4SLinus Torvalds r = -EINVAL; 4651da177e4SLinus Torvalds goto bad3; 4661da177e4SLinus Torvalds } 4671da177e4SLinus Torvalds 4681da177e4SLinus Torvalds s->chunk_size = chunk_size; 4691da177e4SLinus Torvalds s->chunk_mask = chunk_size - 1; 4701da177e4SLinus Torvalds s->type = persistent; 4711da177e4SLinus Torvalds s->chunk_shift = ffs(chunk_size) - 1; 4721da177e4SLinus Torvalds 4731da177e4SLinus Torvalds s->valid = 1; 474aa14edebSAlasdair G Kergon s->active = 0; 4751da177e4SLinus Torvalds s->last_percent = 0; 4761da177e4SLinus Torvalds init_rwsem(&s->lock); 4771da177e4SLinus Torvalds s->table = ti->table; 4781da177e4SLinus Torvalds 4791da177e4SLinus Torvalds /* Allocate hash table for COW data */ 4801da177e4SLinus Torvalds if (init_hash_tables(s)) { 4811da177e4SLinus Torvalds ti->error = "Unable to allocate hash table space"; 4821da177e4SLinus Torvalds r = -ENOMEM; 4831da177e4SLinus Torvalds goto bad3; 4841da177e4SLinus Torvalds } 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds /* 4871da177e4SLinus Torvalds * Check the persistent flag - done here because we need the iobuf 4881da177e4SLinus Torvalds * to check the LV header 4891da177e4SLinus Torvalds */ 4901da177e4SLinus Torvalds s->store.snap = s; 4911da177e4SLinus Torvalds 4921da177e4SLinus Torvalds if (persistent == 'P') 4931da177e4SLinus Torvalds r = dm_create_persistent(&s->store, chunk_size); 4941da177e4SLinus Torvalds else 4951da177e4SLinus Torvalds r = dm_create_transient(&s->store, s, blocksize); 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds if (r) { 4981da177e4SLinus Torvalds ti->error = "Couldn't create exception store"; 4991da177e4SLinus Torvalds r = -EINVAL; 5001da177e4SLinus Torvalds goto bad4; 5011da177e4SLinus Torvalds } 5021da177e4SLinus Torvalds 5031da177e4SLinus Torvalds r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); 5041da177e4SLinus Torvalds if (r) { 5051da177e4SLinus Torvalds ti->error = "Could not create kcopyd client"; 5061da177e4SLinus Torvalds goto bad5; 5071da177e4SLinus Torvalds } 5081da177e4SLinus Torvalds 509aa14edebSAlasdair G Kergon /* Metadata must only be loaded into one table at once */ 510aa14edebSAlasdair G Kergon read_snapshot_metadata(s); 511aa14edebSAlasdair G Kergon 5121da177e4SLinus Torvalds /* Add snapshot to the list of snapshots for this origin */ 513aa14edebSAlasdair G Kergon /* Exceptions aren't triggered till snapshot_resume() is called */ 5141da177e4SLinus Torvalds if (register_snapshot(s)) { 5151da177e4SLinus Torvalds r = -EINVAL; 5161da177e4SLinus Torvalds ti->error = "Cannot register snapshot origin"; 5171da177e4SLinus Torvalds goto bad6; 5181da177e4SLinus Torvalds } 5191da177e4SLinus Torvalds 5201da177e4SLinus Torvalds ti->private = s; 5211da177e4SLinus Torvalds ti->split_io = chunk_size; 5221da177e4SLinus Torvalds 5231da177e4SLinus Torvalds return 0; 5241da177e4SLinus Torvalds 5251da177e4SLinus Torvalds bad6: 5261da177e4SLinus Torvalds kcopyd_client_destroy(s->kcopyd_client); 5271da177e4SLinus Torvalds 5281da177e4SLinus Torvalds bad5: 5291da177e4SLinus Torvalds s->store.destroy(&s->store); 5301da177e4SLinus Torvalds 5311da177e4SLinus Torvalds bad4: 5321da177e4SLinus Torvalds exit_exception_table(&s->pending, pending_cache); 5331da177e4SLinus Torvalds exit_exception_table(&s->complete, exception_cache); 5341da177e4SLinus Torvalds 5351da177e4SLinus Torvalds bad3: 5361da177e4SLinus Torvalds dm_put_device(ti, s->cow); 5371da177e4SLinus Torvalds dm_put_device(ti, s->origin); 5381da177e4SLinus Torvalds 5391da177e4SLinus Torvalds bad2: 5401da177e4SLinus Torvalds kfree(s); 5411da177e4SLinus Torvalds 5421da177e4SLinus Torvalds bad1: 5431da177e4SLinus Torvalds return r; 5441da177e4SLinus Torvalds } 5451da177e4SLinus Torvalds 5461da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti) 5471da177e4SLinus Torvalds { 5481da177e4SLinus Torvalds struct dm_snapshot *s = (struct dm_snapshot *) ti->private; 5491da177e4SLinus Torvalds 5501da177e4SLinus Torvalds unregister_snapshot(s); 5511da177e4SLinus Torvalds 5521da177e4SLinus Torvalds exit_exception_table(&s->pending, pending_cache); 5531da177e4SLinus Torvalds exit_exception_table(&s->complete, exception_cache); 5541da177e4SLinus Torvalds 5551da177e4SLinus Torvalds /* Deallocate memory used */ 5561da177e4SLinus Torvalds s->store.destroy(&s->store); 5571da177e4SLinus Torvalds 5581da177e4SLinus Torvalds dm_put_device(ti, s->origin); 5591da177e4SLinus Torvalds dm_put_device(ti, s->cow); 5601da177e4SLinus Torvalds kcopyd_client_destroy(s->kcopyd_client); 5611da177e4SLinus Torvalds kfree(s); 5621da177e4SLinus Torvalds } 5631da177e4SLinus Torvalds 5641da177e4SLinus Torvalds /* 5651da177e4SLinus Torvalds * Flush a list of buffers. 5661da177e4SLinus Torvalds */ 5671da177e4SLinus Torvalds static void flush_bios(struct bio *bio) 5681da177e4SLinus Torvalds { 5691da177e4SLinus Torvalds struct bio *n; 5701da177e4SLinus Torvalds 5711da177e4SLinus Torvalds while (bio) { 5721da177e4SLinus Torvalds n = bio->bi_next; 5731da177e4SLinus Torvalds bio->bi_next = NULL; 5741da177e4SLinus Torvalds generic_make_request(bio); 5751da177e4SLinus Torvalds bio = n; 5761da177e4SLinus Torvalds } 5771da177e4SLinus Torvalds } 5781da177e4SLinus Torvalds 5791da177e4SLinus Torvalds /* 5801da177e4SLinus Torvalds * Error a list of buffers. 5811da177e4SLinus Torvalds */ 5821da177e4SLinus Torvalds static void error_bios(struct bio *bio) 5831da177e4SLinus Torvalds { 5841da177e4SLinus Torvalds struct bio *n; 5851da177e4SLinus Torvalds 5861da177e4SLinus Torvalds while (bio) { 5871da177e4SLinus Torvalds n = bio->bi_next; 5881da177e4SLinus Torvalds bio->bi_next = NULL; 5891da177e4SLinus Torvalds bio_io_error(bio, bio->bi_size); 5901da177e4SLinus Torvalds bio = n; 5911da177e4SLinus Torvalds } 5921da177e4SLinus Torvalds } 5931da177e4SLinus Torvalds 5941da177e4SLinus Torvalds static struct bio *__flush_bios(struct pending_exception *pe) 5951da177e4SLinus Torvalds { 5961da177e4SLinus Torvalds struct pending_exception *sibling; 5971da177e4SLinus Torvalds 5981da177e4SLinus Torvalds if (list_empty(&pe->siblings)) 5991da177e4SLinus Torvalds return bio_list_get(&pe->origin_bios); 6001da177e4SLinus Torvalds 6011da177e4SLinus Torvalds sibling = list_entry(pe->siblings.next, 6021da177e4SLinus Torvalds struct pending_exception, siblings); 6031da177e4SLinus Torvalds 6041da177e4SLinus Torvalds list_del(&pe->siblings); 6051da177e4SLinus Torvalds 6061da177e4SLinus Torvalds /* This is fine as long as kcopyd is single-threaded. If kcopyd 6071da177e4SLinus Torvalds * becomes multi-threaded, we'll need some locking here. 6081da177e4SLinus Torvalds */ 6091da177e4SLinus Torvalds bio_list_merge(&sibling->origin_bios, &pe->origin_bios); 6101da177e4SLinus Torvalds 6111da177e4SLinus Torvalds return NULL; 6121da177e4SLinus Torvalds } 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds static void pending_complete(struct pending_exception *pe, int success) 6151da177e4SLinus Torvalds { 6161da177e4SLinus Torvalds struct exception *e; 6171da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 6181da177e4SLinus Torvalds struct bio *flush = NULL; 6191da177e4SLinus Torvalds 6201da177e4SLinus Torvalds if (success) { 6211da177e4SLinus Torvalds e = alloc_exception(); 6221da177e4SLinus Torvalds if (!e) { 6231da177e4SLinus Torvalds DMWARN("Unable to allocate exception."); 6241da177e4SLinus Torvalds down_write(&s->lock); 6251da177e4SLinus Torvalds s->store.drop_snapshot(&s->store); 6261da177e4SLinus Torvalds s->valid = 0; 6271da177e4SLinus Torvalds flush = __flush_bios(pe); 6281da177e4SLinus Torvalds up_write(&s->lock); 6291da177e4SLinus Torvalds 6301da177e4SLinus Torvalds error_bios(bio_list_get(&pe->snapshot_bios)); 6311da177e4SLinus Torvalds goto out; 6321da177e4SLinus Torvalds } 6331da177e4SLinus Torvalds *e = pe->e; 6341da177e4SLinus Torvalds 6351da177e4SLinus Torvalds /* 6361da177e4SLinus Torvalds * Add a proper exception, and remove the 6371da177e4SLinus Torvalds * in-flight exception from the list. 6381da177e4SLinus Torvalds */ 6391da177e4SLinus Torvalds down_write(&s->lock); 6401da177e4SLinus Torvalds insert_exception(&s->complete, e); 6411da177e4SLinus Torvalds remove_exception(&pe->e); 6421da177e4SLinus Torvalds flush = __flush_bios(pe); 6431da177e4SLinus Torvalds 6441da177e4SLinus Torvalds /* Submit any pending write bios */ 6451da177e4SLinus Torvalds up_write(&s->lock); 6461da177e4SLinus Torvalds 6471da177e4SLinus Torvalds flush_bios(bio_list_get(&pe->snapshot_bios)); 6481da177e4SLinus Torvalds } else { 6491da177e4SLinus Torvalds /* Read/write error - snapshot is unusable */ 6501da177e4SLinus Torvalds down_write(&s->lock); 6511da177e4SLinus Torvalds if (s->valid) 6521da177e4SLinus Torvalds DMERR("Error reading/writing snapshot"); 6531da177e4SLinus Torvalds s->store.drop_snapshot(&s->store); 6541da177e4SLinus Torvalds s->valid = 0; 6551da177e4SLinus Torvalds remove_exception(&pe->e); 6561da177e4SLinus Torvalds flush = __flush_bios(pe); 6571da177e4SLinus Torvalds up_write(&s->lock); 6581da177e4SLinus Torvalds 6591da177e4SLinus Torvalds error_bios(bio_list_get(&pe->snapshot_bios)); 6601da177e4SLinus Torvalds 6611da177e4SLinus Torvalds dm_table_event(s->table); 6621da177e4SLinus Torvalds } 6631da177e4SLinus Torvalds 6641da177e4SLinus Torvalds out: 6651da177e4SLinus Torvalds free_pending_exception(pe); 6661da177e4SLinus Torvalds 6671da177e4SLinus Torvalds if (flush) 6681da177e4SLinus Torvalds flush_bios(flush); 6691da177e4SLinus Torvalds } 6701da177e4SLinus Torvalds 6711da177e4SLinus Torvalds static void commit_callback(void *context, int success) 6721da177e4SLinus Torvalds { 6731da177e4SLinus Torvalds struct pending_exception *pe = (struct pending_exception *) context; 6741da177e4SLinus Torvalds pending_complete(pe, success); 6751da177e4SLinus Torvalds } 6761da177e4SLinus Torvalds 6771da177e4SLinus Torvalds /* 6781da177e4SLinus Torvalds * Called when the copy I/O has finished. kcopyd actually runs 6791da177e4SLinus Torvalds * this code so don't block. 6801da177e4SLinus Torvalds */ 6811da177e4SLinus Torvalds static void copy_callback(int read_err, unsigned int write_err, void *context) 6821da177e4SLinus Torvalds { 6831da177e4SLinus Torvalds struct pending_exception *pe = (struct pending_exception *) context; 6841da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 6851da177e4SLinus Torvalds 6861da177e4SLinus Torvalds if (read_err || write_err) 6871da177e4SLinus Torvalds pending_complete(pe, 0); 6881da177e4SLinus Torvalds 6891da177e4SLinus Torvalds else 6901da177e4SLinus Torvalds /* Update the metadata if we are persistent */ 6911da177e4SLinus Torvalds s->store.commit_exception(&s->store, &pe->e, commit_callback, 6921da177e4SLinus Torvalds pe); 6931da177e4SLinus Torvalds } 6941da177e4SLinus Torvalds 6951da177e4SLinus Torvalds /* 6961da177e4SLinus Torvalds * Dispatches the copy operation to kcopyd. 6971da177e4SLinus Torvalds */ 698858119e1SArjan van de Ven static void start_copy(struct pending_exception *pe) 6991da177e4SLinus Torvalds { 7001da177e4SLinus Torvalds struct dm_snapshot *s = pe->snap; 7011da177e4SLinus Torvalds struct io_region src, dest; 7021da177e4SLinus Torvalds struct block_device *bdev = s->origin->bdev; 7031da177e4SLinus Torvalds sector_t dev_size; 7041da177e4SLinus Torvalds 7051da177e4SLinus Torvalds dev_size = get_dev_size(bdev); 7061da177e4SLinus Torvalds 7071da177e4SLinus Torvalds src.bdev = bdev; 7081da177e4SLinus Torvalds src.sector = chunk_to_sector(s, pe->e.old_chunk); 7091da177e4SLinus Torvalds src.count = min(s->chunk_size, dev_size - src.sector); 7101da177e4SLinus Torvalds 7111da177e4SLinus Torvalds dest.bdev = s->cow->bdev; 7121da177e4SLinus Torvalds dest.sector = chunk_to_sector(s, pe->e.new_chunk); 7131da177e4SLinus Torvalds dest.count = src.count; 7141da177e4SLinus Torvalds 7151da177e4SLinus Torvalds /* Hand over to kcopyd */ 7161da177e4SLinus Torvalds kcopyd_copy(s->kcopyd_client, 7171da177e4SLinus Torvalds &src, 1, &dest, 0, copy_callback, pe); 7181da177e4SLinus Torvalds } 7191da177e4SLinus Torvalds 7201da177e4SLinus Torvalds /* 7211da177e4SLinus Torvalds * Looks to see if this snapshot already has a pending exception 7221da177e4SLinus Torvalds * for this chunk, otherwise it allocates a new one and inserts 7231da177e4SLinus Torvalds * it into the pending table. 7241da177e4SLinus Torvalds * 7251da177e4SLinus Torvalds * NOTE: a write lock must be held on snap->lock before calling 7261da177e4SLinus Torvalds * this. 7271da177e4SLinus Torvalds */ 7281da177e4SLinus Torvalds static struct pending_exception * 7291da177e4SLinus Torvalds __find_pending_exception(struct dm_snapshot *s, struct bio *bio) 7301da177e4SLinus Torvalds { 7311da177e4SLinus Torvalds struct exception *e; 7321da177e4SLinus Torvalds struct pending_exception *pe; 7331da177e4SLinus Torvalds chunk_t chunk = sector_to_chunk(s, bio->bi_sector); 7341da177e4SLinus Torvalds 7351da177e4SLinus Torvalds /* 7361da177e4SLinus Torvalds * Is there a pending exception for this already ? 7371da177e4SLinus Torvalds */ 7381da177e4SLinus Torvalds e = lookup_exception(&s->pending, chunk); 7391da177e4SLinus Torvalds if (e) { 7401da177e4SLinus Torvalds /* cast the exception to a pending exception */ 7411da177e4SLinus Torvalds pe = container_of(e, struct pending_exception, e); 7421da177e4SLinus Torvalds 7431da177e4SLinus Torvalds } else { 7441da177e4SLinus Torvalds /* 7451da177e4SLinus Torvalds * Create a new pending exception, we don't want 7461da177e4SLinus Torvalds * to hold the lock while we do this. 7471da177e4SLinus Torvalds */ 7481da177e4SLinus Torvalds up_write(&s->lock); 7491da177e4SLinus Torvalds pe = alloc_pending_exception(); 7501da177e4SLinus Torvalds down_write(&s->lock); 7511da177e4SLinus Torvalds 7521da177e4SLinus Torvalds e = lookup_exception(&s->pending, chunk); 7531da177e4SLinus Torvalds if (e) { 7541da177e4SLinus Torvalds free_pending_exception(pe); 7551da177e4SLinus Torvalds pe = container_of(e, struct pending_exception, e); 7561da177e4SLinus Torvalds } else { 7571da177e4SLinus Torvalds pe->e.old_chunk = chunk; 7581da177e4SLinus Torvalds bio_list_init(&pe->origin_bios); 7591da177e4SLinus Torvalds bio_list_init(&pe->snapshot_bios); 7601da177e4SLinus Torvalds INIT_LIST_HEAD(&pe->siblings); 7611da177e4SLinus Torvalds pe->snap = s; 7621da177e4SLinus Torvalds pe->started = 0; 7631da177e4SLinus Torvalds 7641da177e4SLinus Torvalds if (s->store.prepare_exception(&s->store, &pe->e)) { 7651da177e4SLinus Torvalds free_pending_exception(pe); 7661da177e4SLinus Torvalds s->valid = 0; 7671da177e4SLinus Torvalds return NULL; 7681da177e4SLinus Torvalds } 7691da177e4SLinus Torvalds 7701da177e4SLinus Torvalds insert_exception(&s->pending, &pe->e); 7711da177e4SLinus Torvalds } 7721da177e4SLinus Torvalds } 7731da177e4SLinus Torvalds 7741da177e4SLinus Torvalds return pe; 7751da177e4SLinus Torvalds } 7761da177e4SLinus Torvalds 7771da177e4SLinus Torvalds static inline void remap_exception(struct dm_snapshot *s, struct exception *e, 7781da177e4SLinus Torvalds struct bio *bio) 7791da177e4SLinus Torvalds { 7801da177e4SLinus Torvalds bio->bi_bdev = s->cow->bdev; 7811da177e4SLinus Torvalds bio->bi_sector = chunk_to_sector(s, e->new_chunk) + 7821da177e4SLinus Torvalds (bio->bi_sector & s->chunk_mask); 7831da177e4SLinus Torvalds } 7841da177e4SLinus Torvalds 7851da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio, 7861da177e4SLinus Torvalds union map_info *map_context) 7871da177e4SLinus Torvalds { 7881da177e4SLinus Torvalds struct exception *e; 7891da177e4SLinus Torvalds struct dm_snapshot *s = (struct dm_snapshot *) ti->private; 7901da177e4SLinus Torvalds int r = 1; 7911da177e4SLinus Torvalds chunk_t chunk; 7921da177e4SLinus Torvalds struct pending_exception *pe; 7931da177e4SLinus Torvalds 7941da177e4SLinus Torvalds chunk = sector_to_chunk(s, bio->bi_sector); 7951da177e4SLinus Torvalds 7961da177e4SLinus Torvalds /* Full snapshots are not usable */ 7971da177e4SLinus Torvalds if (!s->valid) 798f6a80ea8SAlasdair G Kergon return -EIO; 7991da177e4SLinus Torvalds 8004aac0a63SAlasdair G Kergon if (unlikely(bio_barrier(bio))) 8014aac0a63SAlasdair G Kergon return -EOPNOTSUPP; 8024aac0a63SAlasdair G Kergon 8031da177e4SLinus Torvalds /* 8041da177e4SLinus Torvalds * Write to snapshot - higher level takes care of RW/RO 8051da177e4SLinus Torvalds * flags so we should only get this if we are 8061da177e4SLinus Torvalds * writeable. 8071da177e4SLinus Torvalds */ 8081da177e4SLinus Torvalds if (bio_rw(bio) == WRITE) { 8091da177e4SLinus Torvalds 8101da177e4SLinus Torvalds /* FIXME: should only take write lock if we need 8111da177e4SLinus Torvalds * to copy an exception */ 8121da177e4SLinus Torvalds down_write(&s->lock); 8131da177e4SLinus Torvalds 8141da177e4SLinus Torvalds /* If the block is already remapped - use that, else remap it */ 8151da177e4SLinus Torvalds e = lookup_exception(&s->complete, chunk); 8161da177e4SLinus Torvalds if (e) { 8171da177e4SLinus Torvalds remap_exception(s, e, bio); 8181da177e4SLinus Torvalds up_write(&s->lock); 8191da177e4SLinus Torvalds 8201da177e4SLinus Torvalds } else { 8211da177e4SLinus Torvalds pe = __find_pending_exception(s, bio); 8221da177e4SLinus Torvalds 8231da177e4SLinus Torvalds if (!pe) { 8241da177e4SLinus Torvalds if (s->store.drop_snapshot) 8251da177e4SLinus Torvalds s->store.drop_snapshot(&s->store); 8261da177e4SLinus Torvalds s->valid = 0; 8271da177e4SLinus Torvalds r = -EIO; 8281da177e4SLinus Torvalds up_write(&s->lock); 8291da177e4SLinus Torvalds } else { 8301da177e4SLinus Torvalds remap_exception(s, &pe->e, bio); 8311da177e4SLinus Torvalds bio_list_add(&pe->snapshot_bios, bio); 8321da177e4SLinus Torvalds 8331da177e4SLinus Torvalds if (!pe->started) { 8341da177e4SLinus Torvalds /* this is protected by snap->lock */ 8351da177e4SLinus Torvalds pe->started = 1; 8361da177e4SLinus Torvalds up_write(&s->lock); 8371da177e4SLinus Torvalds start_copy(pe); 8381da177e4SLinus Torvalds } else 8391da177e4SLinus Torvalds up_write(&s->lock); 8401da177e4SLinus Torvalds r = 0; 8411da177e4SLinus Torvalds } 8421da177e4SLinus Torvalds } 8431da177e4SLinus Torvalds 8441da177e4SLinus Torvalds } else { 8451da177e4SLinus Torvalds /* 8461da177e4SLinus Torvalds * FIXME: this read path scares me because we 8471da177e4SLinus Torvalds * always use the origin when we have a pending 8481da177e4SLinus Torvalds * exception. However I can't think of a 8491da177e4SLinus Torvalds * situation where this is wrong - ejt. 8501da177e4SLinus Torvalds */ 8511da177e4SLinus Torvalds 8521da177e4SLinus Torvalds /* Do reads */ 8531da177e4SLinus Torvalds down_read(&s->lock); 8541da177e4SLinus Torvalds 8551da177e4SLinus Torvalds /* See if it it has been remapped */ 8561da177e4SLinus Torvalds e = lookup_exception(&s->complete, chunk); 8571da177e4SLinus Torvalds if (e) 8581da177e4SLinus Torvalds remap_exception(s, e, bio); 8591da177e4SLinus Torvalds else 8601da177e4SLinus Torvalds bio->bi_bdev = s->origin->bdev; 8611da177e4SLinus Torvalds 8621da177e4SLinus Torvalds up_read(&s->lock); 8631da177e4SLinus Torvalds } 8641da177e4SLinus Torvalds 8651da177e4SLinus Torvalds return r; 8661da177e4SLinus Torvalds } 8671da177e4SLinus Torvalds 8681da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti) 8691da177e4SLinus Torvalds { 8701da177e4SLinus Torvalds struct dm_snapshot *s = (struct dm_snapshot *) ti->private; 8711da177e4SLinus Torvalds 872aa14edebSAlasdair G Kergon down_write(&s->lock); 873aa14edebSAlasdair G Kergon s->active = 1; 874aa14edebSAlasdair G Kergon up_write(&s->lock); 8751da177e4SLinus Torvalds } 8761da177e4SLinus Torvalds 8771da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type, 8781da177e4SLinus Torvalds char *result, unsigned int maxlen) 8791da177e4SLinus Torvalds { 8801da177e4SLinus Torvalds struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; 8811da177e4SLinus Torvalds 8821da177e4SLinus Torvalds switch (type) { 8831da177e4SLinus Torvalds case STATUSTYPE_INFO: 8841da177e4SLinus Torvalds if (!snap->valid) 8851da177e4SLinus Torvalds snprintf(result, maxlen, "Invalid"); 8861da177e4SLinus Torvalds else { 8871da177e4SLinus Torvalds if (snap->store.fraction_full) { 8881da177e4SLinus Torvalds sector_t numerator, denominator; 8891da177e4SLinus Torvalds snap->store.fraction_full(&snap->store, 8901da177e4SLinus Torvalds &numerator, 8911da177e4SLinus Torvalds &denominator); 8921da177e4SLinus Torvalds snprintf(result, maxlen, 8931da177e4SLinus Torvalds SECTOR_FORMAT "/" SECTOR_FORMAT, 8941da177e4SLinus Torvalds numerator, denominator); 8951da177e4SLinus Torvalds } 8961da177e4SLinus Torvalds else 8971da177e4SLinus Torvalds snprintf(result, maxlen, "Unknown"); 8981da177e4SLinus Torvalds } 8991da177e4SLinus Torvalds break; 9001da177e4SLinus Torvalds 9011da177e4SLinus Torvalds case STATUSTYPE_TABLE: 9021da177e4SLinus Torvalds /* 9031da177e4SLinus Torvalds * kdevname returns a static pointer so we need 9041da177e4SLinus Torvalds * to make private copies if the output is to 9051da177e4SLinus Torvalds * make sense. 9061da177e4SLinus Torvalds */ 9071da177e4SLinus Torvalds snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT, 9081da177e4SLinus Torvalds snap->origin->name, snap->cow->name, 9091da177e4SLinus Torvalds snap->type, snap->chunk_size); 9101da177e4SLinus Torvalds break; 9111da177e4SLinus Torvalds } 9121da177e4SLinus Torvalds 9131da177e4SLinus Torvalds return 0; 9141da177e4SLinus Torvalds } 9151da177e4SLinus Torvalds 9161da177e4SLinus Torvalds /*----------------------------------------------------------------- 9171da177e4SLinus Torvalds * Origin methods 9181da177e4SLinus Torvalds *---------------------------------------------------------------*/ 9191da177e4SLinus Torvalds static void list_merge(struct list_head *l1, struct list_head *l2) 9201da177e4SLinus Torvalds { 9211da177e4SLinus Torvalds struct list_head *l1_n, *l2_p; 9221da177e4SLinus Torvalds 9231da177e4SLinus Torvalds l1_n = l1->next; 9241da177e4SLinus Torvalds l2_p = l2->prev; 9251da177e4SLinus Torvalds 9261da177e4SLinus Torvalds l1->next = l2; 9271da177e4SLinus Torvalds l2->prev = l1; 9281da177e4SLinus Torvalds 9291da177e4SLinus Torvalds l2_p->next = l1_n; 9301da177e4SLinus Torvalds l1_n->prev = l2_p; 9311da177e4SLinus Torvalds } 9321da177e4SLinus Torvalds 9331da177e4SLinus Torvalds static int __origin_write(struct list_head *snapshots, struct bio *bio) 9341da177e4SLinus Torvalds { 9351da177e4SLinus Torvalds int r = 1, first = 1; 9361da177e4SLinus Torvalds struct dm_snapshot *snap; 9371da177e4SLinus Torvalds struct exception *e; 938eccf0817SAlasdair G Kergon struct pending_exception *pe, *next_pe, *last = NULL; 9391da177e4SLinus Torvalds chunk_t chunk; 940eccf0817SAlasdair G Kergon LIST_HEAD(pe_queue); 9411da177e4SLinus Torvalds 9421da177e4SLinus Torvalds /* Do all the snapshots on this origin */ 9431da177e4SLinus Torvalds list_for_each_entry (snap, snapshots, list) { 9441da177e4SLinus Torvalds 945aa14edebSAlasdair G Kergon /* Only deal with valid and active snapshots */ 946aa14edebSAlasdair G Kergon if (!snap->valid || !snap->active) 9471da177e4SLinus Torvalds continue; 9481da177e4SLinus Torvalds 949d5e404c1SAlasdair G Kergon /* Nothing to do if writing beyond end of snapshot */ 950d5e404c1SAlasdair G Kergon if (bio->bi_sector >= dm_table_get_size(snap->table)) 951d5e404c1SAlasdair G Kergon continue; 952d5e404c1SAlasdair G Kergon 9531da177e4SLinus Torvalds down_write(&snap->lock); 9541da177e4SLinus Torvalds 9551da177e4SLinus Torvalds /* 9561da177e4SLinus Torvalds * Remember, different snapshots can have 9571da177e4SLinus Torvalds * different chunk sizes. 9581da177e4SLinus Torvalds */ 9591da177e4SLinus Torvalds chunk = sector_to_chunk(snap, bio->bi_sector); 9601da177e4SLinus Torvalds 9611da177e4SLinus Torvalds /* 9621da177e4SLinus Torvalds * Check exception table to see if block 9631da177e4SLinus Torvalds * is already remapped in this snapshot 9641da177e4SLinus Torvalds * and trigger an exception if not. 9651da177e4SLinus Torvalds */ 9661da177e4SLinus Torvalds e = lookup_exception(&snap->complete, chunk); 9671da177e4SLinus Torvalds if (!e) { 9681da177e4SLinus Torvalds pe = __find_pending_exception(snap, bio); 9691da177e4SLinus Torvalds if (!pe) { 9701da177e4SLinus Torvalds snap->store.drop_snapshot(&snap->store); 9711da177e4SLinus Torvalds snap->valid = 0; 9721da177e4SLinus Torvalds 9731da177e4SLinus Torvalds } else { 974eccf0817SAlasdair G Kergon if (first) { 975eccf0817SAlasdair G Kergon bio_list_add(&pe->origin_bios, bio); 976eccf0817SAlasdair G Kergon r = 0; 977eccf0817SAlasdair G Kergon first = 0; 978eccf0817SAlasdair G Kergon } 979eccf0817SAlasdair G Kergon if (last && list_empty(&pe->siblings)) 9801da177e4SLinus Torvalds list_merge(&pe->siblings, 9811da177e4SLinus Torvalds &last->siblings); 982eccf0817SAlasdair G Kergon if (!pe->started) { 983eccf0817SAlasdair G Kergon pe->started = 1; 984eccf0817SAlasdair G Kergon list_add_tail(&pe->list, &pe_queue); 985eccf0817SAlasdair G Kergon } 9861da177e4SLinus Torvalds last = pe; 9871da177e4SLinus Torvalds } 9881da177e4SLinus Torvalds } 9891da177e4SLinus Torvalds 9901da177e4SLinus Torvalds up_write(&snap->lock); 9911da177e4SLinus Torvalds } 9921da177e4SLinus Torvalds 9931da177e4SLinus Torvalds /* 9941da177e4SLinus Torvalds * Now that we have a complete pe list we can start the copying. 9951da177e4SLinus Torvalds */ 996eccf0817SAlasdair G Kergon list_for_each_entry_safe(pe, next_pe, &pe_queue, list) 9971da177e4SLinus Torvalds start_copy(pe); 9981da177e4SLinus Torvalds 9991da177e4SLinus Torvalds return r; 10001da177e4SLinus Torvalds } 10011da177e4SLinus Torvalds 10021da177e4SLinus Torvalds /* 10031da177e4SLinus Torvalds * Called on a write from the origin driver. 10041da177e4SLinus Torvalds */ 10051da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio) 10061da177e4SLinus Torvalds { 10071da177e4SLinus Torvalds struct origin *o; 10081da177e4SLinus Torvalds int r = 1; 10091da177e4SLinus Torvalds 10101da177e4SLinus Torvalds down_read(&_origins_lock); 10111da177e4SLinus Torvalds o = __lookup_origin(origin->bdev); 10121da177e4SLinus Torvalds if (o) 10131da177e4SLinus Torvalds r = __origin_write(&o->snapshots, bio); 10141da177e4SLinus Torvalds up_read(&_origins_lock); 10151da177e4SLinus Torvalds 10161da177e4SLinus Torvalds return r; 10171da177e4SLinus Torvalds } 10181da177e4SLinus Torvalds 10191da177e4SLinus Torvalds /* 10201da177e4SLinus Torvalds * Origin: maps a linear range of a device, with hooks for snapshotting. 10211da177e4SLinus Torvalds */ 10221da177e4SLinus Torvalds 10231da177e4SLinus Torvalds /* 10241da177e4SLinus Torvalds * Construct an origin mapping: <dev_path> 10251da177e4SLinus Torvalds * The context for an origin is merely a 'struct dm_dev *' 10261da177e4SLinus Torvalds * pointing to the real device. 10271da177e4SLinus Torvalds */ 10281da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) 10291da177e4SLinus Torvalds { 10301da177e4SLinus Torvalds int r; 10311da177e4SLinus Torvalds struct dm_dev *dev; 10321da177e4SLinus Torvalds 10331da177e4SLinus Torvalds if (argc != 1) { 10341da177e4SLinus Torvalds ti->error = "dm-origin: incorrect number of arguments"; 10351da177e4SLinus Torvalds return -EINVAL; 10361da177e4SLinus Torvalds } 10371da177e4SLinus Torvalds 10381da177e4SLinus Torvalds r = dm_get_device(ti, argv[0], 0, ti->len, 10391da177e4SLinus Torvalds dm_table_get_mode(ti->table), &dev); 10401da177e4SLinus Torvalds if (r) { 10411da177e4SLinus Torvalds ti->error = "Cannot get target device"; 10421da177e4SLinus Torvalds return r; 10431da177e4SLinus Torvalds } 10441da177e4SLinus Torvalds 10451da177e4SLinus Torvalds ti->private = dev; 10461da177e4SLinus Torvalds return 0; 10471da177e4SLinus Torvalds } 10481da177e4SLinus Torvalds 10491da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti) 10501da177e4SLinus Torvalds { 10511da177e4SLinus Torvalds struct dm_dev *dev = (struct dm_dev *) ti->private; 10521da177e4SLinus Torvalds dm_put_device(ti, dev); 10531da177e4SLinus Torvalds } 10541da177e4SLinus Torvalds 10551da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio, 10561da177e4SLinus Torvalds union map_info *map_context) 10571da177e4SLinus Torvalds { 10581da177e4SLinus Torvalds struct dm_dev *dev = (struct dm_dev *) ti->private; 10591da177e4SLinus Torvalds bio->bi_bdev = dev->bdev; 10601da177e4SLinus Torvalds 10614aac0a63SAlasdair G Kergon if (unlikely(bio_barrier(bio))) 10624aac0a63SAlasdair G Kergon return -EOPNOTSUPP; 10634aac0a63SAlasdair G Kergon 10641da177e4SLinus Torvalds /* Only tell snapshots if this is a write */ 10651da177e4SLinus Torvalds return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1; 10661da177e4SLinus Torvalds } 10671da177e4SLinus Torvalds 10681da177e4SLinus Torvalds #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 10691da177e4SLinus Torvalds 10701da177e4SLinus Torvalds /* 10711da177e4SLinus Torvalds * Set the target "split_io" field to the minimum of all the snapshots' 10721da177e4SLinus Torvalds * chunk sizes. 10731da177e4SLinus Torvalds */ 10741da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti) 10751da177e4SLinus Torvalds { 10761da177e4SLinus Torvalds struct dm_dev *dev = (struct dm_dev *) ti->private; 10771da177e4SLinus Torvalds struct dm_snapshot *snap; 10781da177e4SLinus Torvalds struct origin *o; 10791da177e4SLinus Torvalds chunk_t chunk_size = 0; 10801da177e4SLinus Torvalds 10811da177e4SLinus Torvalds down_read(&_origins_lock); 10821da177e4SLinus Torvalds o = __lookup_origin(dev->bdev); 10831da177e4SLinus Torvalds if (o) 10841da177e4SLinus Torvalds list_for_each_entry (snap, &o->snapshots, list) 10851da177e4SLinus Torvalds chunk_size = min_not_zero(chunk_size, snap->chunk_size); 10861da177e4SLinus Torvalds up_read(&_origins_lock); 10871da177e4SLinus Torvalds 10881da177e4SLinus Torvalds ti->split_io = chunk_size; 10891da177e4SLinus Torvalds } 10901da177e4SLinus Torvalds 10911da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result, 10921da177e4SLinus Torvalds unsigned int maxlen) 10931da177e4SLinus Torvalds { 10941da177e4SLinus Torvalds struct dm_dev *dev = (struct dm_dev *) ti->private; 10951da177e4SLinus Torvalds 10961da177e4SLinus Torvalds switch (type) { 10971da177e4SLinus Torvalds case STATUSTYPE_INFO: 10981da177e4SLinus Torvalds result[0] = '\0'; 10991da177e4SLinus Torvalds break; 11001da177e4SLinus Torvalds 11011da177e4SLinus Torvalds case STATUSTYPE_TABLE: 11021da177e4SLinus Torvalds snprintf(result, maxlen, "%s", dev->name); 11031da177e4SLinus Torvalds break; 11041da177e4SLinus Torvalds } 11051da177e4SLinus Torvalds 11061da177e4SLinus Torvalds return 0; 11071da177e4SLinus Torvalds } 11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds static struct target_type origin_target = { 11101da177e4SLinus Torvalds .name = "snapshot-origin", 1111aa14edebSAlasdair G Kergon .version = {1, 1, 0}, 11121da177e4SLinus Torvalds .module = THIS_MODULE, 11131da177e4SLinus Torvalds .ctr = origin_ctr, 11141da177e4SLinus Torvalds .dtr = origin_dtr, 11151da177e4SLinus Torvalds .map = origin_map, 11161da177e4SLinus Torvalds .resume = origin_resume, 11171da177e4SLinus Torvalds .status = origin_status, 11181da177e4SLinus Torvalds }; 11191da177e4SLinus Torvalds 11201da177e4SLinus Torvalds static struct target_type snapshot_target = { 11211da177e4SLinus Torvalds .name = "snapshot", 1122aa14edebSAlasdair G Kergon .version = {1, 1, 0}, 11231da177e4SLinus Torvalds .module = THIS_MODULE, 11241da177e4SLinus Torvalds .ctr = snapshot_ctr, 11251da177e4SLinus Torvalds .dtr = snapshot_dtr, 11261da177e4SLinus Torvalds .map = snapshot_map, 11271da177e4SLinus Torvalds .resume = snapshot_resume, 11281da177e4SLinus Torvalds .status = snapshot_status, 11291da177e4SLinus Torvalds }; 11301da177e4SLinus Torvalds 11311da177e4SLinus Torvalds static int __init dm_snapshot_init(void) 11321da177e4SLinus Torvalds { 11331da177e4SLinus Torvalds int r; 11341da177e4SLinus Torvalds 11351da177e4SLinus Torvalds r = dm_register_target(&snapshot_target); 11361da177e4SLinus Torvalds if (r) { 11371da177e4SLinus Torvalds DMERR("snapshot target register failed %d", r); 11381da177e4SLinus Torvalds return r; 11391da177e4SLinus Torvalds } 11401da177e4SLinus Torvalds 11411da177e4SLinus Torvalds r = dm_register_target(&origin_target); 11421da177e4SLinus Torvalds if (r < 0) { 11431da177e4SLinus Torvalds DMERR("Device mapper: Origin: register failed %d\n", r); 11441da177e4SLinus Torvalds goto bad1; 11451da177e4SLinus Torvalds } 11461da177e4SLinus Torvalds 11471da177e4SLinus Torvalds r = init_origin_hash(); 11481da177e4SLinus Torvalds if (r) { 11491da177e4SLinus Torvalds DMERR("init_origin_hash failed."); 11501da177e4SLinus Torvalds goto bad2; 11511da177e4SLinus Torvalds } 11521da177e4SLinus Torvalds 11531da177e4SLinus Torvalds exception_cache = kmem_cache_create("dm-snapshot-ex", 11541da177e4SLinus Torvalds sizeof(struct exception), 11551da177e4SLinus Torvalds __alignof__(struct exception), 11561da177e4SLinus Torvalds 0, NULL, NULL); 11571da177e4SLinus Torvalds if (!exception_cache) { 11581da177e4SLinus Torvalds DMERR("Couldn't create exception cache."); 11591da177e4SLinus Torvalds r = -ENOMEM; 11601da177e4SLinus Torvalds goto bad3; 11611da177e4SLinus Torvalds } 11621da177e4SLinus Torvalds 11631da177e4SLinus Torvalds pending_cache = 11641da177e4SLinus Torvalds kmem_cache_create("dm-snapshot-in", 11651da177e4SLinus Torvalds sizeof(struct pending_exception), 11661da177e4SLinus Torvalds __alignof__(struct pending_exception), 11671da177e4SLinus Torvalds 0, NULL, NULL); 11681da177e4SLinus Torvalds if (!pending_cache) { 11691da177e4SLinus Torvalds DMERR("Couldn't create pending cache."); 11701da177e4SLinus Torvalds r = -ENOMEM; 11711da177e4SLinus Torvalds goto bad4; 11721da177e4SLinus Torvalds } 11731da177e4SLinus Torvalds 117493d2341cSMatthew Dobson pending_pool = mempool_create_slab_pool(128, pending_cache); 11751da177e4SLinus Torvalds if (!pending_pool) { 11761da177e4SLinus Torvalds DMERR("Couldn't create pending pool."); 11771da177e4SLinus Torvalds r = -ENOMEM; 11781da177e4SLinus Torvalds goto bad5; 11791da177e4SLinus Torvalds } 11801da177e4SLinus Torvalds 11811da177e4SLinus Torvalds return 0; 11821da177e4SLinus Torvalds 11831da177e4SLinus Torvalds bad5: 11841da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 11851da177e4SLinus Torvalds bad4: 11861da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 11871da177e4SLinus Torvalds bad3: 11881da177e4SLinus Torvalds exit_origin_hash(); 11891da177e4SLinus Torvalds bad2: 11901da177e4SLinus Torvalds dm_unregister_target(&origin_target); 11911da177e4SLinus Torvalds bad1: 11921da177e4SLinus Torvalds dm_unregister_target(&snapshot_target); 11931da177e4SLinus Torvalds return r; 11941da177e4SLinus Torvalds } 11951da177e4SLinus Torvalds 11961da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void) 11971da177e4SLinus Torvalds { 11981da177e4SLinus Torvalds int r; 11991da177e4SLinus Torvalds 12001da177e4SLinus Torvalds r = dm_unregister_target(&snapshot_target); 12011da177e4SLinus Torvalds if (r) 12021da177e4SLinus Torvalds DMERR("snapshot unregister failed %d", r); 12031da177e4SLinus Torvalds 12041da177e4SLinus Torvalds r = dm_unregister_target(&origin_target); 12051da177e4SLinus Torvalds if (r) 12061da177e4SLinus Torvalds DMERR("origin unregister failed %d", r); 12071da177e4SLinus Torvalds 12081da177e4SLinus Torvalds exit_origin_hash(); 12091da177e4SLinus Torvalds mempool_destroy(pending_pool); 12101da177e4SLinus Torvalds kmem_cache_destroy(pending_cache); 12111da177e4SLinus Torvalds kmem_cache_destroy(exception_cache); 12121da177e4SLinus Torvalds } 12131da177e4SLinus Torvalds 12141da177e4SLinus Torvalds /* Module hooks */ 12151da177e4SLinus Torvalds module_init(dm_snapshot_init); 12161da177e4SLinus Torvalds module_exit(dm_snapshot_exit); 12171da177e4SLinus Torvalds 12181da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target"); 12191da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber"); 12201da177e4SLinus Torvalds MODULE_LICENSE("GPL"); 1221