xref: /openbmc/linux/drivers/md/dm-snap.c (revision ccc45ea8)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * dm-snapshot.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * This file is released under the GPL.
71da177e4SLinus Torvalds  */
81da177e4SLinus Torvalds 
#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
#include <linux/workqueue.h>
241da177e4SLinus Torvalds 
25aea53d92SJonathan Brassow #include "dm-exception-store.h"
261da177e4SLinus Torvalds #include "dm-bio-list.h"
271da177e4SLinus Torvalds 
#define DM_MSG_PREFIX "snapshots"

/*
 * Users are woken each time usage advances by this percentage.
 */
#define WAKE_UP_PERCENT 5

/*
 * Priority handed to kcopyd for snapshot copy operations.
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Initial reservation per snapshot: 1MB worth of pages, but never
 * fewer than one page.
 */
#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)

/*
 * Size of the mempool used to track chunks that are in use.
 */
#define MIN_IOS 256

/*
 * Tracked chunks are hashed on the low bits of the chunk number, so
 * the bucket count has to remain a power of two.
 */
#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	\
	((unsigned long)(x) & (DM_TRACKED_CHUNK_HASH_SIZE - 1))
53ccc45ea8SJonathan Brassow 
/*
 * Hash table of exceptions, bucketed by chunk number.
 */
struct exception_table {
	uint32_t hash_mask;		/* number of buckets minus one */
	unsigned int hash_shift;	/* low chunk bits dropped before hashing */
	struct list_head *table;	/* array of bucket list heads */
};
59ccc45ea8SJonathan Brassow 
60ccc45ea8SJonathan Brassow struct dm_snapshot {
61ccc45ea8SJonathan Brassow 	struct rw_semaphore lock;
62ccc45ea8SJonathan Brassow 
63ccc45ea8SJonathan Brassow 	struct dm_dev *origin;
64ccc45ea8SJonathan Brassow 
65ccc45ea8SJonathan Brassow 	/* List of snapshots per Origin */
66ccc45ea8SJonathan Brassow 	struct list_head list;
67ccc45ea8SJonathan Brassow 
68ccc45ea8SJonathan Brassow 	/* You can't use a snapshot if this is 0 (e.g. if full) */
69ccc45ea8SJonathan Brassow 	int valid;
70ccc45ea8SJonathan Brassow 
71ccc45ea8SJonathan Brassow 	/* Origin writes don't trigger exceptions until this is set */
72ccc45ea8SJonathan Brassow 	int active;
73ccc45ea8SJonathan Brassow 
74ccc45ea8SJonathan Brassow 	/* Used for display of table */
75ccc45ea8SJonathan Brassow 	char type;
76ccc45ea8SJonathan Brassow 
77ccc45ea8SJonathan Brassow 	mempool_t *pending_pool;
78ccc45ea8SJonathan Brassow 
79ccc45ea8SJonathan Brassow 	atomic_t pending_exceptions_count;
80ccc45ea8SJonathan Brassow 
81ccc45ea8SJonathan Brassow 	struct exception_table pending;
82ccc45ea8SJonathan Brassow 	struct exception_table complete;
83ccc45ea8SJonathan Brassow 
84ccc45ea8SJonathan Brassow 	/*
85ccc45ea8SJonathan Brassow 	 * pe_lock protects all pending_exception operations and access
86ccc45ea8SJonathan Brassow 	 * as well as the snapshot_bios list.
87ccc45ea8SJonathan Brassow 	 */
88ccc45ea8SJonathan Brassow 	spinlock_t pe_lock;
89ccc45ea8SJonathan Brassow 
90ccc45ea8SJonathan Brassow 	/* The on disk metadata handler */
91ccc45ea8SJonathan Brassow 	struct dm_exception_store *store;
92ccc45ea8SJonathan Brassow 
93ccc45ea8SJonathan Brassow 	struct dm_kcopyd_client *kcopyd_client;
94ccc45ea8SJonathan Brassow 
95ccc45ea8SJonathan Brassow 	/* Queue of snapshot writes for ksnapd to flush */
96ccc45ea8SJonathan Brassow 	struct bio_list queued_bios;
97ccc45ea8SJonathan Brassow 	struct work_struct queued_bios_work;
98ccc45ea8SJonathan Brassow 
99ccc45ea8SJonathan Brassow 	/* Chunks with outstanding reads */
100ccc45ea8SJonathan Brassow 	mempool_t *tracked_chunk_pool;
101ccc45ea8SJonathan Brassow 	spinlock_t tracked_chunk_lock;
102ccc45ea8SJonathan Brassow 	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
103ccc45ea8SJonathan Brassow };
104ccc45ea8SJonathan Brassow 
105c642f9e0SAdrian Bunk static struct workqueue_struct *ksnapd;
106c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work);
107ca3a931fSAlasdair G Kergon 
108ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store,
109ccc45ea8SJonathan Brassow 				chunk_t chunk)
110ccc45ea8SJonathan Brassow {
111ccc45ea8SJonathan Brassow 	return chunk << store->chunk_shift;
112ccc45ea8SJonathan Brassow }
113ccc45ea8SJonathan Brassow 
/*
 * Return non-zero when both arguments refer to the same block device.
 *
 * A particular block device only ever has a single instance, so
 * comparing the pointers is sufficient.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	if (lhs != rhs)
		return 0;

	return 1;
}
122ccc45ea8SJonathan Brassow 
123028867acSAlasdair G Kergon struct dm_snap_pending_exception {
124028867acSAlasdair G Kergon 	struct dm_snap_exception e;
1251da177e4SLinus Torvalds 
1261da177e4SLinus Torvalds 	/*
1271da177e4SLinus Torvalds 	 * Origin buffers waiting for this to complete are held
1281da177e4SLinus Torvalds 	 * in a bio list
1291da177e4SLinus Torvalds 	 */
1301da177e4SLinus Torvalds 	struct bio_list origin_bios;
1311da177e4SLinus Torvalds 	struct bio_list snapshot_bios;
1321da177e4SLinus Torvalds 
1331da177e4SLinus Torvalds 	/*
134eccf0817SAlasdair G Kergon 	 * Short-term queue of pending exceptions prior to submission.
135eccf0817SAlasdair G Kergon 	 */
136eccf0817SAlasdair G Kergon 	struct list_head list;
137eccf0817SAlasdair G Kergon 
138eccf0817SAlasdair G Kergon 	/*
139b4b610f6SAlasdair G Kergon 	 * The primary pending_exception is the one that holds
1404b832e8dSAlasdair G Kergon 	 * the ref_count and the list of origin_bios for a
141b4b610f6SAlasdair G Kergon 	 * group of pending_exceptions.  It is always last to get freed.
142b4b610f6SAlasdair G Kergon 	 * These fields get set up when writing to the origin.
1431da177e4SLinus Torvalds 	 */
144028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *primary_pe;
145b4b610f6SAlasdair G Kergon 
146b4b610f6SAlasdair G Kergon 	/*
147b4b610f6SAlasdair G Kergon 	 * Number of pending_exceptions processing this chunk.
148b4b610f6SAlasdair G Kergon 	 * When this drops to zero we must complete the origin bios.
149b4b610f6SAlasdair G Kergon 	 * If incrementing or decrementing this, hold pe->snap->lock for
150b4b610f6SAlasdair G Kergon 	 * the sibling concerned and not pe->primary_pe->snap->lock unless
151b4b610f6SAlasdair G Kergon 	 * they are the same.
152b4b610f6SAlasdair G Kergon 	 */
1534b832e8dSAlasdair G Kergon 	atomic_t ref_count;
1541da177e4SLinus Torvalds 
1551da177e4SLinus Torvalds 	/* Pointer back to snapshot context */
1561da177e4SLinus Torvalds 	struct dm_snapshot *snap;
1571da177e4SLinus Torvalds 
1581da177e4SLinus Torvalds 	/*
1591da177e4SLinus Torvalds 	 * 1 indicates the exception has already been sent to
1601da177e4SLinus Torvalds 	 * kcopyd.
1611da177e4SLinus Torvalds 	 */
1621da177e4SLinus Torvalds 	int started;
1631da177e4SLinus Torvalds };
1641da177e4SLinus Torvalds 
/*
 * kmem caches: exception_cache backs struct dm_snap_exception
 * allocations; pending_cache is presumably the cache behind pending
 * exceptions.  (The hash table mapping origin volumes to lists of
 * snapshots, and the lock protecting it, are declared further below.)
 */
169e18b890bSChristoph Lameter static struct kmem_cache *exception_cache;
170e18b890bSChristoph Lameter static struct kmem_cache *pending_cache;
1711da177e4SLinus Torvalds 
172cd45daffSMikulas Patocka struct dm_snap_tracked_chunk {
173cd45daffSMikulas Patocka 	struct hlist_node node;
174cd45daffSMikulas Patocka 	chunk_t chunk;
175cd45daffSMikulas Patocka };
176cd45daffSMikulas Patocka 
177cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache;
178cd45daffSMikulas Patocka 
179cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
180cd45daffSMikulas Patocka 						 chunk_t chunk)
181cd45daffSMikulas Patocka {
182cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
183cd45daffSMikulas Patocka 							GFP_NOIO);
184cd45daffSMikulas Patocka 	unsigned long flags;
185cd45daffSMikulas Patocka 
186cd45daffSMikulas Patocka 	c->chunk = chunk;
187cd45daffSMikulas Patocka 
188cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
189cd45daffSMikulas Patocka 	hlist_add_head(&c->node,
190cd45daffSMikulas Patocka 		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
191cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
192cd45daffSMikulas Patocka 
193cd45daffSMikulas Patocka 	return c;
194cd45daffSMikulas Patocka }
195cd45daffSMikulas Patocka 
196cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s,
197cd45daffSMikulas Patocka 				struct dm_snap_tracked_chunk *c)
198cd45daffSMikulas Patocka {
199cd45daffSMikulas Patocka 	unsigned long flags;
200cd45daffSMikulas Patocka 
201cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
202cd45daffSMikulas Patocka 	hlist_del(&c->node);
203cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
204cd45daffSMikulas Patocka 
205cd45daffSMikulas Patocka 	mempool_free(c, s->tracked_chunk_pool);
206cd45daffSMikulas Patocka }
207cd45daffSMikulas Patocka 
208a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
209a8d41b59SMikulas Patocka {
210a8d41b59SMikulas Patocka 	struct dm_snap_tracked_chunk *c;
211a8d41b59SMikulas Patocka 	struct hlist_node *hn;
212a8d41b59SMikulas Patocka 	int found = 0;
213a8d41b59SMikulas Patocka 
214a8d41b59SMikulas Patocka 	spin_lock_irq(&s->tracked_chunk_lock);
215a8d41b59SMikulas Patocka 
216a8d41b59SMikulas Patocka 	hlist_for_each_entry(c, hn,
217a8d41b59SMikulas Patocka 	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
218a8d41b59SMikulas Patocka 		if (c->chunk == chunk) {
219a8d41b59SMikulas Patocka 			found = 1;
220a8d41b59SMikulas Patocka 			break;
221a8d41b59SMikulas Patocka 		}
222a8d41b59SMikulas Patocka 	}
223a8d41b59SMikulas Patocka 
224a8d41b59SMikulas Patocka 	spin_unlock_irq(&s->tracked_chunk_lock);
225a8d41b59SMikulas Patocka 
226a8d41b59SMikulas Patocka 	return found;
227a8d41b59SMikulas Patocka }
228a8d41b59SMikulas Patocka 
2291da177e4SLinus Torvalds /*
2301da177e4SLinus Torvalds  * One of these per registered origin, held in the snapshot_origins hash
2311da177e4SLinus Torvalds  */
2321da177e4SLinus Torvalds struct origin {
2331da177e4SLinus Torvalds 	/* The origin device */
2341da177e4SLinus Torvalds 	struct block_device *bdev;
2351da177e4SLinus Torvalds 
2361da177e4SLinus Torvalds 	struct list_head hash_list;
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds 	/* List of snapshots for this origin */
2391da177e4SLinus Torvalds 	struct list_head snapshots;
2401da177e4SLinus Torvalds };
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds /*
2431da177e4SLinus Torvalds  * Size of the hash table for origin volumes. If we make this
2441da177e4SLinus Torvalds  * the size of the minors list then it should be nearly perfect
2451da177e4SLinus Torvalds  */
2461da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256
2471da177e4SLinus Torvalds #define ORIGIN_MASK      0xFF
2481da177e4SLinus Torvalds static struct list_head *_origins;
2491da177e4SLinus Torvalds static struct rw_semaphore _origins_lock;
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds static int init_origin_hash(void)
2521da177e4SLinus Torvalds {
2531da177e4SLinus Torvalds 	int i;
2541da177e4SLinus Torvalds 
2551da177e4SLinus Torvalds 	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
2561da177e4SLinus Torvalds 			   GFP_KERNEL);
2571da177e4SLinus Torvalds 	if (!_origins) {
25872d94861SAlasdair G Kergon 		DMERR("unable to allocate memory");
2591da177e4SLinus Torvalds 		return -ENOMEM;
2601da177e4SLinus Torvalds 	}
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
2631da177e4SLinus Torvalds 		INIT_LIST_HEAD(_origins + i);
2641da177e4SLinus Torvalds 	init_rwsem(&_origins_lock);
2651da177e4SLinus Torvalds 
2661da177e4SLinus Torvalds 	return 0;
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds static void exit_origin_hash(void)
2701da177e4SLinus Torvalds {
2711da177e4SLinus Torvalds 	kfree(_origins);
2721da177e4SLinus Torvalds }
2731da177e4SLinus Torvalds 
274028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev)
2751da177e4SLinus Torvalds {
2761da177e4SLinus Torvalds 	return bdev->bd_dev & ORIGIN_MASK;
2771da177e4SLinus Torvalds }
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin)
2801da177e4SLinus Torvalds {
2811da177e4SLinus Torvalds 	struct list_head *ol;
2821da177e4SLinus Torvalds 	struct origin *o;
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds 	ol = &_origins[origin_hash(origin)];
2851da177e4SLinus Torvalds 	list_for_each_entry (o, ol, hash_list)
2861da177e4SLinus Torvalds 		if (bdev_equal(o->bdev, origin))
2871da177e4SLinus Torvalds 			return o;
2881da177e4SLinus Torvalds 
2891da177e4SLinus Torvalds 	return NULL;
2901da177e4SLinus Torvalds }
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds static void __insert_origin(struct origin *o)
2931da177e4SLinus Torvalds {
2941da177e4SLinus Torvalds 	struct list_head *sl = &_origins[origin_hash(o->bdev)];
2951da177e4SLinus Torvalds 	list_add_tail(&o->hash_list, sl);
2961da177e4SLinus Torvalds }
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds /*
2991da177e4SLinus Torvalds  * Make a note of the snapshot and its origin so we can look it
3001da177e4SLinus Torvalds  * up when the origin has a write on it.
3011da177e4SLinus Torvalds  */
3021da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap)
3031da177e4SLinus Torvalds {
30460c856c8SMikulas Patocka 	struct origin *o, *new_o;
3051da177e4SLinus Torvalds 	struct block_device *bdev = snap->origin->bdev;
3061da177e4SLinus Torvalds 
30760c856c8SMikulas Patocka 	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
30860c856c8SMikulas Patocka 	if (!new_o)
30960c856c8SMikulas Patocka 		return -ENOMEM;
31060c856c8SMikulas Patocka 
3111da177e4SLinus Torvalds 	down_write(&_origins_lock);
3121da177e4SLinus Torvalds 	o = __lookup_origin(bdev);
3131da177e4SLinus Torvalds 
31460c856c8SMikulas Patocka 	if (o)
31560c856c8SMikulas Patocka 		kfree(new_o);
31660c856c8SMikulas Patocka 	else {
3171da177e4SLinus Torvalds 		/* New origin */
31860c856c8SMikulas Patocka 		o = new_o;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 		/* Initialise the struct */
3211da177e4SLinus Torvalds 		INIT_LIST_HEAD(&o->snapshots);
3221da177e4SLinus Torvalds 		o->bdev = bdev;
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds 		__insert_origin(o);
3251da177e4SLinus Torvalds 	}
3261da177e4SLinus Torvalds 
3271da177e4SLinus Torvalds 	list_add_tail(&snap->list, &o->snapshots);
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 	up_write(&_origins_lock);
3301da177e4SLinus Torvalds 	return 0;
3311da177e4SLinus Torvalds }
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s)
3341da177e4SLinus Torvalds {
3351da177e4SLinus Torvalds 	struct origin *o;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	down_write(&_origins_lock);
3381da177e4SLinus Torvalds 	o = __lookup_origin(s->origin->bdev);
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	list_del(&s->list);
3411da177e4SLinus Torvalds 	if (list_empty(&o->snapshots)) {
3421da177e4SLinus Torvalds 		list_del(&o->hash_list);
3431da177e4SLinus Torvalds 		kfree(o);
3441da177e4SLinus Torvalds 	}
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 	up_write(&_origins_lock);
3471da177e4SLinus Torvalds }
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds /*
3501da177e4SLinus Torvalds  * Implementation of the exception hash tables.
351d74f81f8SMilan Broz  * The lowest hash_shift bits of the chunk number are ignored, allowing
352d74f81f8SMilan Broz  * some consecutive chunks to be grouped together.
3531da177e4SLinus Torvalds  */
354d74f81f8SMilan Broz static int init_exception_table(struct exception_table *et, uint32_t size,
355d74f81f8SMilan Broz 				unsigned hash_shift)
3561da177e4SLinus Torvalds {
3571da177e4SLinus Torvalds 	unsigned int i;
3581da177e4SLinus Torvalds 
359d74f81f8SMilan Broz 	et->hash_shift = hash_shift;
3601da177e4SLinus Torvalds 	et->hash_mask = size - 1;
3611da177e4SLinus Torvalds 	et->table = dm_vcalloc(size, sizeof(struct list_head));
3621da177e4SLinus Torvalds 	if (!et->table)
3631da177e4SLinus Torvalds 		return -ENOMEM;
3641da177e4SLinus Torvalds 
3651da177e4SLinus Torvalds 	for (i = 0; i < size; i++)
3661da177e4SLinus Torvalds 		INIT_LIST_HEAD(et->table + i);
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds 	return 0;
3691da177e4SLinus Torvalds }
3701da177e4SLinus Torvalds 
371e18b890bSChristoph Lameter static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem)
3721da177e4SLinus Torvalds {
3731da177e4SLinus Torvalds 	struct list_head *slot;
374028867acSAlasdair G Kergon 	struct dm_snap_exception *ex, *next;
3751da177e4SLinus Torvalds 	int i, size;
3761da177e4SLinus Torvalds 
3771da177e4SLinus Torvalds 	size = et->hash_mask + 1;
3781da177e4SLinus Torvalds 	for (i = 0; i < size; i++) {
3791da177e4SLinus Torvalds 		slot = et->table + i;
3801da177e4SLinus Torvalds 
3811da177e4SLinus Torvalds 		list_for_each_entry_safe (ex, next, slot, hash_list)
3821da177e4SLinus Torvalds 			kmem_cache_free(mem, ex);
3831da177e4SLinus Torvalds 	}
3841da177e4SLinus Torvalds 
3851da177e4SLinus Torvalds 	vfree(et->table);
3861da177e4SLinus Torvalds }
3871da177e4SLinus Torvalds 
388028867acSAlasdair G Kergon static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
3891da177e4SLinus Torvalds {
390d74f81f8SMilan Broz 	return (chunk >> et->hash_shift) & et->hash_mask;
3911da177e4SLinus Torvalds }
3921da177e4SLinus Torvalds 
393028867acSAlasdair G Kergon static void insert_exception(struct exception_table *eh,
394028867acSAlasdair G Kergon 			     struct dm_snap_exception *e)
3951da177e4SLinus Torvalds {
3961da177e4SLinus Torvalds 	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
3971da177e4SLinus Torvalds 	list_add(&e->hash_list, l);
3981da177e4SLinus Torvalds }
3991da177e4SLinus Torvalds 
400028867acSAlasdair G Kergon static void remove_exception(struct dm_snap_exception *e)
4011da177e4SLinus Torvalds {
4021da177e4SLinus Torvalds 	list_del(&e->hash_list);
4031da177e4SLinus Torvalds }
4041da177e4SLinus Torvalds 
4051da177e4SLinus Torvalds /*
4061da177e4SLinus Torvalds  * Return the exception data for a sector, or NULL if not
4071da177e4SLinus Torvalds  * remapped.
4081da177e4SLinus Torvalds  */
409028867acSAlasdair G Kergon static struct dm_snap_exception *lookup_exception(struct exception_table *et,
4101da177e4SLinus Torvalds 						  chunk_t chunk)
4111da177e4SLinus Torvalds {
4121da177e4SLinus Torvalds 	struct list_head *slot;
413028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	slot = &et->table[exception_hash(et, chunk)];
4161da177e4SLinus Torvalds 	list_for_each_entry (e, slot, hash_list)
417d74f81f8SMilan Broz 		if (chunk >= e->old_chunk &&
418d74f81f8SMilan Broz 		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
4191da177e4SLinus Torvalds 			return e;
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds 	return NULL;
4221da177e4SLinus Torvalds }
4231da177e4SLinus Torvalds 
424028867acSAlasdair G Kergon static struct dm_snap_exception *alloc_exception(void)
4251da177e4SLinus Torvalds {
426028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
4271da177e4SLinus Torvalds 
4281da177e4SLinus Torvalds 	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
4291da177e4SLinus Torvalds 	if (!e)
4301da177e4SLinus Torvalds 		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds 	return e;
4331da177e4SLinus Torvalds }
4341da177e4SLinus Torvalds 
435028867acSAlasdair G Kergon static void free_exception(struct dm_snap_exception *e)
4361da177e4SLinus Torvalds {
4371da177e4SLinus Torvalds 	kmem_cache_free(exception_cache, e);
4381da177e4SLinus Torvalds }
4391da177e4SLinus Torvalds 
44092e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
4411da177e4SLinus Torvalds {
44292e86812SMikulas Patocka 	struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
44392e86812SMikulas Patocka 							     GFP_NOIO);
44492e86812SMikulas Patocka 
445879129d2SMikulas Patocka 	atomic_inc(&s->pending_exceptions_count);
44692e86812SMikulas Patocka 	pe->snap = s;
44792e86812SMikulas Patocka 
44892e86812SMikulas Patocka 	return pe;
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
451028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe)
4521da177e4SLinus Torvalds {
453879129d2SMikulas Patocka 	struct dm_snapshot *s = pe->snap;
454879129d2SMikulas Patocka 
455879129d2SMikulas Patocka 	mempool_free(pe, s->pending_pool);
456879129d2SMikulas Patocka 	smp_mb__before_atomic_dec();
457879129d2SMikulas Patocka 	atomic_dec(&s->pending_exceptions_count);
4581da177e4SLinus Torvalds }
4591da177e4SLinus Torvalds 
460d74f81f8SMilan Broz static void insert_completed_exception(struct dm_snapshot *s,
461d74f81f8SMilan Broz 				       struct dm_snap_exception *new_e)
462d74f81f8SMilan Broz {
463d74f81f8SMilan Broz 	struct exception_table *eh = &s->complete;
464d74f81f8SMilan Broz 	struct list_head *l;
465d74f81f8SMilan Broz 	struct dm_snap_exception *e = NULL;
466d74f81f8SMilan Broz 
467d74f81f8SMilan Broz 	l = &eh->table[exception_hash(eh, new_e->old_chunk)];
468d74f81f8SMilan Broz 
469d74f81f8SMilan Broz 	/* Add immediately if this table doesn't support consecutive chunks */
470d74f81f8SMilan Broz 	if (!eh->hash_shift)
471d74f81f8SMilan Broz 		goto out;
472d74f81f8SMilan Broz 
473d74f81f8SMilan Broz 	/* List is ordered by old_chunk */
474d74f81f8SMilan Broz 	list_for_each_entry_reverse(e, l, hash_list) {
475d74f81f8SMilan Broz 		/* Insert after an existing chunk? */
476d74f81f8SMilan Broz 		if (new_e->old_chunk == (e->old_chunk +
477d74f81f8SMilan Broz 					 dm_consecutive_chunk_count(e) + 1) &&
478d74f81f8SMilan Broz 		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
479d74f81f8SMilan Broz 					 dm_consecutive_chunk_count(e) + 1)) {
480d74f81f8SMilan Broz 			dm_consecutive_chunk_count_inc(e);
481d74f81f8SMilan Broz 			free_exception(new_e);
482d74f81f8SMilan Broz 			return;
483d74f81f8SMilan Broz 		}
484d74f81f8SMilan Broz 
485d74f81f8SMilan Broz 		/* Insert before an existing chunk? */
486d74f81f8SMilan Broz 		if (new_e->old_chunk == (e->old_chunk - 1) &&
487d74f81f8SMilan Broz 		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
488d74f81f8SMilan Broz 			dm_consecutive_chunk_count_inc(e);
489d74f81f8SMilan Broz 			e->old_chunk--;
490d74f81f8SMilan Broz 			e->new_chunk--;
491d74f81f8SMilan Broz 			free_exception(new_e);
492d74f81f8SMilan Broz 			return;
493d74f81f8SMilan Broz 		}
494d74f81f8SMilan Broz 
495d74f81f8SMilan Broz 		if (new_e->old_chunk > e->old_chunk)
496d74f81f8SMilan Broz 			break;
497d74f81f8SMilan Broz 	}
498d74f81f8SMilan Broz 
499d74f81f8SMilan Broz out:
500d74f81f8SMilan Broz 	list_add(&new_e->hash_list, e ? &e->hash_list : l);
501d74f81f8SMilan Broz }
502d74f81f8SMilan Broz 
503a159c1acSJonathan Brassow /*
504a159c1acSJonathan Brassow  * Callback used by the exception stores to load exceptions when
505a159c1acSJonathan Brassow  * initialising.
506a159c1acSJonathan Brassow  */
507a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new)
5081da177e4SLinus Torvalds {
509a159c1acSJonathan Brassow 	struct dm_snapshot *s = context;
510028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	e = alloc_exception();
5131da177e4SLinus Torvalds 	if (!e)
5141da177e4SLinus Torvalds 		return -ENOMEM;
5151da177e4SLinus Torvalds 
5161da177e4SLinus Torvalds 	e->old_chunk = old;
517d74f81f8SMilan Broz 
518d74f81f8SMilan Broz 	/* Consecutive_count is implicitly initialised to zero */
5191da177e4SLinus Torvalds 	e->new_chunk = new;
520d74f81f8SMilan Broz 
521d74f81f8SMilan Broz 	insert_completed_exception(s, e);
522d74f81f8SMilan Broz 
5231da177e4SLinus Torvalds 	return 0;
5241da177e4SLinus Torvalds }
5251da177e4SLinus Torvalds 
5261da177e4SLinus Torvalds /*
5271da177e4SLinus Torvalds  * Hard coded magic.
5281da177e4SLinus Torvalds  */
5291da177e4SLinus Torvalds static int calc_max_buckets(void)
5301da177e4SLinus Torvalds {
5311da177e4SLinus Torvalds 	/* use a fixed size of 2MB */
5321da177e4SLinus Torvalds 	unsigned long mem = 2 * 1024 * 1024;
5331da177e4SLinus Torvalds 	mem /= sizeof(struct list_head);
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds 	return mem;
5361da177e4SLinus Torvalds }
5371da177e4SLinus Torvalds 
5381da177e4SLinus Torvalds /*
5391da177e4SLinus Torvalds  * Allocate room for a suitable hash table.
5401da177e4SLinus Torvalds  */
54149beb2b8SJonathan Brassow static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift,
54249beb2b8SJonathan Brassow 			    struct dm_dev *cow)
5431da177e4SLinus Torvalds {
5441da177e4SLinus Torvalds 	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
5451da177e4SLinus Torvalds 
5461da177e4SLinus Torvalds 	/*
5471da177e4SLinus Torvalds 	 * Calculate based on the size of the original volume or
5481da177e4SLinus Torvalds 	 * the COW volume...
5491da177e4SLinus Torvalds 	 */
55049beb2b8SJonathan Brassow 	cow_dev_size = get_dev_size(cow->bdev);
5511da177e4SLinus Torvalds 	origin_dev_size = get_dev_size(s->origin->bdev);
5521da177e4SLinus Torvalds 	max_buckets = calc_max_buckets();
5531da177e4SLinus Torvalds 
554d0216849SJonathan Brassow 	hash_size = min(origin_dev_size, cow_dev_size) >> chunk_shift;
5551da177e4SLinus Torvalds 	hash_size = min(hash_size, max_buckets);
5561da177e4SLinus Torvalds 
5578defd830SRobert P. J. Day 	hash_size = rounddown_pow_of_two(hash_size);
558d74f81f8SMilan Broz 	if (init_exception_table(&s->complete, hash_size,
559d74f81f8SMilan Broz 				 DM_CHUNK_CONSECUTIVE_BITS))
5601da177e4SLinus Torvalds 		return -ENOMEM;
5611da177e4SLinus Torvalds 
5621da177e4SLinus Torvalds 	/*
5631da177e4SLinus Torvalds 	 * Allocate hash table for in-flight exceptions
5641da177e4SLinus Torvalds 	 * Make this smaller than the real hash table
5651da177e4SLinus Torvalds 	 */
5661da177e4SLinus Torvalds 	hash_size >>= 3;
5671da177e4SLinus Torvalds 	if (hash_size < 64)
5681da177e4SLinus Torvalds 		hash_size = 64;
5691da177e4SLinus Torvalds 
570d74f81f8SMilan Broz 	if (init_exception_table(&s->pending, hash_size, 0)) {
5711da177e4SLinus Torvalds 		exit_exception_table(&s->complete, exception_cache);
5721da177e4SLinus Torvalds 		return -ENOMEM;
5731da177e4SLinus Torvalds 	}
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds 	return 0;
5761da177e4SLinus Torvalds }
5771da177e4SLinus Torvalds 
/*
 * Round @n up to the next multiple of @size.  @size has to be a
 * power of 2.
 */
static ulong round_up(ulong n, ulong size)
{
	const ulong mask = size - 1;

	return (n + mask) & ~mask;
}
5871da177e4SLinus Torvalds 
5884c7e3bf4SMark McLoughlin static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg,
589d0216849SJonathan Brassow 			  chunk_t *chunk_size, chunk_t *chunk_mask,
59049beb2b8SJonathan Brassow 			  chunk_t *chunk_shift, struct dm_dev *cow,
59149beb2b8SJonathan Brassow 			  char **error)
5924c7e3bf4SMark McLoughlin {
593d0216849SJonathan Brassow 	unsigned long chunk_size_ulong;
5944c7e3bf4SMark McLoughlin 	char *value;
5954c7e3bf4SMark McLoughlin 
596d0216849SJonathan Brassow 	chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
5974c7e3bf4SMark McLoughlin 	if (*chunk_size_arg == '\0' || *value != '\0') {
5984c7e3bf4SMark McLoughlin 		*error = "Invalid chunk size";
5994c7e3bf4SMark McLoughlin 		return -EINVAL;
6004c7e3bf4SMark McLoughlin 	}
6014c7e3bf4SMark McLoughlin 
602d0216849SJonathan Brassow 	if (!chunk_size_ulong) {
603d0216849SJonathan Brassow 		*chunk_size = *chunk_mask = *chunk_shift = 0;
6044c7e3bf4SMark McLoughlin 		return 0;
6054c7e3bf4SMark McLoughlin 	}
6064c7e3bf4SMark McLoughlin 
6074c7e3bf4SMark McLoughlin 	/*
6084c7e3bf4SMark McLoughlin 	 * Chunk size must be multiple of page size.  Silently
6094c7e3bf4SMark McLoughlin 	 * round up if it's not.
6104c7e3bf4SMark McLoughlin 	 */
611d0216849SJonathan Brassow 	chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
6124c7e3bf4SMark McLoughlin 
6134c7e3bf4SMark McLoughlin 	/* Check chunk_size is a power of 2 */
614d0216849SJonathan Brassow 	if (!is_power_of_2(chunk_size_ulong)) {
6154c7e3bf4SMark McLoughlin 		*error = "Chunk size is not a power of 2";
6164c7e3bf4SMark McLoughlin 		return -EINVAL;
6174c7e3bf4SMark McLoughlin 	}
6184c7e3bf4SMark McLoughlin 
6194c7e3bf4SMark McLoughlin 	/* Validate the chunk size against the device block size */
62049beb2b8SJonathan Brassow 	if (chunk_size_ulong % (bdev_hardsect_size(cow->bdev) >> 9)) {
6214c7e3bf4SMark McLoughlin 		*error = "Chunk size is not a multiple of device blocksize";
6224c7e3bf4SMark McLoughlin 		return -EINVAL;
6234c7e3bf4SMark McLoughlin 	}
6244c7e3bf4SMark McLoughlin 
625d0216849SJonathan Brassow 	*chunk_size = chunk_size_ulong;
626d0216849SJonathan Brassow 	*chunk_mask = chunk_size_ulong - 1;
627d0216849SJonathan Brassow 	*chunk_shift = ffs(chunk_size_ulong) - 1;
6284c7e3bf4SMark McLoughlin 
6294c7e3bf4SMark McLoughlin 	return 0;
6304c7e3bf4SMark McLoughlin }
6314c7e3bf4SMark McLoughlin 
6321da177e4SLinus Torvalds /*
6331da177e4SLinus Torvalds  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
6341da177e4SLinus Torvalds  */
6351da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
6361da177e4SLinus Torvalds {
6371da177e4SLinus Torvalds 	struct dm_snapshot *s;
638cd45daffSMikulas Patocka 	int i;
6391da177e4SLinus Torvalds 	int r = -EINVAL;
6401da177e4SLinus Torvalds 	char persistent;
6411da177e4SLinus Torvalds 	char *origin_path;
6421da177e4SLinus Torvalds 	char *cow_path;
643d0216849SJonathan Brassow 	chunk_t chunk_size, chunk_mask, chunk_shift;
64449beb2b8SJonathan Brassow 	struct dm_dev *cow;
6451da177e4SLinus Torvalds 
6464c7e3bf4SMark McLoughlin 	if (argc != 4) {
64772d94861SAlasdair G Kergon 		ti->error = "requires exactly 4 arguments";
6481da177e4SLinus Torvalds 		r = -EINVAL;
6491da177e4SLinus Torvalds 		goto bad1;
6501da177e4SLinus Torvalds 	}
6511da177e4SLinus Torvalds 
6521da177e4SLinus Torvalds 	origin_path = argv[0];
6531da177e4SLinus Torvalds 	cow_path = argv[1];
6541da177e4SLinus Torvalds 	persistent = toupper(*argv[2]);
6551da177e4SLinus Torvalds 
6561da177e4SLinus Torvalds 	if (persistent != 'P' && persistent != 'N') {
6571da177e4SLinus Torvalds 		ti->error = "Persistent flag is not P or N";
6581da177e4SLinus Torvalds 		r = -EINVAL;
6591da177e4SLinus Torvalds 		goto bad1;
6601da177e4SLinus Torvalds 	}
6611da177e4SLinus Torvalds 
6621da177e4SLinus Torvalds 	s = kmalloc(sizeof(*s), GFP_KERNEL);
6631da177e4SLinus Torvalds 	if (s == NULL) {
6641da177e4SLinus Torvalds 		ti->error = "Cannot allocate snapshot context private "
6651da177e4SLinus Torvalds 		    "structure";
6661da177e4SLinus Torvalds 		r = -ENOMEM;
6671da177e4SLinus Torvalds 		goto bad1;
6681da177e4SLinus Torvalds 	}
6691da177e4SLinus Torvalds 
6701da177e4SLinus Torvalds 	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
6711da177e4SLinus Torvalds 	if (r) {
6721da177e4SLinus Torvalds 		ti->error = "Cannot get origin device";
6731da177e4SLinus Torvalds 		goto bad2;
6741da177e4SLinus Torvalds 	}
6751da177e4SLinus Torvalds 
6761da177e4SLinus Torvalds 	r = dm_get_device(ti, cow_path, 0, 0,
67749beb2b8SJonathan Brassow 			  FMODE_READ | FMODE_WRITE, &cow);
6781da177e4SLinus Torvalds 	if (r) {
6791da177e4SLinus Torvalds 		dm_put_device(ti, s->origin);
6801da177e4SLinus Torvalds 		ti->error = "Cannot get COW device";
6811da177e4SLinus Torvalds 		goto bad2;
6821da177e4SLinus Torvalds 	}
6831da177e4SLinus Torvalds 
684d0216849SJonathan Brassow 	r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift,
68549beb2b8SJonathan Brassow 			   cow, &ti->error);
6864c7e3bf4SMark McLoughlin 	if (r)
6871da177e4SLinus Torvalds 		goto bad3;
6881da177e4SLinus Torvalds 
6891da177e4SLinus Torvalds 	s->valid = 1;
690aa14edebSAlasdair G Kergon 	s->active = 0;
691879129d2SMikulas Patocka 	atomic_set(&s->pending_exceptions_count, 0);
6921da177e4SLinus Torvalds 	init_rwsem(&s->lock);
693ca3a931fSAlasdair G Kergon 	spin_lock_init(&s->pe_lock);
6941da177e4SLinus Torvalds 
6951da177e4SLinus Torvalds 	/* Allocate hash table for COW data */
69649beb2b8SJonathan Brassow 	if (init_hash_tables(s, chunk_shift, cow)) {
6971da177e4SLinus Torvalds 		ti->error = "Unable to allocate hash table space";
6981da177e4SLinus Torvalds 		r = -ENOMEM;
6991da177e4SLinus Torvalds 		goto bad3;
7001da177e4SLinus Torvalds 	}
7011da177e4SLinus Torvalds 
702d0216849SJonathan Brassow 	r = dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask,
70349beb2b8SJonathan Brassow 				      chunk_shift, cow, &s->store);
7041da177e4SLinus Torvalds 	if (r) {
7051da177e4SLinus Torvalds 		ti->error = "Couldn't create exception store";
7061da177e4SLinus Torvalds 		r = -EINVAL;
7071da177e4SLinus Torvalds 		goto bad4;
7081da177e4SLinus Torvalds 	}
7091da177e4SLinus Torvalds 
710eb69aca5SHeinz Mauelshagen 	r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
7111da177e4SLinus Torvalds 	if (r) {
7121da177e4SLinus Torvalds 		ti->error = "Could not create kcopyd client";
7131da177e4SLinus Torvalds 		goto bad5;
7141da177e4SLinus Torvalds 	}
7151da177e4SLinus Torvalds 
71692e86812SMikulas Patocka 	s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
71792e86812SMikulas Patocka 	if (!s->pending_pool) {
71892e86812SMikulas Patocka 		ti->error = "Could not allocate mempool for pending exceptions";
71992e86812SMikulas Patocka 		goto bad6;
72092e86812SMikulas Patocka 	}
72192e86812SMikulas Patocka 
722cd45daffSMikulas Patocka 	s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
723cd45daffSMikulas Patocka 							 tracked_chunk_cache);
724cd45daffSMikulas Patocka 	if (!s->tracked_chunk_pool) {
725cd45daffSMikulas Patocka 		ti->error = "Could not allocate tracked_chunk mempool for "
726cd45daffSMikulas Patocka 			    "tracking reads";
72792e86812SMikulas Patocka 		goto bad_tracked_chunk_pool;
728cd45daffSMikulas Patocka 	}
729cd45daffSMikulas Patocka 
730cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
731cd45daffSMikulas Patocka 		INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
732cd45daffSMikulas Patocka 
733cd45daffSMikulas Patocka 	spin_lock_init(&s->tracked_chunk_lock);
734cd45daffSMikulas Patocka 
735aa14edebSAlasdair G Kergon 	/* Metadata must only be loaded into one table at once */
736493df71cSJonathan Brassow 	r = s->store->type->read_metadata(s->store, dm_add_exception,
737493df71cSJonathan Brassow 					  (void *)s);
7380764147bSMilan Broz 	if (r < 0) {
739f9cea4f7SMark McLoughlin 		ti->error = "Failed to read snapshot metadata";
740cd45daffSMikulas Patocka 		goto bad_load_and_register;
7410764147bSMilan Broz 	} else if (r > 0) {
7420764147bSMilan Broz 		s->valid = 0;
7430764147bSMilan Broz 		DMWARN("Snapshot is marked invalid.");
744f9cea4f7SMark McLoughlin 	}
745aa14edebSAlasdair G Kergon 
746ca3a931fSAlasdair G Kergon 	bio_list_init(&s->queued_bios);
747c4028958SDavid Howells 	INIT_WORK(&s->queued_bios_work, flush_queued_bios);
748ca3a931fSAlasdair G Kergon 
7491da177e4SLinus Torvalds 	/* Add snapshot to the list of snapshots for this origin */
750aa14edebSAlasdair G Kergon 	/* Exceptions aren't triggered till snapshot_resume() is called */
7511da177e4SLinus Torvalds 	if (register_snapshot(s)) {
7521da177e4SLinus Torvalds 		r = -EINVAL;
7531da177e4SLinus Torvalds 		ti->error = "Cannot register snapshot origin";
754cd45daffSMikulas Patocka 		goto bad_load_and_register;
7551da177e4SLinus Torvalds 	}
7561da177e4SLinus Torvalds 
7571da177e4SLinus Torvalds 	ti->private = s;
758d0216849SJonathan Brassow 	ti->split_io = s->store->chunk_size;
7591da177e4SLinus Torvalds 
7601da177e4SLinus Torvalds 	return 0;
7611da177e4SLinus Torvalds 
762cd45daffSMikulas Patocka  bad_load_and_register:
763cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
764cd45daffSMikulas Patocka 
76592e86812SMikulas Patocka  bad_tracked_chunk_pool:
76692e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
76792e86812SMikulas Patocka 
7681da177e4SLinus Torvalds  bad6:
769eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
7701da177e4SLinus Torvalds 
7711da177e4SLinus Torvalds  bad5:
772493df71cSJonathan Brassow 	s->store->type->dtr(s->store);
7731da177e4SLinus Torvalds 
7741da177e4SLinus Torvalds  bad4:
7751da177e4SLinus Torvalds 	exit_exception_table(&s->pending, pending_cache);
7761da177e4SLinus Torvalds 	exit_exception_table(&s->complete, exception_cache);
7771da177e4SLinus Torvalds 
7781da177e4SLinus Torvalds  bad3:
77949beb2b8SJonathan Brassow 	dm_put_device(ti, cow);
7801da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
7811da177e4SLinus Torvalds 
7821da177e4SLinus Torvalds  bad2:
7831da177e4SLinus Torvalds 	kfree(s);
7841da177e4SLinus Torvalds 
7851da177e4SLinus Torvalds  bad1:
7861da177e4SLinus Torvalds 	return r;
7871da177e4SLinus Torvalds }
7881da177e4SLinus Torvalds 
78931c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s)
79031c93a0cSMilan Broz {
791eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
79231c93a0cSMilan Broz 	s->kcopyd_client = NULL;
79331c93a0cSMilan Broz 
79431c93a0cSMilan Broz 	exit_exception_table(&s->pending, pending_cache);
79531c93a0cSMilan Broz 	exit_exception_table(&s->complete, exception_cache);
79631c93a0cSMilan Broz 
797493df71cSJonathan Brassow 	s->store->type->dtr(s->store);
79831c93a0cSMilan Broz }
79931c93a0cSMilan Broz 
8001da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti)
8011da177e4SLinus Torvalds {
802cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
803cd45daffSMikulas Patocka 	int i;
804cd45daffSMikulas Patocka #endif
805028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
80649beb2b8SJonathan Brassow 	struct dm_dev *cow = s->store->cow;
8071da177e4SLinus Torvalds 
808ca3a931fSAlasdair G Kergon 	flush_workqueue(ksnapd);
809ca3a931fSAlasdair G Kergon 
810138728dcSAlasdair G Kergon 	/* Prevent further origin writes from using this snapshot. */
811138728dcSAlasdair G Kergon 	/* After this returns there can be no new kcopyd jobs. */
8121da177e4SLinus Torvalds 	unregister_snapshot(s);
8131da177e4SLinus Torvalds 
814879129d2SMikulas Patocka 	while (atomic_read(&s->pending_exceptions_count))
81590fa1527SMikulas Patocka 		msleep(1);
816879129d2SMikulas Patocka 	/*
817879129d2SMikulas Patocka 	 * Ensure instructions in mempool_destroy aren't reordered
818879129d2SMikulas Patocka 	 * before atomic_read.
819879129d2SMikulas Patocka 	 */
820879129d2SMikulas Patocka 	smp_mb();
821879129d2SMikulas Patocka 
822cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
823cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
824cd45daffSMikulas Patocka 		BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
825cd45daffSMikulas Patocka #endif
826cd45daffSMikulas Patocka 
827cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
828cd45daffSMikulas Patocka 
82931c93a0cSMilan Broz 	__free_exceptions(s);
8301da177e4SLinus Torvalds 
83192e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
83292e86812SMikulas Patocka 
8331da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
83449beb2b8SJonathan Brassow 	dm_put_device(ti, cow);
835138728dcSAlasdair G Kergon 
8361da177e4SLinus Torvalds 	kfree(s);
8371da177e4SLinus Torvalds }
8381da177e4SLinus Torvalds 
8391da177e4SLinus Torvalds /*
8401da177e4SLinus Torvalds  * Flush a list of buffers.
8411da177e4SLinus Torvalds  */
8421da177e4SLinus Torvalds static void flush_bios(struct bio *bio)
8431da177e4SLinus Torvalds {
8441da177e4SLinus Torvalds 	struct bio *n;
8451da177e4SLinus Torvalds 
8461da177e4SLinus Torvalds 	while (bio) {
8471da177e4SLinus Torvalds 		n = bio->bi_next;
8481da177e4SLinus Torvalds 		bio->bi_next = NULL;
8491da177e4SLinus Torvalds 		generic_make_request(bio);
8501da177e4SLinus Torvalds 		bio = n;
8511da177e4SLinus Torvalds 	}
8521da177e4SLinus Torvalds }
8531da177e4SLinus Torvalds 
854c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work)
855ca3a931fSAlasdair G Kergon {
856c4028958SDavid Howells 	struct dm_snapshot *s =
857c4028958SDavid Howells 		container_of(work, struct dm_snapshot, queued_bios_work);
858ca3a931fSAlasdair G Kergon 	struct bio *queued_bios;
859ca3a931fSAlasdair G Kergon 	unsigned long flags;
860ca3a931fSAlasdair G Kergon 
861ca3a931fSAlasdair G Kergon 	spin_lock_irqsave(&s->pe_lock, flags);
862ca3a931fSAlasdair G Kergon 	queued_bios = bio_list_get(&s->queued_bios);
863ca3a931fSAlasdair G Kergon 	spin_unlock_irqrestore(&s->pe_lock, flags);
864ca3a931fSAlasdair G Kergon 
865ca3a931fSAlasdair G Kergon 	flush_bios(queued_bios);
866ca3a931fSAlasdair G Kergon }
867ca3a931fSAlasdair G Kergon 
8681da177e4SLinus Torvalds /*
8691da177e4SLinus Torvalds  * Error a list of buffers.
8701da177e4SLinus Torvalds  */
8711da177e4SLinus Torvalds static void error_bios(struct bio *bio)
8721da177e4SLinus Torvalds {
8731da177e4SLinus Torvalds 	struct bio *n;
8741da177e4SLinus Torvalds 
8751da177e4SLinus Torvalds 	while (bio) {
8761da177e4SLinus Torvalds 		n = bio->bi_next;
8771da177e4SLinus Torvalds 		bio->bi_next = NULL;
8786712ecf8SNeilBrown 		bio_io_error(bio);
8791da177e4SLinus Torvalds 		bio = n;
8801da177e4SLinus Torvalds 	}
8811da177e4SLinus Torvalds }
8821da177e4SLinus Torvalds 
883695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err)
88476df1c65SAlasdair G Kergon {
88576df1c65SAlasdair G Kergon 	if (!s->valid)
88676df1c65SAlasdair G Kergon 		return;
88776df1c65SAlasdair G Kergon 
88876df1c65SAlasdair G Kergon 	if (err == -EIO)
88976df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Error reading/writing.");
89076df1c65SAlasdair G Kergon 	else if (err == -ENOMEM)
89176df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Unable to allocate exception.");
89276df1c65SAlasdair G Kergon 
893493df71cSJonathan Brassow 	if (s->store->type->drop_snapshot)
894493df71cSJonathan Brassow 		s->store->type->drop_snapshot(s->store);
89576df1c65SAlasdair G Kergon 
89676df1c65SAlasdair G Kergon 	s->valid = 0;
89776df1c65SAlasdair G Kergon 
8980cea9c78SJonathan Brassow 	dm_table_event(s->store->ti->table);
89976df1c65SAlasdair G Kergon }
90076df1c65SAlasdair G Kergon 
901028867acSAlasdair G Kergon static void get_pending_exception(struct dm_snap_pending_exception *pe)
9024b832e8dSAlasdair G Kergon {
9034b832e8dSAlasdair G Kergon 	atomic_inc(&pe->ref_count);
9044b832e8dSAlasdair G Kergon }
9054b832e8dSAlasdair G Kergon 
906028867acSAlasdair G Kergon static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
9074b832e8dSAlasdair G Kergon {
908028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *primary_pe;
9094b832e8dSAlasdair G Kergon 	struct bio *origin_bios = NULL;
9104b832e8dSAlasdair G Kergon 
9114b832e8dSAlasdair G Kergon 	primary_pe = pe->primary_pe;
9124b832e8dSAlasdair G Kergon 
9134b832e8dSAlasdair G Kergon 	/*
9144b832e8dSAlasdair G Kergon 	 * If this pe is involved in a write to the origin and
9154b832e8dSAlasdair G Kergon 	 * it is the last sibling to complete then release
9164b832e8dSAlasdair G Kergon 	 * the bios for the original write to the origin.
9174b832e8dSAlasdair G Kergon 	 */
9184b832e8dSAlasdair G Kergon 	if (primary_pe &&
9197c5f78b9SMikulas Patocka 	    atomic_dec_and_test(&primary_pe->ref_count)) {
9204b832e8dSAlasdair G Kergon 		origin_bios = bio_list_get(&primary_pe->origin_bios);
9217c5f78b9SMikulas Patocka 		free_pending_exception(primary_pe);
9227c5f78b9SMikulas Patocka 	}
9234b832e8dSAlasdair G Kergon 
9244b832e8dSAlasdair G Kergon 	/*
9254b832e8dSAlasdair G Kergon 	 * Free the pe if it's not linked to an origin write or if
9264b832e8dSAlasdair G Kergon 	 * it's not itself a primary pe.
9274b832e8dSAlasdair G Kergon 	 */
9284b832e8dSAlasdair G Kergon 	if (!primary_pe || primary_pe != pe)
9294b832e8dSAlasdair G Kergon 		free_pending_exception(pe);
9304b832e8dSAlasdair G Kergon 
9314b832e8dSAlasdair G Kergon 	return origin_bios;
9324b832e8dSAlasdair G Kergon }
9334b832e8dSAlasdair G Kergon 
934028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success)
9351da177e4SLinus Torvalds {
936028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
9371da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
9389d493fa8SAlasdair G Kergon 	struct bio *origin_bios = NULL;
9399d493fa8SAlasdair G Kergon 	struct bio *snapshot_bios = NULL;
9409d493fa8SAlasdair G Kergon 	int error = 0;
9411da177e4SLinus Torvalds 
94276df1c65SAlasdair G Kergon 	if (!success) {
94376df1c65SAlasdair G Kergon 		/* Read/write error - snapshot is unusable */
9441da177e4SLinus Torvalds 		down_write(&s->lock);
945695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -EIO);
9469d493fa8SAlasdair G Kergon 		error = 1;
94776df1c65SAlasdair G Kergon 		goto out;
94876df1c65SAlasdair G Kergon 	}
94976df1c65SAlasdair G Kergon 
95076df1c65SAlasdair G Kergon 	e = alloc_exception();
95176df1c65SAlasdair G Kergon 	if (!e) {
95276df1c65SAlasdair G Kergon 		down_write(&s->lock);
953695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -ENOMEM);
9549d493fa8SAlasdair G Kergon 		error = 1;
9551da177e4SLinus Torvalds 		goto out;
9561da177e4SLinus Torvalds 	}
9571da177e4SLinus Torvalds 	*e = pe->e;
9581da177e4SLinus Torvalds 
9599d493fa8SAlasdair G Kergon 	down_write(&s->lock);
9609d493fa8SAlasdair G Kergon 	if (!s->valid) {
9619d493fa8SAlasdair G Kergon 		free_exception(e);
9629d493fa8SAlasdair G Kergon 		error = 1;
9639d493fa8SAlasdair G Kergon 		goto out;
9649d493fa8SAlasdair G Kergon 	}
9659d493fa8SAlasdair G Kergon 
9661da177e4SLinus Torvalds 	/*
967a8d41b59SMikulas Patocka 	 * Check for conflicting reads. This is extremely improbable,
96890fa1527SMikulas Patocka 	 * so msleep(1) is sufficient and there is no need for a wait queue.
969a8d41b59SMikulas Patocka 	 */
970a8d41b59SMikulas Patocka 	while (__chunk_is_tracked(s, pe->e.old_chunk))
97190fa1527SMikulas Patocka 		msleep(1);
972a8d41b59SMikulas Patocka 
973a8d41b59SMikulas Patocka 	/*
9741da177e4SLinus Torvalds 	 * Add a proper exception, and remove the
9751da177e4SLinus Torvalds 	 * in-flight exception from the list.
9761da177e4SLinus Torvalds 	 */
977d74f81f8SMilan Broz 	insert_completed_exception(s, e);
9781da177e4SLinus Torvalds 
9791da177e4SLinus Torvalds  out:
980695368acSAlasdair G Kergon 	remove_exception(&pe->e);
9819d493fa8SAlasdair G Kergon 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
9824b832e8dSAlasdair G Kergon 	origin_bios = put_pending_exception(pe);
983b4b610f6SAlasdair G Kergon 
9849d493fa8SAlasdair G Kergon 	up_write(&s->lock);
9859d493fa8SAlasdair G Kergon 
9869d493fa8SAlasdair G Kergon 	/* Submit any pending write bios */
9879d493fa8SAlasdair G Kergon 	if (error)
9889d493fa8SAlasdair G Kergon 		error_bios(snapshot_bios);
9899d493fa8SAlasdair G Kergon 	else
9909d493fa8SAlasdair G Kergon 		flush_bios(snapshot_bios);
9919d493fa8SAlasdair G Kergon 
9929d493fa8SAlasdair G Kergon 	flush_bios(origin_bios);
9931da177e4SLinus Torvalds }
9941da177e4SLinus Torvalds 
/*
 * Callback passed to the store's commit_exception() by copy_callback();
 * context is the pending exception that was committed.
 */
static void commit_callback(void *context, int success)
{
	pending_complete(context, success);
}
10011da177e4SLinus Torvalds 
10021da177e4SLinus Torvalds /*
10031da177e4SLinus Torvalds  * Called when the copy I/O has finished.  kcopyd actually runs
10041da177e4SLinus Torvalds  * this code so don't block.
10051da177e4SLinus Torvalds  */
10064cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context)
10071da177e4SLinus Torvalds {
1008028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = context;
10091da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
10101da177e4SLinus Torvalds 
10111da177e4SLinus Torvalds 	if (read_err || write_err)
10121da177e4SLinus Torvalds 		pending_complete(pe, 0);
10131da177e4SLinus Torvalds 
10141da177e4SLinus Torvalds 	else
10151da177e4SLinus Torvalds 		/* Update the metadata if we are persistent */
1016493df71cSJonathan Brassow 		s->store->type->commit_exception(s->store, &pe->e,
1017b2a11465SJonathan Brassow 						 commit_callback, pe);
10181da177e4SLinus Torvalds }
10191da177e4SLinus Torvalds 
10201da177e4SLinus Torvalds /*
10211da177e4SLinus Torvalds  * Dispatches the copy operation to kcopyd.
10221da177e4SLinus Torvalds  */
1023028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe)
10241da177e4SLinus Torvalds {
10251da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
102622a1ceb1SHeinz Mauelshagen 	struct dm_io_region src, dest;
10271da177e4SLinus Torvalds 	struct block_device *bdev = s->origin->bdev;
10281da177e4SLinus Torvalds 	sector_t dev_size;
10291da177e4SLinus Torvalds 
10301da177e4SLinus Torvalds 	dev_size = get_dev_size(bdev);
10311da177e4SLinus Torvalds 
10321da177e4SLinus Torvalds 	src.bdev = bdev;
103371fab00aSJonathan Brassow 	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
1034d0216849SJonathan Brassow 	src.count = min(s->store->chunk_size, dev_size - src.sector);
10351da177e4SLinus Torvalds 
103649beb2b8SJonathan Brassow 	dest.bdev = s->store->cow->bdev;
103771fab00aSJonathan Brassow 	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
10381da177e4SLinus Torvalds 	dest.count = src.count;
10391da177e4SLinus Torvalds 
10401da177e4SLinus Torvalds 	/* Hand over to kcopyd */
1041eb69aca5SHeinz Mauelshagen 	dm_kcopyd_copy(s->kcopyd_client,
10421da177e4SLinus Torvalds 		    &src, 1, &dest, 0, copy_callback, pe);
10431da177e4SLinus Torvalds }
10441da177e4SLinus Torvalds 
10452913808eSMikulas Patocka static struct dm_snap_pending_exception *
10462913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
10472913808eSMikulas Patocka {
10482913808eSMikulas Patocka 	struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
10492913808eSMikulas Patocka 
10502913808eSMikulas Patocka 	if (!e)
10512913808eSMikulas Patocka 		return NULL;
10522913808eSMikulas Patocka 
10532913808eSMikulas Patocka 	return container_of(e, struct dm_snap_pending_exception, e);
10542913808eSMikulas Patocka }
10552913808eSMikulas Patocka 
10561da177e4SLinus Torvalds /*
10571da177e4SLinus Torvalds  * Looks to see if this snapshot already has a pending exception
10581da177e4SLinus Torvalds  * for this chunk, otherwise it allocates a new one and inserts
10591da177e4SLinus Torvalds  * it into the pending table.
10601da177e4SLinus Torvalds  *
10611da177e4SLinus Torvalds  * NOTE: a write lock must be held on snap->lock before calling
10621da177e4SLinus Torvalds  * this.
10631da177e4SLinus Torvalds  */
1064028867acSAlasdair G Kergon static struct dm_snap_pending_exception *
1065c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s,
1066c6621392SMikulas Patocka 			 struct dm_snap_pending_exception *pe, chunk_t chunk)
10671da177e4SLinus Torvalds {
1068c6621392SMikulas Patocka 	struct dm_snap_pending_exception *pe2;
106976df1c65SAlasdair G Kergon 
10702913808eSMikulas Patocka 	pe2 = __lookup_pending_exception(s, chunk);
10712913808eSMikulas Patocka 	if (pe2) {
10721da177e4SLinus Torvalds 		free_pending_exception(pe);
10732913808eSMikulas Patocka 		return pe2;
107476df1c65SAlasdair G Kergon 	}
107576df1c65SAlasdair G Kergon 
10761da177e4SLinus Torvalds 	pe->e.old_chunk = chunk;
10771da177e4SLinus Torvalds 	bio_list_init(&pe->origin_bios);
10781da177e4SLinus Torvalds 	bio_list_init(&pe->snapshot_bios);
1079b4b610f6SAlasdair G Kergon 	pe->primary_pe = NULL;
10804b832e8dSAlasdair G Kergon 	atomic_set(&pe->ref_count, 0);
10811da177e4SLinus Torvalds 	pe->started = 0;
10821da177e4SLinus Torvalds 
1083493df71cSJonathan Brassow 	if (s->store->type->prepare_exception(s->store, &pe->e)) {
10841da177e4SLinus Torvalds 		free_pending_exception(pe);
10851da177e4SLinus Torvalds 		return NULL;
10861da177e4SLinus Torvalds 	}
10871da177e4SLinus Torvalds 
10884b832e8dSAlasdair G Kergon 	get_pending_exception(pe);
10891da177e4SLinus Torvalds 	insert_exception(&s->pending, &pe->e);
10901da177e4SLinus Torvalds 
10911da177e4SLinus Torvalds 	return pe;
10921da177e4SLinus Torvalds }
10931da177e4SLinus Torvalds 
1094028867acSAlasdair G Kergon static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
1095d74f81f8SMilan Broz 			    struct bio *bio, chunk_t chunk)
10961da177e4SLinus Torvalds {
109749beb2b8SJonathan Brassow 	bio->bi_bdev = s->store->cow->bdev;
109871fab00aSJonathan Brassow 	bio->bi_sector = chunk_to_sector(s->store,
109971fab00aSJonathan Brassow 					 dm_chunk_number(e->new_chunk) +
1100d74f81f8SMilan Broz 					 (chunk - e->old_chunk)) +
110171fab00aSJonathan Brassow 					 (bio->bi_sector &
110271fab00aSJonathan Brassow 					  s->store->chunk_mask);
11031da177e4SLinus Torvalds }
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio,
11061da177e4SLinus Torvalds 			union map_info *map_context)
11071da177e4SLinus Torvalds {
1108028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
1109028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
1110d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
11111da177e4SLinus Torvalds 	chunk_t chunk;
1112028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = NULL;
11131da177e4SLinus Torvalds 
111471fab00aSJonathan Brassow 	chunk = sector_to_chunk(s->store, bio->bi_sector);
11151da177e4SLinus Torvalds 
11161da177e4SLinus Torvalds 	/* Full snapshots are not usable */
111776df1c65SAlasdair G Kergon 	/* To get here the table must be live so s->active is always set. */
11181da177e4SLinus Torvalds 	if (!s->valid)
1119f6a80ea8SAlasdair G Kergon 		return -EIO;
11201da177e4SLinus Torvalds 
11211da177e4SLinus Torvalds 	/* FIXME: should only take write lock if we need
11221da177e4SLinus Torvalds 	 * to copy an exception */
11231da177e4SLinus Torvalds 	down_write(&s->lock);
11241da177e4SLinus Torvalds 
112576df1c65SAlasdair G Kergon 	if (!s->valid) {
112676df1c65SAlasdair G Kergon 		r = -EIO;
112776df1c65SAlasdair G Kergon 		goto out_unlock;
112876df1c65SAlasdair G Kergon 	}
112976df1c65SAlasdair G Kergon 
11301da177e4SLinus Torvalds 	/* If the block is already remapped - use that, else remap it */
11311da177e4SLinus Torvalds 	e = lookup_exception(&s->complete, chunk);
11321da177e4SLinus Torvalds 	if (e) {
1133d74f81f8SMilan Broz 		remap_exception(s, e, bio, chunk);
113476df1c65SAlasdair G Kergon 		goto out_unlock;
113576df1c65SAlasdair G Kergon 	}
11361da177e4SLinus Torvalds 
1137ba40a2aaSAlasdair G Kergon 	/*
1138ba40a2aaSAlasdair G Kergon 	 * Write to snapshot - higher level takes care of RW/RO
1139ba40a2aaSAlasdair G Kergon 	 * flags so we should only get this if we are
1140ba40a2aaSAlasdair G Kergon 	 * writeable.
1141ba40a2aaSAlasdair G Kergon 	 */
1142ba40a2aaSAlasdair G Kergon 	if (bio_rw(bio) == WRITE) {
11432913808eSMikulas Patocka 		pe = __lookup_pending_exception(s, chunk);
11442913808eSMikulas Patocka 		if (!pe) {
1145c6621392SMikulas Patocka 			up_write(&s->lock);
1146c6621392SMikulas Patocka 			pe = alloc_pending_exception(s);
1147c6621392SMikulas Patocka 			down_write(&s->lock);
1148c6621392SMikulas Patocka 
1149c6621392SMikulas Patocka 			if (!s->valid) {
1150c6621392SMikulas Patocka 				free_pending_exception(pe);
1151c6621392SMikulas Patocka 				r = -EIO;
1152c6621392SMikulas Patocka 				goto out_unlock;
1153c6621392SMikulas Patocka 			}
1154c6621392SMikulas Patocka 
115535bf659bSMikulas Patocka 			e = lookup_exception(&s->complete, chunk);
115635bf659bSMikulas Patocka 			if (e) {
115735bf659bSMikulas Patocka 				free_pending_exception(pe);
115835bf659bSMikulas Patocka 				remap_exception(s, e, bio, chunk);
115935bf659bSMikulas Patocka 				goto out_unlock;
116035bf659bSMikulas Patocka 			}
116135bf659bSMikulas Patocka 
1162c6621392SMikulas Patocka 			pe = __find_pending_exception(s, pe, chunk);
11631da177e4SLinus Torvalds 			if (!pe) {
1164695368acSAlasdair G Kergon 				__invalidate_snapshot(s, -ENOMEM);
11651da177e4SLinus Torvalds 				r = -EIO;
116676df1c65SAlasdair G Kergon 				goto out_unlock;
116776df1c65SAlasdair G Kergon 			}
11682913808eSMikulas Patocka 		}
116976df1c65SAlasdair G Kergon 
1170d74f81f8SMilan Broz 		remap_exception(s, &pe->e, bio, chunk);
11711da177e4SLinus Torvalds 		bio_list_add(&pe->snapshot_bios, bio);
11721da177e4SLinus Torvalds 
1173d2a7ad29SKiyoshi Ueda 		r = DM_MAPIO_SUBMITTED;
1174ba40a2aaSAlasdair G Kergon 
11751da177e4SLinus Torvalds 		if (!pe->started) {
11761da177e4SLinus Torvalds 			/* this is protected by snap->lock */
11771da177e4SLinus Torvalds 			pe->started = 1;
117876df1c65SAlasdair G Kergon 			up_write(&s->lock);
117976df1c65SAlasdair G Kergon 			start_copy(pe);
1180ba40a2aaSAlasdair G Kergon 			goto out;
1181ba40a2aaSAlasdair G Kergon 		}
1182cd45daffSMikulas Patocka 	} else {
11831da177e4SLinus Torvalds 		bio->bi_bdev = s->origin->bdev;
1184cd45daffSMikulas Patocka 		map_context->ptr = track_chunk(s, chunk);
1185cd45daffSMikulas Patocka 	}
11861da177e4SLinus Torvalds 
1187ba40a2aaSAlasdair G Kergon  out_unlock:
1188ba40a2aaSAlasdair G Kergon 	up_write(&s->lock);
1189ba40a2aaSAlasdair G Kergon  out:
11901da177e4SLinus Torvalds 	return r;
11911da177e4SLinus Torvalds }
11921da177e4SLinus Torvalds 
1193cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1194cd45daffSMikulas Patocka 			   int error, union map_info *map_context)
1195cd45daffSMikulas Patocka {
1196cd45daffSMikulas Patocka 	struct dm_snapshot *s = ti->private;
1197cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = map_context->ptr;
1198cd45daffSMikulas Patocka 
1199cd45daffSMikulas Patocka 	if (c)
1200cd45daffSMikulas Patocka 		stop_tracking_chunk(s, c);
1201cd45daffSMikulas Patocka 
1202cd45daffSMikulas Patocka 	return 0;
1203cd45daffSMikulas Patocka }
1204cd45daffSMikulas Patocka 
12051da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti)
12061da177e4SLinus Torvalds {
1207028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
12081da177e4SLinus Torvalds 
1209aa14edebSAlasdair G Kergon 	down_write(&s->lock);
1210aa14edebSAlasdair G Kergon 	s->active = 1;
1211aa14edebSAlasdair G Kergon 	up_write(&s->lock);
12121da177e4SLinus Torvalds }
12131da177e4SLinus Torvalds 
12141da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type,
12151da177e4SLinus Torvalds 			   char *result, unsigned int maxlen)
12161da177e4SLinus Torvalds {
1217028867acSAlasdair G Kergon 	struct dm_snapshot *snap = ti->private;
12181da177e4SLinus Torvalds 
12191da177e4SLinus Torvalds 	switch (type) {
12201da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
12211da177e4SLinus Torvalds 		if (!snap->valid)
12221da177e4SLinus Torvalds 			snprintf(result, maxlen, "Invalid");
12231da177e4SLinus Torvalds 		else {
1224493df71cSJonathan Brassow 			if (snap->store->type->fraction_full) {
12251da177e4SLinus Torvalds 				sector_t numerator, denominator;
1226493df71cSJonathan Brassow 				snap->store->type->fraction_full(snap->store,
12271da177e4SLinus Torvalds 								 &numerator,
12281da177e4SLinus Torvalds 								 &denominator);
12294ee218cdSAndrew Morton 				snprintf(result, maxlen, "%llu/%llu",
12304ee218cdSAndrew Morton 					(unsigned long long)numerator,
12314ee218cdSAndrew Morton 					(unsigned long long)denominator);
12321da177e4SLinus Torvalds 			}
12331da177e4SLinus Torvalds 			else
12341da177e4SLinus Torvalds 				snprintf(result, maxlen, "Unknown");
12351da177e4SLinus Torvalds 		}
12361da177e4SLinus Torvalds 		break;
12371da177e4SLinus Torvalds 
12381da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
12391da177e4SLinus Torvalds 		/*
12401da177e4SLinus Torvalds 		 * kdevname returns a static pointer so we need
12411da177e4SLinus Torvalds 		 * to make private copies if the output is to
12421da177e4SLinus Torvalds 		 * make sense.
12431da177e4SLinus Torvalds 		 */
1244493df71cSJonathan Brassow 		snprintf(result, maxlen, "%s %s %s %llu",
124549beb2b8SJonathan Brassow 			 snap->origin->name, snap->store->cow->name,
1246493df71cSJonathan Brassow 			 snap->store->type->name,
1247d0216849SJonathan Brassow 			 (unsigned long long)snap->store->chunk_size);
12481da177e4SLinus Torvalds 		break;
12491da177e4SLinus Torvalds 	}
12501da177e4SLinus Torvalds 
12511da177e4SLinus Torvalds 	return 0;
12521da177e4SLinus Torvalds }
12531da177e4SLinus Torvalds 
12541da177e4SLinus Torvalds /*-----------------------------------------------------------------
12551da177e4SLinus Torvalds  * Origin methods
12561da177e4SLinus Torvalds  *---------------------------------------------------------------*/
12571da177e4SLinus Torvalds static int __origin_write(struct list_head *snapshots, struct bio *bio)
12581da177e4SLinus Torvalds {
1259d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED, first = 0;
12601da177e4SLinus Torvalds 	struct dm_snapshot *snap;
1261028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
1262028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
12631da177e4SLinus Torvalds 	chunk_t chunk;
1264eccf0817SAlasdair G Kergon 	LIST_HEAD(pe_queue);
12651da177e4SLinus Torvalds 
12661da177e4SLinus Torvalds 	/* Do all the snapshots on this origin */
12671da177e4SLinus Torvalds 	list_for_each_entry (snap, snapshots, list) {
12681da177e4SLinus Torvalds 
126976df1c65SAlasdair G Kergon 		down_write(&snap->lock);
127076df1c65SAlasdair G Kergon 
1271aa14edebSAlasdair G Kergon 		/* Only deal with valid and active snapshots */
1272aa14edebSAlasdair G Kergon 		if (!snap->valid || !snap->active)
127376df1c65SAlasdair G Kergon 			goto next_snapshot;
12741da177e4SLinus Torvalds 
1275d5e404c1SAlasdair G Kergon 		/* Nothing to do if writing beyond end of snapshot */
12760cea9c78SJonathan Brassow 		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
127776df1c65SAlasdair G Kergon 			goto next_snapshot;
12781da177e4SLinus Torvalds 
12791da177e4SLinus Torvalds 		/*
12801da177e4SLinus Torvalds 		 * Remember, different snapshots can have
12811da177e4SLinus Torvalds 		 * different chunk sizes.
12821da177e4SLinus Torvalds 		 */
128371fab00aSJonathan Brassow 		chunk = sector_to_chunk(snap->store, bio->bi_sector);
12841da177e4SLinus Torvalds 
12851da177e4SLinus Torvalds 		/*
12861da177e4SLinus Torvalds 		 * Check exception table to see if block
12871da177e4SLinus Torvalds 		 * is already remapped in this snapshot
12881da177e4SLinus Torvalds 		 * and trigger an exception if not.
1289b4b610f6SAlasdair G Kergon 		 *
12904b832e8dSAlasdair G Kergon 		 * ref_count is initialised to 1 so pending_complete()
1291b4b610f6SAlasdair G Kergon 		 * won't destroy the primary_pe while we're inside this loop.
12921da177e4SLinus Torvalds 		 */
12931da177e4SLinus Torvalds 		e = lookup_exception(&snap->complete, chunk);
129476df1c65SAlasdair G Kergon 		if (e)
129576df1c65SAlasdair G Kergon 			goto next_snapshot;
129676df1c65SAlasdair G Kergon 
12972913808eSMikulas Patocka 		pe = __lookup_pending_exception(snap, chunk);
12982913808eSMikulas Patocka 		if (!pe) {
1299c6621392SMikulas Patocka 			up_write(&snap->lock);
1300c6621392SMikulas Patocka 			pe = alloc_pending_exception(snap);
1301c6621392SMikulas Patocka 			down_write(&snap->lock);
1302c6621392SMikulas Patocka 
1303c6621392SMikulas Patocka 			if (!snap->valid) {
1304c6621392SMikulas Patocka 				free_pending_exception(pe);
1305c6621392SMikulas Patocka 				goto next_snapshot;
1306c6621392SMikulas Patocka 			}
1307c6621392SMikulas Patocka 
130835bf659bSMikulas Patocka 			e = lookup_exception(&snap->complete, chunk);
130935bf659bSMikulas Patocka 			if (e) {
131035bf659bSMikulas Patocka 				free_pending_exception(pe);
131135bf659bSMikulas Patocka 				goto next_snapshot;
131235bf659bSMikulas Patocka 			}
131335bf659bSMikulas Patocka 
1314c6621392SMikulas Patocka 			pe = __find_pending_exception(snap, pe, chunk);
13151da177e4SLinus Torvalds 			if (!pe) {
1316695368acSAlasdair G Kergon 				__invalidate_snapshot(snap, -ENOMEM);
131776df1c65SAlasdair G Kergon 				goto next_snapshot;
131876df1c65SAlasdair G Kergon 			}
13192913808eSMikulas Patocka 		}
13201da177e4SLinus Torvalds 
1321b4b610f6SAlasdair G Kergon 		if (!primary_pe) {
1322b4b610f6SAlasdair G Kergon 			/*
1323b4b610f6SAlasdair G Kergon 			 * Either every pe here has same
1324b4b610f6SAlasdair G Kergon 			 * primary_pe or none has one yet.
1325b4b610f6SAlasdair G Kergon 			 */
1326b4b610f6SAlasdair G Kergon 			if (pe->primary_pe)
1327b4b610f6SAlasdair G Kergon 				primary_pe = pe->primary_pe;
1328b4b610f6SAlasdair G Kergon 			else {
1329b4b610f6SAlasdair G Kergon 				primary_pe = pe;
1330b4b610f6SAlasdair G Kergon 				first = 1;
1331eccf0817SAlasdair G Kergon 			}
1332b4b610f6SAlasdair G Kergon 
133376df1c65SAlasdair G Kergon 			bio_list_add(&primary_pe->origin_bios, bio);
133476df1c65SAlasdair G Kergon 
1335d2a7ad29SKiyoshi Ueda 			r = DM_MAPIO_SUBMITTED;
1336b4b610f6SAlasdair G Kergon 		}
133776df1c65SAlasdair G Kergon 
1338b4b610f6SAlasdair G Kergon 		if (!pe->primary_pe) {
1339b4b610f6SAlasdair G Kergon 			pe->primary_pe = primary_pe;
13404b832e8dSAlasdair G Kergon 			get_pending_exception(primary_pe);
1341b4b610f6SAlasdair G Kergon 		}
134276df1c65SAlasdair G Kergon 
1343eccf0817SAlasdair G Kergon 		if (!pe->started) {
1344eccf0817SAlasdair G Kergon 			pe->started = 1;
1345eccf0817SAlasdair G Kergon 			list_add_tail(&pe->list, &pe_queue);
1346eccf0817SAlasdair G Kergon 		}
13471da177e4SLinus Torvalds 
134876df1c65SAlasdair G Kergon  next_snapshot:
13491da177e4SLinus Torvalds 		up_write(&snap->lock);
13501da177e4SLinus Torvalds 	}
13511da177e4SLinus Torvalds 
1352b4b610f6SAlasdair G Kergon 	if (!primary_pe)
13534b832e8dSAlasdair G Kergon 		return r;
1354b4b610f6SAlasdair G Kergon 
1355b4b610f6SAlasdair G Kergon 	/*
1356b4b610f6SAlasdair G Kergon 	 * If this is the first time we're processing this chunk and
13574b832e8dSAlasdair G Kergon 	 * ref_count is now 1 it means all the pending exceptions
1358b4b610f6SAlasdair G Kergon 	 * got completed while we were in the loop above, so it falls to
1359b4b610f6SAlasdair G Kergon 	 * us here to remove the primary_pe and submit any origin_bios.
1360b4b610f6SAlasdair G Kergon 	 */
1361b4b610f6SAlasdair G Kergon 
13624b832e8dSAlasdair G Kergon 	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
1363b4b610f6SAlasdair G Kergon 		flush_bios(bio_list_get(&primary_pe->origin_bios));
1364b4b610f6SAlasdair G Kergon 		free_pending_exception(primary_pe);
1365b4b610f6SAlasdair G Kergon 		/* If we got here, pe_queue is necessarily empty. */
13664b832e8dSAlasdair G Kergon 		return r;
1367b4b610f6SAlasdair G Kergon 	}
1368b4b610f6SAlasdair G Kergon 
13691da177e4SLinus Torvalds 	/*
13701da177e4SLinus Torvalds 	 * Now that we have a complete pe list we can start the copying.
13711da177e4SLinus Torvalds 	 */
1372eccf0817SAlasdair G Kergon 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
13731da177e4SLinus Torvalds 		start_copy(pe);
13741da177e4SLinus Torvalds 
13751da177e4SLinus Torvalds 	return r;
13761da177e4SLinus Torvalds }
13771da177e4SLinus Torvalds 
13781da177e4SLinus Torvalds /*
13791da177e4SLinus Torvalds  * Called on a write from the origin driver.
13801da177e4SLinus Torvalds  */
13811da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio)
13821da177e4SLinus Torvalds {
13831da177e4SLinus Torvalds 	struct origin *o;
1384d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds 	down_read(&_origins_lock);
13871da177e4SLinus Torvalds 	o = __lookup_origin(origin->bdev);
13881da177e4SLinus Torvalds 	if (o)
13891da177e4SLinus Torvalds 		r = __origin_write(&o->snapshots, bio);
13901da177e4SLinus Torvalds 	up_read(&_origins_lock);
13911da177e4SLinus Torvalds 
13921da177e4SLinus Torvalds 	return r;
13931da177e4SLinus Torvalds }
13941da177e4SLinus Torvalds 
13951da177e4SLinus Torvalds /*
13961da177e4SLinus Torvalds  * Origin: maps a linear range of a device, with hooks for snapshotting.
13971da177e4SLinus Torvalds  */
13981da177e4SLinus Torvalds 
13991da177e4SLinus Torvalds /*
14001da177e4SLinus Torvalds  * Construct an origin mapping: <dev_path>
14011da177e4SLinus Torvalds  * The context for an origin is merely a 'struct dm_dev *'
14021da177e4SLinus Torvalds  * pointing to the real device.
14031da177e4SLinus Torvalds  */
14041da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
14051da177e4SLinus Torvalds {
14061da177e4SLinus Torvalds 	int r;
14071da177e4SLinus Torvalds 	struct dm_dev *dev;
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds 	if (argc != 1) {
141072d94861SAlasdair G Kergon 		ti->error = "origin: incorrect number of arguments";
14111da177e4SLinus Torvalds 		return -EINVAL;
14121da177e4SLinus Torvalds 	}
14131da177e4SLinus Torvalds 
14141da177e4SLinus Torvalds 	r = dm_get_device(ti, argv[0], 0, ti->len,
14151da177e4SLinus Torvalds 			  dm_table_get_mode(ti->table), &dev);
14161da177e4SLinus Torvalds 	if (r) {
14171da177e4SLinus Torvalds 		ti->error = "Cannot get target device";
14181da177e4SLinus Torvalds 		return r;
14191da177e4SLinus Torvalds 	}
14201da177e4SLinus Torvalds 
14211da177e4SLinus Torvalds 	ti->private = dev;
14221da177e4SLinus Torvalds 	return 0;
14231da177e4SLinus Torvalds }
14241da177e4SLinus Torvalds 
14251da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti)
14261da177e4SLinus Torvalds {
1427028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14281da177e4SLinus Torvalds 	dm_put_device(ti, dev);
14291da177e4SLinus Torvalds }
14301da177e4SLinus Torvalds 
14311da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio,
14321da177e4SLinus Torvalds 		      union map_info *map_context)
14331da177e4SLinus Torvalds {
1434028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14351da177e4SLinus Torvalds 	bio->bi_bdev = dev->bdev;
14361da177e4SLinus Torvalds 
14371da177e4SLinus Torvalds 	/* Only tell snapshots if this is a write */
1438d2a7ad29SKiyoshi Ueda 	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
14391da177e4SLinus Torvalds }
14401da177e4SLinus Torvalds 
/*
 * Smaller of two values, treating zero as "unset": if one argument is
 * zero the other is returned, otherwise the result is min(l, r).
 *
 * Every argument and the whole expansion are parenthesised so the macro
 * can be used inside larger expressions and with compound arguments.
 * Note that the arguments may be evaluated more than once, so they must
 * be free of side effects.
 */
#define min_not_zero(l, r) \
	(((l) == 0) ? (r) : (((r) == 0) ? (l) : min((l), (r))))
14421da177e4SLinus Torvalds 
14431da177e4SLinus Torvalds /*
14441da177e4SLinus Torvalds  * Set the target "split_io" field to the minimum of all the snapshots'
14451da177e4SLinus Torvalds  * chunk sizes.
14461da177e4SLinus Torvalds  */
14471da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti)
14481da177e4SLinus Torvalds {
1449028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14501da177e4SLinus Torvalds 	struct dm_snapshot *snap;
14511da177e4SLinus Torvalds 	struct origin *o;
14521da177e4SLinus Torvalds 	chunk_t chunk_size = 0;
14531da177e4SLinus Torvalds 
14541da177e4SLinus Torvalds 	down_read(&_origins_lock);
14551da177e4SLinus Torvalds 	o = __lookup_origin(dev->bdev);
14561da177e4SLinus Torvalds 	if (o)
14571da177e4SLinus Torvalds 		list_for_each_entry (snap, &o->snapshots, list)
1458d0216849SJonathan Brassow 			chunk_size = min_not_zero(chunk_size,
1459d0216849SJonathan Brassow 						  snap->store->chunk_size);
14601da177e4SLinus Torvalds 	up_read(&_origins_lock);
14611da177e4SLinus Torvalds 
14621da177e4SLinus Torvalds 	ti->split_io = chunk_size;
14631da177e4SLinus Torvalds }
14641da177e4SLinus Torvalds 
14651da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result,
14661da177e4SLinus Torvalds 			 unsigned int maxlen)
14671da177e4SLinus Torvalds {
1468028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14691da177e4SLinus Torvalds 
14701da177e4SLinus Torvalds 	switch (type) {
14711da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
14721da177e4SLinus Torvalds 		result[0] = '\0';
14731da177e4SLinus Torvalds 		break;
14741da177e4SLinus Torvalds 
14751da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
14761da177e4SLinus Torvalds 		snprintf(result, maxlen, "%s", dev->name);
14771da177e4SLinus Torvalds 		break;
14781da177e4SLinus Torvalds 	}
14791da177e4SLinus Torvalds 
14801da177e4SLinus Torvalds 	return 0;
14811da177e4SLinus Torvalds }
14821da177e4SLinus Torvalds 
14831da177e4SLinus Torvalds static struct target_type origin_target = {
14841da177e4SLinus Torvalds 	.name    = "snapshot-origin",
1485d74f81f8SMilan Broz 	.version = {1, 6, 0},
14861da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14871da177e4SLinus Torvalds 	.ctr     = origin_ctr,
14881da177e4SLinus Torvalds 	.dtr     = origin_dtr,
14891da177e4SLinus Torvalds 	.map     = origin_map,
14901da177e4SLinus Torvalds 	.resume  = origin_resume,
14911da177e4SLinus Torvalds 	.status  = origin_status,
14921da177e4SLinus Torvalds };
14931da177e4SLinus Torvalds 
14941da177e4SLinus Torvalds static struct target_type snapshot_target = {
14951da177e4SLinus Torvalds 	.name    = "snapshot",
1496d74f81f8SMilan Broz 	.version = {1, 6, 0},
14971da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14981da177e4SLinus Torvalds 	.ctr     = snapshot_ctr,
14991da177e4SLinus Torvalds 	.dtr     = snapshot_dtr,
15001da177e4SLinus Torvalds 	.map     = snapshot_map,
1501cd45daffSMikulas Patocka 	.end_io  = snapshot_end_io,
15021da177e4SLinus Torvalds 	.resume  = snapshot_resume,
15031da177e4SLinus Torvalds 	.status  = snapshot_status,
15041da177e4SLinus Torvalds };
15051da177e4SLinus Torvalds 
15061da177e4SLinus Torvalds static int __init dm_snapshot_init(void)
15071da177e4SLinus Torvalds {
15081da177e4SLinus Torvalds 	int r;
15091da177e4SLinus Torvalds 
15104db6bfe0SAlasdair G Kergon 	r = dm_exception_store_init();
15114db6bfe0SAlasdair G Kergon 	if (r) {
15124db6bfe0SAlasdair G Kergon 		DMERR("Failed to initialize exception stores");
15134db6bfe0SAlasdair G Kergon 		return r;
15144db6bfe0SAlasdair G Kergon 	}
15154db6bfe0SAlasdair G Kergon 
15161da177e4SLinus Torvalds 	r = dm_register_target(&snapshot_target);
15171da177e4SLinus Torvalds 	if (r) {
15181da177e4SLinus Torvalds 		DMERR("snapshot target register failed %d", r);
15191da177e4SLinus Torvalds 		return r;
15201da177e4SLinus Torvalds 	}
15211da177e4SLinus Torvalds 
15221da177e4SLinus Torvalds 	r = dm_register_target(&origin_target);
15231da177e4SLinus Torvalds 	if (r < 0) {
152472d94861SAlasdair G Kergon 		DMERR("Origin target register failed %d", r);
15251da177e4SLinus Torvalds 		goto bad1;
15261da177e4SLinus Torvalds 	}
15271da177e4SLinus Torvalds 
15281da177e4SLinus Torvalds 	r = init_origin_hash();
15291da177e4SLinus Torvalds 	if (r) {
15301da177e4SLinus Torvalds 		DMERR("init_origin_hash failed.");
15311da177e4SLinus Torvalds 		goto bad2;
15321da177e4SLinus Torvalds 	}
15331da177e4SLinus Torvalds 
1534028867acSAlasdair G Kergon 	exception_cache = KMEM_CACHE(dm_snap_exception, 0);
15351da177e4SLinus Torvalds 	if (!exception_cache) {
15361da177e4SLinus Torvalds 		DMERR("Couldn't create exception cache.");
15371da177e4SLinus Torvalds 		r = -ENOMEM;
15381da177e4SLinus Torvalds 		goto bad3;
15391da177e4SLinus Torvalds 	}
15401da177e4SLinus Torvalds 
1541028867acSAlasdair G Kergon 	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
15421da177e4SLinus Torvalds 	if (!pending_cache) {
15431da177e4SLinus Torvalds 		DMERR("Couldn't create pending cache.");
15441da177e4SLinus Torvalds 		r = -ENOMEM;
15451da177e4SLinus Torvalds 		goto bad4;
15461da177e4SLinus Torvalds 	}
15471da177e4SLinus Torvalds 
1548cd45daffSMikulas Patocka 	tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
1549cd45daffSMikulas Patocka 	if (!tracked_chunk_cache) {
1550cd45daffSMikulas Patocka 		DMERR("Couldn't create cache to track chunks in use.");
1551cd45daffSMikulas Patocka 		r = -ENOMEM;
1552cd45daffSMikulas Patocka 		goto bad5;
1553cd45daffSMikulas Patocka 	}
1554cd45daffSMikulas Patocka 
1555ca3a931fSAlasdair G Kergon 	ksnapd = create_singlethread_workqueue("ksnapd");
1556ca3a931fSAlasdair G Kergon 	if (!ksnapd) {
1557ca3a931fSAlasdair G Kergon 		DMERR("Failed to create ksnapd workqueue.");
1558ca3a931fSAlasdair G Kergon 		r = -ENOMEM;
155992e86812SMikulas Patocka 		goto bad_pending_pool;
1560ca3a931fSAlasdair G Kergon 	}
1561ca3a931fSAlasdair G Kergon 
15621da177e4SLinus Torvalds 	return 0;
15631da177e4SLinus Torvalds 
1564cd45daffSMikulas Patocka bad_pending_pool:
1565cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15661da177e4SLinus Torvalds bad5:
15671da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15681da177e4SLinus Torvalds bad4:
15691da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
15701da177e4SLinus Torvalds bad3:
15711da177e4SLinus Torvalds 	exit_origin_hash();
15721da177e4SLinus Torvalds bad2:
15731da177e4SLinus Torvalds 	dm_unregister_target(&origin_target);
15741da177e4SLinus Torvalds bad1:
15751da177e4SLinus Torvalds 	dm_unregister_target(&snapshot_target);
15761da177e4SLinus Torvalds 	return r;
15771da177e4SLinus Torvalds }
15781da177e4SLinus Torvalds 
15791da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void)
15801da177e4SLinus Torvalds {
1581ca3a931fSAlasdair G Kergon 	destroy_workqueue(ksnapd);
1582ca3a931fSAlasdair G Kergon 
158310d3bd09SMikulas Patocka 	dm_unregister_target(&snapshot_target);
158410d3bd09SMikulas Patocka 	dm_unregister_target(&origin_target);
15851da177e4SLinus Torvalds 
15861da177e4SLinus Torvalds 	exit_origin_hash();
15871da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15881da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
1589cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15904db6bfe0SAlasdair G Kergon 
15914db6bfe0SAlasdair G Kergon 	dm_exception_store_exit();
15921da177e4SLinus Torvalds }
15931da177e4SLinus Torvalds 
15941da177e4SLinus Torvalds /* Module hooks */
15951da177e4SLinus Torvalds module_init(dm_snapshot_init);
15961da177e4SLinus Torvalds module_exit(dm_snapshot_exit);
15971da177e4SLinus Torvalds 
15981da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target");
15991da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber");
16001da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1601