xref: /openbmc/linux/drivers/md/dm-snap.c (revision 191437a5)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * dm-snapshot.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * This file is released under the GPL.
71da177e4SLinus Torvalds  */
81da177e4SLinus Torvalds 
91da177e4SLinus Torvalds #include <linux/blkdev.h>
101da177e4SLinus Torvalds #include <linux/device-mapper.h>
1190fa1527SMikulas Patocka #include <linux/delay.h>
121da177e4SLinus Torvalds #include <linux/fs.h>
131da177e4SLinus Torvalds #include <linux/init.h>
141da177e4SLinus Torvalds #include <linux/kdev_t.h>
151da177e4SLinus Torvalds #include <linux/list.h>
161da177e4SLinus Torvalds #include <linux/mempool.h>
171da177e4SLinus Torvalds #include <linux/module.h>
181da177e4SLinus Torvalds #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/vmalloc.h>
206f3c3f0aSvignesh babu #include <linux/log2.h>
21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h>
22ccc45ea8SJonathan Brassow #include <linux/workqueue.h>
231da177e4SLinus Torvalds 
24aea53d92SJonathan Brassow #include "dm-exception-store.h"
251da177e4SLinus Torvalds 
2672d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots"
2772d94861SAlasdair G Kergon 
/*
 * The percentage increment we will wake up users at
 */
#define WAKE_UP_PERCENT 5

/*
 * kcopyd priority of snapshot operations
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Reserve 1MB for each snapshot initially (with minimum of 1 page).
 * The "?:" shorthand substitutes 1 when 1MB >> PAGE_SHIFT evaluates to 0.
 */
#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)

/*
 * Minimum number of elements passed to mempool_create_slab_pool() for the
 * per-snapshot mempools (pending exceptions and tracked chunks).
 */
#define MIN_IOS 256

/*
 * Number of buckets in the per-snapshot tracked-chunk hash and the bucket
 * selector, which keeps the low bits of the chunk number.  The mask form
 * relies on DM_TRACKED_CHUNK_HASH_SIZE being a power of two.
 */
#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
51ccc45ea8SJonathan Brassow 
/*
 * Hash table of exceptions, keyed by chunk number.
 */
struct dm_exception_table {
	/* Number of buckets minus one (set by init_exception_table()) */
	uint32_t hash_mask;
	/* Low bits of the chunk number dropped before masking */
	unsigned hash_shift;
	/* Array of hash_mask + 1 bucket list heads */
	struct list_head *table;
};
57ccc45ea8SJonathan Brassow 
/*
 * Per-snapshot context.
 */
struct dm_snapshot {
	struct rw_semaphore lock;

	struct dm_dev *origin;

	/* List of snapshots per Origin */
	struct list_head list;

	/* You can't use a snapshot if this is 0 (e.g. if full) */
	int valid;

	/* Origin writes don't trigger exceptions until this is set */
	int active;

	/* Pool that alloc_pending_exception() allocates from */
	mempool_t *pending_pool;

	/* Pending exceptions currently allocated from pending_pool */
	atomic_t pending_exceptions_count;

	/*
	 * Exception hash tables: "pending" holds in-flight exceptions,
	 * "complete" is filled by dm_add_exception() when metadata is loaded.
	 */
	struct dm_exception_table pending;
	struct dm_exception_table complete;

	/*
	 * pe_lock protects all pending_exception operations and access
	 * as well as the snapshot_bios list.
	 */
	spinlock_t pe_lock;

	/* The on disk metadata handler */
	struct dm_exception_store *store;

	struct dm_kcopyd_client *kcopyd_client;

	/* Queue of snapshot writes for ksnapd to flush */
	struct bio_list queued_bios;
	struct work_struct queued_bios_work;

	/* Chunks with outstanding reads */
	mempool_t *tracked_chunk_pool;
	/* Taken around every update and walk of tracked_chunk_hash */
	spinlock_t tracked_chunk_lock;
	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
99ccc45ea8SJonathan Brassow 
/* Workqueue that flushes the per-snapshot queued_bios lists */
static struct workqueue_struct *ksnapd;
/* Work handler, defined further down the file */
static void flush_queued_bios(struct work_struct *work);
102ca3a931fSAlasdair G Kergon 
103ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store,
104ccc45ea8SJonathan Brassow 				chunk_t chunk)
105ccc45ea8SJonathan Brassow {
106ccc45ea8SJonathan Brassow 	return chunk << store->chunk_shift;
107ccc45ea8SJonathan Brassow }
108ccc45ea8SJonathan Brassow 
/*
 * Return 1 when both pointers refer to the same block device, 0 otherwise.
 *
 * There is only ever one instance of a particular block device, so
 * pointer identity is a sufficient comparison.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	if (lhs != rhs)
		return 0;

	return 1;
}
117ccc45ea8SJonathan Brassow 
/*
 * An exception that is still being set up.  Allocated by
 * alloc_pending_exception() and released by free_pending_exception().
 */
struct dm_snap_pending_exception {
	/* The exception itself, embedded at the start of the structure */
	struct dm_exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a bio list
	 */
	struct bio_list origin_bios;
	struct bio_list snapshot_bios;

	/*
	 * Short-term queue of pending exceptions prior to submission.
	 */
	struct list_head list;

	/*
	 * The primary pending_exception is the one that holds
	 * the ref_count and the list of origin_bios for a
	 * group of pending_exceptions.  It is always last to get freed.
	 * These fields get set up when writing to the origin.
	 */
	struct dm_snap_pending_exception *primary_pe;

	/*
	 * Number of pending_exceptions processing this chunk.
	 * When this drops to zero we must complete the origin bios.
	 * If incrementing or decrementing this, hold pe->snap->lock for
	 * the sibling concerned and not pe->primary_pe->snap->lock unless
	 * they are the same.
	 */
	atomic_t ref_count;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;
};
1591da177e4SLinus Torvalds 
/*
 * Slab caches shared by all snapshots: exception_cache backs
 * alloc_exception()/free_exception(), and pending_cache backs each
 * snapshot's pending_pool mempool.
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
1661da177e4SLinus Torvalds 
/*
 * One entry in a snapshot's tracked_chunk_hash: records a chunk that is
 * currently being tracked (see track_chunk()/stop_tracking_chunk()).
 */
struct dm_snap_tracked_chunk {
	/* Link in the tracked_chunk_hash bucket for this chunk */
	struct hlist_node node;
	/* The chunk number being tracked */
	chunk_t chunk;
};

/* Slab cache backing each snapshot's tracked_chunk_pool mempool */
static struct kmem_cache *tracked_chunk_cache;
173cd45daffSMikulas Patocka 
174cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
175cd45daffSMikulas Patocka 						 chunk_t chunk)
176cd45daffSMikulas Patocka {
177cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
178cd45daffSMikulas Patocka 							GFP_NOIO);
179cd45daffSMikulas Patocka 	unsigned long flags;
180cd45daffSMikulas Patocka 
181cd45daffSMikulas Patocka 	c->chunk = chunk;
182cd45daffSMikulas Patocka 
183cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
184cd45daffSMikulas Patocka 	hlist_add_head(&c->node,
185cd45daffSMikulas Patocka 		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
186cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
187cd45daffSMikulas Patocka 
188cd45daffSMikulas Patocka 	return c;
189cd45daffSMikulas Patocka }
190cd45daffSMikulas Patocka 
191cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s,
192cd45daffSMikulas Patocka 				struct dm_snap_tracked_chunk *c)
193cd45daffSMikulas Patocka {
194cd45daffSMikulas Patocka 	unsigned long flags;
195cd45daffSMikulas Patocka 
196cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
197cd45daffSMikulas Patocka 	hlist_del(&c->node);
198cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
199cd45daffSMikulas Patocka 
200cd45daffSMikulas Patocka 	mempool_free(c, s->tracked_chunk_pool);
201cd45daffSMikulas Patocka }
202cd45daffSMikulas Patocka 
203a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
204a8d41b59SMikulas Patocka {
205a8d41b59SMikulas Patocka 	struct dm_snap_tracked_chunk *c;
206a8d41b59SMikulas Patocka 	struct hlist_node *hn;
207a8d41b59SMikulas Patocka 	int found = 0;
208a8d41b59SMikulas Patocka 
209a8d41b59SMikulas Patocka 	spin_lock_irq(&s->tracked_chunk_lock);
210a8d41b59SMikulas Patocka 
211a8d41b59SMikulas Patocka 	hlist_for_each_entry(c, hn,
212a8d41b59SMikulas Patocka 	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
213a8d41b59SMikulas Patocka 		if (c->chunk == chunk) {
214a8d41b59SMikulas Patocka 			found = 1;
215a8d41b59SMikulas Patocka 			break;
216a8d41b59SMikulas Patocka 		}
217a8d41b59SMikulas Patocka 	}
218a8d41b59SMikulas Patocka 
219a8d41b59SMikulas Patocka 	spin_unlock_irq(&s->tracked_chunk_lock);
220a8d41b59SMikulas Patocka 
221a8d41b59SMikulas Patocka 	return found;
222a8d41b59SMikulas Patocka }
223a8d41b59SMikulas Patocka 
/*
 * One of these per registered origin, held in the snapshot_origins hash.
 * Created by register_snapshot() for the first snapshot of a device and
 * freed by unregister_snapshot() once its snapshot list is empty.
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	/* Link in the _origins bucket chosen by origin_hash() */
	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};
2361da177e4SLinus Torvalds 
/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect.
 *
 * ORIGIN_MASK is ORIGIN_HASH_SIZE - 1; origin_hash() masks with it to
 * pick a bucket, so the two values must be kept in step.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
/*
 * Hash table mapping origin volumes to lists of snapshots, and the lock
 * taken around lookups and updates of it.
 */
static struct list_head *_origins;
static struct rw_semaphore _origins_lock;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds static int init_origin_hash(void)
2471da177e4SLinus Torvalds {
2481da177e4SLinus Torvalds 	int i;
2491da177e4SLinus Torvalds 
2501da177e4SLinus Torvalds 	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
2511da177e4SLinus Torvalds 			   GFP_KERNEL);
2521da177e4SLinus Torvalds 	if (!_origins) {
25372d94861SAlasdair G Kergon 		DMERR("unable to allocate memory");
2541da177e4SLinus Torvalds 		return -ENOMEM;
2551da177e4SLinus Torvalds 	}
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds 	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
2581da177e4SLinus Torvalds 		INIT_LIST_HEAD(_origins + i);
2591da177e4SLinus Torvalds 	init_rwsem(&_origins_lock);
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds 	return 0;
2621da177e4SLinus Torvalds }
2631da177e4SLinus Torvalds 
/*
 * Free the bucket array allocated by init_origin_hash().  Only the array
 * itself is released; any struct origin still linked into it is not freed
 * here.
 */
static void exit_origin_hash(void)
{
	kfree(_origins);
}
2681da177e4SLinus Torvalds 
269028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev)
2701da177e4SLinus Torvalds {
2711da177e4SLinus Torvalds 	return bdev->bd_dev & ORIGIN_MASK;
2721da177e4SLinus Torvalds }
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin)
2751da177e4SLinus Torvalds {
2761da177e4SLinus Torvalds 	struct list_head *ol;
2771da177e4SLinus Torvalds 	struct origin *o;
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds 	ol = &_origins[origin_hash(origin)];
2801da177e4SLinus Torvalds 	list_for_each_entry (o, ol, hash_list)
2811da177e4SLinus Torvalds 		if (bdev_equal(o->bdev, origin))
2821da177e4SLinus Torvalds 			return o;
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds 	return NULL;
2851da177e4SLinus Torvalds }
2861da177e4SLinus Torvalds 
2871da177e4SLinus Torvalds static void __insert_origin(struct origin *o)
2881da177e4SLinus Torvalds {
2891da177e4SLinus Torvalds 	struct list_head *sl = &_origins[origin_hash(o->bdev)];
2901da177e4SLinus Torvalds 	list_add_tail(&o->hash_list, sl);
2911da177e4SLinus Torvalds }
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds /*
2941da177e4SLinus Torvalds  * Make a note of the snapshot and its origin so we can look it
2951da177e4SLinus Torvalds  * up when the origin has a write on it.
2961da177e4SLinus Torvalds  */
2971da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap)
2981da177e4SLinus Torvalds {
2996d45d93eSMikulas Patocka 	struct dm_snapshot *l;
30060c856c8SMikulas Patocka 	struct origin *o, *new_o;
3011da177e4SLinus Torvalds 	struct block_device *bdev = snap->origin->bdev;
3021da177e4SLinus Torvalds 
30360c856c8SMikulas Patocka 	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
30460c856c8SMikulas Patocka 	if (!new_o)
30560c856c8SMikulas Patocka 		return -ENOMEM;
30660c856c8SMikulas Patocka 
3071da177e4SLinus Torvalds 	down_write(&_origins_lock);
3081da177e4SLinus Torvalds 	o = __lookup_origin(bdev);
3091da177e4SLinus Torvalds 
31060c856c8SMikulas Patocka 	if (o)
31160c856c8SMikulas Patocka 		kfree(new_o);
31260c856c8SMikulas Patocka 	else {
3131da177e4SLinus Torvalds 		/* New origin */
31460c856c8SMikulas Patocka 		o = new_o;
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 		/* Initialise the struct */
3171da177e4SLinus Torvalds 		INIT_LIST_HEAD(&o->snapshots);
3181da177e4SLinus Torvalds 		o->bdev = bdev;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 		__insert_origin(o);
3211da177e4SLinus Torvalds 	}
3221da177e4SLinus Torvalds 
3236d45d93eSMikulas Patocka 	/* Sort the list according to chunk size, largest-first smallest-last */
3246d45d93eSMikulas Patocka 	list_for_each_entry(l, &o->snapshots, list)
3256d45d93eSMikulas Patocka 		if (l->store->chunk_size < snap->store->chunk_size)
3266d45d93eSMikulas Patocka 			break;
3276d45d93eSMikulas Patocka 	list_add_tail(&snap->list, &l->list);
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 	up_write(&_origins_lock);
3301da177e4SLinus Torvalds 	return 0;
3311da177e4SLinus Torvalds }
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s)
3341da177e4SLinus Torvalds {
3351da177e4SLinus Torvalds 	struct origin *o;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	down_write(&_origins_lock);
3381da177e4SLinus Torvalds 	o = __lookup_origin(s->origin->bdev);
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	list_del(&s->list);
3411da177e4SLinus Torvalds 	if (list_empty(&o->snapshots)) {
3421da177e4SLinus Torvalds 		list_del(&o->hash_list);
3431da177e4SLinus Torvalds 		kfree(o);
3441da177e4SLinus Torvalds 	}
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 	up_write(&_origins_lock);
3471da177e4SLinus Torvalds }
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds /*
3501da177e4SLinus Torvalds  * Implementation of the exception hash tables.
351d74f81f8SMilan Broz  * The lowest hash_shift bits of the chunk number are ignored, allowing
352d74f81f8SMilan Broz  * some consecutive chunks to be grouped together.
3531da177e4SLinus Torvalds  */
354191437a5SJon Brassow static int init_exception_table(struct dm_exception_table *et, uint32_t size,
355d74f81f8SMilan Broz 				unsigned hash_shift)
3561da177e4SLinus Torvalds {
3571da177e4SLinus Torvalds 	unsigned int i;
3581da177e4SLinus Torvalds 
359d74f81f8SMilan Broz 	et->hash_shift = hash_shift;
3601da177e4SLinus Torvalds 	et->hash_mask = size - 1;
3611da177e4SLinus Torvalds 	et->table = dm_vcalloc(size, sizeof(struct list_head));
3621da177e4SLinus Torvalds 	if (!et->table)
3631da177e4SLinus Torvalds 		return -ENOMEM;
3641da177e4SLinus Torvalds 
3651da177e4SLinus Torvalds 	for (i = 0; i < size; i++)
3661da177e4SLinus Torvalds 		INIT_LIST_HEAD(et->table + i);
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds 	return 0;
3691da177e4SLinus Torvalds }
3701da177e4SLinus Torvalds 
371191437a5SJon Brassow static void exit_exception_table(struct dm_exception_table *et,
372191437a5SJon Brassow 				 struct kmem_cache *mem)
3731da177e4SLinus Torvalds {
3741da177e4SLinus Torvalds 	struct list_head *slot;
3751d4989c8SJon Brassow 	struct dm_exception *ex, *next;
3761da177e4SLinus Torvalds 	int i, size;
3771da177e4SLinus Torvalds 
3781da177e4SLinus Torvalds 	size = et->hash_mask + 1;
3791da177e4SLinus Torvalds 	for (i = 0; i < size; i++) {
3801da177e4SLinus Torvalds 		slot = et->table + i;
3811da177e4SLinus Torvalds 
3821da177e4SLinus Torvalds 		list_for_each_entry_safe (ex, next, slot, hash_list)
3831da177e4SLinus Torvalds 			kmem_cache_free(mem, ex);
3841da177e4SLinus Torvalds 	}
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	vfree(et->table);
3871da177e4SLinus Torvalds }
3881da177e4SLinus Torvalds 
389191437a5SJon Brassow static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
3901da177e4SLinus Torvalds {
391d74f81f8SMilan Broz 	return (chunk >> et->hash_shift) & et->hash_mask;
3921da177e4SLinus Torvalds }
3931da177e4SLinus Torvalds 
/*
 * Unlink an exception from the hash bucket it is on.  The exception itself
 * is not freed.
 */
static void remove_exception(struct dm_exception *e)
{
	list_del(&e->hash_list);
}
3981da177e4SLinus Torvalds 
3991da177e4SLinus Torvalds /*
4001da177e4SLinus Torvalds  * Return the exception data for a sector, or NULL if not
4011da177e4SLinus Torvalds  * remapped.
4021da177e4SLinus Torvalds  */
403191437a5SJon Brassow static struct dm_exception *lookup_exception(struct dm_exception_table *et,
4041da177e4SLinus Torvalds 						  chunk_t chunk)
4051da177e4SLinus Torvalds {
4061da177e4SLinus Torvalds 	struct list_head *slot;
4071d4989c8SJon Brassow 	struct dm_exception *e;
4081da177e4SLinus Torvalds 
4091da177e4SLinus Torvalds 	slot = &et->table[exception_hash(et, chunk)];
4101da177e4SLinus Torvalds 	list_for_each_entry (e, slot, hash_list)
411d74f81f8SMilan Broz 		if (chunk >= e->old_chunk &&
412d74f81f8SMilan Broz 		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
4131da177e4SLinus Torvalds 			return e;
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	return NULL;
4161da177e4SLinus Torvalds }
4171da177e4SLinus Torvalds 
4181d4989c8SJon Brassow static struct dm_exception *alloc_exception(void)
4191da177e4SLinus Torvalds {
4201d4989c8SJon Brassow 	struct dm_exception *e;
4211da177e4SLinus Torvalds 
4221da177e4SLinus Torvalds 	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
4231da177e4SLinus Torvalds 	if (!e)
4241da177e4SLinus Torvalds 		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
4251da177e4SLinus Torvalds 
4261da177e4SLinus Torvalds 	return e;
4271da177e4SLinus Torvalds }
4281da177e4SLinus Torvalds 
/*
 * Return an exception obtained from alloc_exception() to exception_cache.
 */
static void free_exception(struct dm_exception *e)
{
	kmem_cache_free(exception_cache, e);
}
4331da177e4SLinus Torvalds 
43492e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
4351da177e4SLinus Torvalds {
43692e86812SMikulas Patocka 	struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
43792e86812SMikulas Patocka 							     GFP_NOIO);
43892e86812SMikulas Patocka 
439879129d2SMikulas Patocka 	atomic_inc(&s->pending_exceptions_count);
44092e86812SMikulas Patocka 	pe->snap = s;
44192e86812SMikulas Patocka 
44292e86812SMikulas Patocka 	return pe;
4431da177e4SLinus Torvalds }
4441da177e4SLinus Torvalds 
445028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe)
4461da177e4SLinus Torvalds {
447879129d2SMikulas Patocka 	struct dm_snapshot *s = pe->snap;
448879129d2SMikulas Patocka 
449879129d2SMikulas Patocka 	mempool_free(pe, s->pending_pool);
450879129d2SMikulas Patocka 	smp_mb__before_atomic_dec();
451879129d2SMikulas Patocka 	atomic_dec(&s->pending_exceptions_count);
4521da177e4SLinus Torvalds }
4531da177e4SLinus Torvalds 
/*
 * Add new_e to exception table eh.
 *
 * When the table groups consecutive chunks (hash_shift != 0), new_e is
 * first compared against the entries already in its bucket.  If it directly
 * extends an existing run of chunks at either end, in both old and new
 * chunk numbering, that entry's consecutive count is bumped and new_e is
 * freed instead of being linked in.  Otherwise new_e is linked into the
 * bucket.
 *
 * Because new_e may be freed here, callers must not use it afterwards.
 */
static void insert_exception(struct dm_exception_table *eh,
			     struct dm_exception *new_e)
{
	struct list_head *l;
	struct dm_exception *e = NULL;

	l = &eh->table[exception_hash(eh, new_e->old_chunk)];

	/* Add immediately if this table doesn't support consecutive chunks */
	if (!eh->hash_shift)
		goto out;

	/* List is ordered by old_chunk */
	list_for_each_entry_reverse(e, l, hash_list) {
		/* Insert after an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk +
					 dm_consecutive_chunk_count(e) + 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
					 dm_consecutive_chunk_count(e) + 1)) {
			dm_consecutive_chunk_count_inc(e);
			free_exception(new_e);
			return;
		}

		/* Insert before an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk - 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
			dm_consecutive_chunk_count_inc(e);
			e->old_chunk--;
			e->new_chunk--;
			free_exception(new_e);
			return;
		}

		/* Found the entry that new_e has to follow */
		if (new_e->old_chunk > e->old_chunk)
			break;
	}

out:
	/*
	 * e is NULL only on the goto path above, in which case new_e goes
	 * straight after the bucket head.  Otherwise it is linked after e.
	 * NOTE(review): if the loop ran to completion, e presumably aliases
	 * the bucket head, giving the same placement - confirm against the
	 * list_for_each_entry_reverse() definition.
	 */
	list_add(&new_e->hash_list, e ? &e->hash_list : l);
}
495d74f81f8SMilan Broz 
496a159c1acSJonathan Brassow /*
497a159c1acSJonathan Brassow  * Callback used by the exception stores to load exceptions when
498a159c1acSJonathan Brassow  * initialising.
499a159c1acSJonathan Brassow  */
500a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new)
5011da177e4SLinus Torvalds {
502a159c1acSJonathan Brassow 	struct dm_snapshot *s = context;
5031d4989c8SJon Brassow 	struct dm_exception *e;
5041da177e4SLinus Torvalds 
5051da177e4SLinus Torvalds 	e = alloc_exception();
5061da177e4SLinus Torvalds 	if (!e)
5071da177e4SLinus Torvalds 		return -ENOMEM;
5081da177e4SLinus Torvalds 
5091da177e4SLinus Torvalds 	e->old_chunk = old;
510d74f81f8SMilan Broz 
511d74f81f8SMilan Broz 	/* Consecutive_count is implicitly initialised to zero */
5121da177e4SLinus Torvalds 	e->new_chunk = new;
513d74f81f8SMilan Broz 
514d32a6ea6SJon Brassow 	insert_exception(&s->complete, e);
515d74f81f8SMilan Broz 
5161da177e4SLinus Torvalds 	return 0;
5171da177e4SLinus Torvalds }
5181da177e4SLinus Torvalds 
5197e201b35SMikulas Patocka #define min_not_zero(l, r) (((l) == 0) ? (r) : (((r) == 0) ? (l) : min(l, r)))
5207e201b35SMikulas Patocka 
5217e201b35SMikulas Patocka /*
5227e201b35SMikulas Patocka  * Return a minimum chunk size of all snapshots that have the specified origin.
5237e201b35SMikulas Patocka  * Return zero if the origin has no snapshots.
5247e201b35SMikulas Patocka  */
5257e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o)
5267e201b35SMikulas Patocka {
5277e201b35SMikulas Patocka 	struct dm_snapshot *snap;
5287e201b35SMikulas Patocka 	unsigned chunk_size = 0;
5297e201b35SMikulas Patocka 
5307e201b35SMikulas Patocka 	if (o)
5317e201b35SMikulas Patocka 		list_for_each_entry(snap, &o->snapshots, list)
5327e201b35SMikulas Patocka 			chunk_size = min_not_zero(chunk_size,
5337e201b35SMikulas Patocka 						  snap->store->chunk_size);
5347e201b35SMikulas Patocka 
5357e201b35SMikulas Patocka 	return chunk_size;
5367e201b35SMikulas Patocka }
5377e201b35SMikulas Patocka 
5381da177e4SLinus Torvalds /*
5391da177e4SLinus Torvalds  * Hard coded magic.
5401da177e4SLinus Torvalds  */
5411da177e4SLinus Torvalds static int calc_max_buckets(void)
5421da177e4SLinus Torvalds {
5431da177e4SLinus Torvalds 	/* use a fixed size of 2MB */
5441da177e4SLinus Torvalds 	unsigned long mem = 2 * 1024 * 1024;
5451da177e4SLinus Torvalds 	mem /= sizeof(struct list_head);
5461da177e4SLinus Torvalds 
5471da177e4SLinus Torvalds 	return mem;
5481da177e4SLinus Torvalds }
5491da177e4SLinus Torvalds 
5501da177e4SLinus Torvalds /*
5511da177e4SLinus Torvalds  * Allocate room for a suitable hash table.
5521da177e4SLinus Torvalds  */
553fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s)
5541da177e4SLinus Torvalds {
5551da177e4SLinus Torvalds 	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
5561da177e4SLinus Torvalds 
5571da177e4SLinus Torvalds 	/*
5581da177e4SLinus Torvalds 	 * Calculate based on the size of the original volume or
5591da177e4SLinus Torvalds 	 * the COW volume...
5601da177e4SLinus Torvalds 	 */
561fee1998eSJonathan Brassow 	cow_dev_size = get_dev_size(s->store->cow->bdev);
5621da177e4SLinus Torvalds 	origin_dev_size = get_dev_size(s->origin->bdev);
5631da177e4SLinus Torvalds 	max_buckets = calc_max_buckets();
5641da177e4SLinus Torvalds 
565fee1998eSJonathan Brassow 	hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
5661da177e4SLinus Torvalds 	hash_size = min(hash_size, max_buckets);
5671da177e4SLinus Torvalds 
5688e87b9b8SMikulas Patocka 	if (hash_size < 64)
5698e87b9b8SMikulas Patocka 		hash_size = 64;
5708defd830SRobert P. J. Day 	hash_size = rounddown_pow_of_two(hash_size);
571d74f81f8SMilan Broz 	if (init_exception_table(&s->complete, hash_size,
572d74f81f8SMilan Broz 				 DM_CHUNK_CONSECUTIVE_BITS))
5731da177e4SLinus Torvalds 		return -ENOMEM;
5741da177e4SLinus Torvalds 
5751da177e4SLinus Torvalds 	/*
5761da177e4SLinus Torvalds 	 * Allocate hash table for in-flight exceptions
5771da177e4SLinus Torvalds 	 * Make this smaller than the real hash table
5781da177e4SLinus Torvalds 	 */
5791da177e4SLinus Torvalds 	hash_size >>= 3;
5801da177e4SLinus Torvalds 	if (hash_size < 64)
5811da177e4SLinus Torvalds 		hash_size = 64;
5821da177e4SLinus Torvalds 
583d74f81f8SMilan Broz 	if (init_exception_table(&s->pending, hash_size, 0)) {
5841da177e4SLinus Torvalds 		exit_exception_table(&s->complete, exception_cache);
5851da177e4SLinus Torvalds 		return -ENOMEM;
5861da177e4SLinus Torvalds 	}
5871da177e4SLinus Torvalds 
5881da177e4SLinus Torvalds 	return 0;
5891da177e4SLinus Torvalds }
5901da177e4SLinus Torvalds 
5911da177e4SLinus Torvalds /*
5921da177e4SLinus Torvalds  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
5931da177e4SLinus Torvalds  */
5941da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
5951da177e4SLinus Torvalds {
5961da177e4SLinus Torvalds 	struct dm_snapshot *s;
597cd45daffSMikulas Patocka 	int i;
5981da177e4SLinus Torvalds 	int r = -EINVAL;
5991da177e4SLinus Torvalds 	char *origin_path;
600fee1998eSJonathan Brassow 	struct dm_exception_store *store;
601fee1998eSJonathan Brassow 	unsigned args_used;
6021da177e4SLinus Torvalds 
6034c7e3bf4SMark McLoughlin 	if (argc != 4) {
60472d94861SAlasdair G Kergon 		ti->error = "requires exactly 4 arguments";
6051da177e4SLinus Torvalds 		r = -EINVAL;
606fee1998eSJonathan Brassow 		goto bad_args;
6071da177e4SLinus Torvalds 	}
6081da177e4SLinus Torvalds 
6091da177e4SLinus Torvalds 	origin_path = argv[0];
610fee1998eSJonathan Brassow 	argv++;
611fee1998eSJonathan Brassow 	argc--;
6121da177e4SLinus Torvalds 
613fee1998eSJonathan Brassow 	r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
614fee1998eSJonathan Brassow 	if (r) {
615fee1998eSJonathan Brassow 		ti->error = "Couldn't create exception store";
6161da177e4SLinus Torvalds 		r = -EINVAL;
617fee1998eSJonathan Brassow 		goto bad_args;
6181da177e4SLinus Torvalds 	}
6191da177e4SLinus Torvalds 
620fee1998eSJonathan Brassow 	argv += args_used;
621fee1998eSJonathan Brassow 	argc -= args_used;
622fee1998eSJonathan Brassow 
6231da177e4SLinus Torvalds 	s = kmalloc(sizeof(*s), GFP_KERNEL);
624fee1998eSJonathan Brassow 	if (!s) {
6251da177e4SLinus Torvalds 		ti->error = "Cannot allocate snapshot context private "
6261da177e4SLinus Torvalds 		    "structure";
6271da177e4SLinus Torvalds 		r = -ENOMEM;
628fee1998eSJonathan Brassow 		goto bad_snap;
6291da177e4SLinus Torvalds 	}
6301da177e4SLinus Torvalds 
6311da177e4SLinus Torvalds 	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
6321da177e4SLinus Torvalds 	if (r) {
6331da177e4SLinus Torvalds 		ti->error = "Cannot get origin device";
634fee1998eSJonathan Brassow 		goto bad_origin;
6351da177e4SLinus Torvalds 	}
6361da177e4SLinus Torvalds 
637fee1998eSJonathan Brassow 	s->store = store;
6381da177e4SLinus Torvalds 	s->valid = 1;
639aa14edebSAlasdair G Kergon 	s->active = 0;
640879129d2SMikulas Patocka 	atomic_set(&s->pending_exceptions_count, 0);
6411da177e4SLinus Torvalds 	init_rwsem(&s->lock);
642ca3a931fSAlasdair G Kergon 	spin_lock_init(&s->pe_lock);
6431da177e4SLinus Torvalds 
6441da177e4SLinus Torvalds 	/* Allocate hash table for COW data */
645fee1998eSJonathan Brassow 	if (init_hash_tables(s)) {
6461da177e4SLinus Torvalds 		ti->error = "Unable to allocate hash table space";
6471da177e4SLinus Torvalds 		r = -ENOMEM;
648fee1998eSJonathan Brassow 		goto bad_hash_tables;
6491da177e4SLinus Torvalds 	}
6501da177e4SLinus Torvalds 
651eb69aca5SHeinz Mauelshagen 	r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
6521da177e4SLinus Torvalds 	if (r) {
6531da177e4SLinus Torvalds 		ti->error = "Could not create kcopyd client";
654fee1998eSJonathan Brassow 		goto bad_kcopyd;
6551da177e4SLinus Torvalds 	}
6561da177e4SLinus Torvalds 
65792e86812SMikulas Patocka 	s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
65892e86812SMikulas Patocka 	if (!s->pending_pool) {
65992e86812SMikulas Patocka 		ti->error = "Could not allocate mempool for pending exceptions";
660fee1998eSJonathan Brassow 		goto bad_pending_pool;
66192e86812SMikulas Patocka 	}
66292e86812SMikulas Patocka 
663cd45daffSMikulas Patocka 	s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
664cd45daffSMikulas Patocka 							 tracked_chunk_cache);
665cd45daffSMikulas Patocka 	if (!s->tracked_chunk_pool) {
666cd45daffSMikulas Patocka 		ti->error = "Could not allocate tracked_chunk mempool for "
667cd45daffSMikulas Patocka 			    "tracking reads";
66892e86812SMikulas Patocka 		goto bad_tracked_chunk_pool;
669cd45daffSMikulas Patocka 	}
670cd45daffSMikulas Patocka 
671cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
672cd45daffSMikulas Patocka 		INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
673cd45daffSMikulas Patocka 
674cd45daffSMikulas Patocka 	spin_lock_init(&s->tracked_chunk_lock);
675cd45daffSMikulas Patocka 
676aa14edebSAlasdair G Kergon 	/* Metadata must only be loaded into one table at once */
677493df71cSJonathan Brassow 	r = s->store->type->read_metadata(s->store, dm_add_exception,
678493df71cSJonathan Brassow 					  (void *)s);
6790764147bSMilan Broz 	if (r < 0) {
680f9cea4f7SMark McLoughlin 		ti->error = "Failed to read snapshot metadata";
681cd45daffSMikulas Patocka 		goto bad_load_and_register;
6820764147bSMilan Broz 	} else if (r > 0) {
6830764147bSMilan Broz 		s->valid = 0;
6840764147bSMilan Broz 		DMWARN("Snapshot is marked invalid.");
685f9cea4f7SMark McLoughlin 	}
686aa14edebSAlasdair G Kergon 
687ca3a931fSAlasdair G Kergon 	bio_list_init(&s->queued_bios);
688c4028958SDavid Howells 	INIT_WORK(&s->queued_bios_work, flush_queued_bios);
689ca3a931fSAlasdair G Kergon 
6903f2412dcSMikulas Patocka 	if (!s->store->chunk_size) {
6913f2412dcSMikulas Patocka 		ti->error = "Chunk size not set";
6923f2412dcSMikulas Patocka 		goto bad_load_and_register;
6933f2412dcSMikulas Patocka 	}
6943f2412dcSMikulas Patocka 
6951da177e4SLinus Torvalds 	/* Add snapshot to the list of snapshots for this origin */
696aa14edebSAlasdair G Kergon 	/* Exceptions aren't triggered till snapshot_resume() is called */
6971da177e4SLinus Torvalds 	if (register_snapshot(s)) {
6981da177e4SLinus Torvalds 		r = -EINVAL;
6991da177e4SLinus Torvalds 		ti->error = "Cannot register snapshot origin";
700cd45daffSMikulas Patocka 		goto bad_load_and_register;
7011da177e4SLinus Torvalds 	}
7021da177e4SLinus Torvalds 
7031da177e4SLinus Torvalds 	ti->private = s;
704d0216849SJonathan Brassow 	ti->split_io = s->store->chunk_size;
705494b3ee7SMikulas Patocka 	ti->num_flush_requests = 1;
7061da177e4SLinus Torvalds 
7071da177e4SLinus Torvalds 	return 0;
7081da177e4SLinus Torvalds 
709cd45daffSMikulas Patocka bad_load_and_register:
710cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
711cd45daffSMikulas Patocka 
71292e86812SMikulas Patocka bad_tracked_chunk_pool:
71392e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
71492e86812SMikulas Patocka 
715fee1998eSJonathan Brassow bad_pending_pool:
716eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
7171da177e4SLinus Torvalds 
718fee1998eSJonathan Brassow bad_kcopyd:
7191da177e4SLinus Torvalds 	exit_exception_table(&s->pending, pending_cache);
7201da177e4SLinus Torvalds 	exit_exception_table(&s->complete, exception_cache);
7211da177e4SLinus Torvalds 
722fee1998eSJonathan Brassow bad_hash_tables:
7231da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
7241da177e4SLinus Torvalds 
725fee1998eSJonathan Brassow bad_origin:
7261da177e4SLinus Torvalds 	kfree(s);
7271da177e4SLinus Torvalds 
728fee1998eSJonathan Brassow bad_snap:
729fee1998eSJonathan Brassow 	dm_exception_store_destroy(store);
730fee1998eSJonathan Brassow 
731fee1998eSJonathan Brassow bad_args:
7321da177e4SLinus Torvalds 	return r;
7331da177e4SLinus Torvalds }
7341da177e4SLinus Torvalds 
73531c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s)
73631c93a0cSMilan Broz {
737eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
73831c93a0cSMilan Broz 	s->kcopyd_client = NULL;
73931c93a0cSMilan Broz 
74031c93a0cSMilan Broz 	exit_exception_table(&s->pending, pending_cache);
74131c93a0cSMilan Broz 	exit_exception_table(&s->complete, exception_cache);
74231c93a0cSMilan Broz }
74331c93a0cSMilan Broz 
7441da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti)
7451da177e4SLinus Torvalds {
746cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
747cd45daffSMikulas Patocka 	int i;
748cd45daffSMikulas Patocka #endif
749028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
7501da177e4SLinus Torvalds 
751ca3a931fSAlasdair G Kergon 	flush_workqueue(ksnapd);
752ca3a931fSAlasdair G Kergon 
753138728dcSAlasdair G Kergon 	/* Prevent further origin writes from using this snapshot. */
754138728dcSAlasdair G Kergon 	/* After this returns there can be no new kcopyd jobs. */
7551da177e4SLinus Torvalds 	unregister_snapshot(s);
7561da177e4SLinus Torvalds 
757879129d2SMikulas Patocka 	while (atomic_read(&s->pending_exceptions_count))
75890fa1527SMikulas Patocka 		msleep(1);
759879129d2SMikulas Patocka 	/*
760879129d2SMikulas Patocka 	 * Ensure instructions in mempool_destroy aren't reordered
761879129d2SMikulas Patocka 	 * before atomic_read.
762879129d2SMikulas Patocka 	 */
763879129d2SMikulas Patocka 	smp_mb();
764879129d2SMikulas Patocka 
765cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
766cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
767cd45daffSMikulas Patocka 		BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
768cd45daffSMikulas Patocka #endif
769cd45daffSMikulas Patocka 
770cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
771cd45daffSMikulas Patocka 
77231c93a0cSMilan Broz 	__free_exceptions(s);
7731da177e4SLinus Torvalds 
77492e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
77592e86812SMikulas Patocka 
7761da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
777fee1998eSJonathan Brassow 
778fee1998eSJonathan Brassow 	dm_exception_store_destroy(s->store);
779138728dcSAlasdair G Kergon 
7801da177e4SLinus Torvalds 	kfree(s);
7811da177e4SLinus Torvalds }
7821da177e4SLinus Torvalds 
7831da177e4SLinus Torvalds /*
7841da177e4SLinus Torvalds  * Flush a list of buffers.
7851da177e4SLinus Torvalds  */
7861da177e4SLinus Torvalds static void flush_bios(struct bio *bio)
7871da177e4SLinus Torvalds {
7881da177e4SLinus Torvalds 	struct bio *n;
7891da177e4SLinus Torvalds 
7901da177e4SLinus Torvalds 	while (bio) {
7911da177e4SLinus Torvalds 		n = bio->bi_next;
7921da177e4SLinus Torvalds 		bio->bi_next = NULL;
7931da177e4SLinus Torvalds 		generic_make_request(bio);
7941da177e4SLinus Torvalds 		bio = n;
7951da177e4SLinus Torvalds 	}
7961da177e4SLinus Torvalds }
7971da177e4SLinus Torvalds 
798c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work)
799ca3a931fSAlasdair G Kergon {
800c4028958SDavid Howells 	struct dm_snapshot *s =
801c4028958SDavid Howells 		container_of(work, struct dm_snapshot, queued_bios_work);
802ca3a931fSAlasdair G Kergon 	struct bio *queued_bios;
803ca3a931fSAlasdair G Kergon 	unsigned long flags;
804ca3a931fSAlasdair G Kergon 
805ca3a931fSAlasdair G Kergon 	spin_lock_irqsave(&s->pe_lock, flags);
806ca3a931fSAlasdair G Kergon 	queued_bios = bio_list_get(&s->queued_bios);
807ca3a931fSAlasdair G Kergon 	spin_unlock_irqrestore(&s->pe_lock, flags);
808ca3a931fSAlasdair G Kergon 
809ca3a931fSAlasdair G Kergon 	flush_bios(queued_bios);
810ca3a931fSAlasdair G Kergon }
811ca3a931fSAlasdair G Kergon 
8121da177e4SLinus Torvalds /*
8131da177e4SLinus Torvalds  * Error a list of buffers.
8141da177e4SLinus Torvalds  */
8151da177e4SLinus Torvalds static void error_bios(struct bio *bio)
8161da177e4SLinus Torvalds {
8171da177e4SLinus Torvalds 	struct bio *n;
8181da177e4SLinus Torvalds 
8191da177e4SLinus Torvalds 	while (bio) {
8201da177e4SLinus Torvalds 		n = bio->bi_next;
8211da177e4SLinus Torvalds 		bio->bi_next = NULL;
8226712ecf8SNeilBrown 		bio_io_error(bio);
8231da177e4SLinus Torvalds 		bio = n;
8241da177e4SLinus Torvalds 	}
8251da177e4SLinus Torvalds }
8261da177e4SLinus Torvalds 
827695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err)
82876df1c65SAlasdair G Kergon {
82976df1c65SAlasdair G Kergon 	if (!s->valid)
83076df1c65SAlasdair G Kergon 		return;
83176df1c65SAlasdair G Kergon 
83276df1c65SAlasdair G Kergon 	if (err == -EIO)
83376df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Error reading/writing.");
83476df1c65SAlasdair G Kergon 	else if (err == -ENOMEM)
83576df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Unable to allocate exception.");
83676df1c65SAlasdair G Kergon 
837493df71cSJonathan Brassow 	if (s->store->type->drop_snapshot)
838493df71cSJonathan Brassow 		s->store->type->drop_snapshot(s->store);
83976df1c65SAlasdair G Kergon 
84076df1c65SAlasdair G Kergon 	s->valid = 0;
84176df1c65SAlasdair G Kergon 
8420cea9c78SJonathan Brassow 	dm_table_event(s->store->ti->table);
84376df1c65SAlasdair G Kergon }
84476df1c65SAlasdair G Kergon 
845028867acSAlasdair G Kergon static void get_pending_exception(struct dm_snap_pending_exception *pe)
8464b832e8dSAlasdair G Kergon {
8474b832e8dSAlasdair G Kergon 	atomic_inc(&pe->ref_count);
8484b832e8dSAlasdair G Kergon }
8494b832e8dSAlasdair G Kergon 
850028867acSAlasdair G Kergon static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
8514b832e8dSAlasdair G Kergon {
852028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *primary_pe;
8534b832e8dSAlasdair G Kergon 	struct bio *origin_bios = NULL;
8544b832e8dSAlasdair G Kergon 
8554b832e8dSAlasdair G Kergon 	primary_pe = pe->primary_pe;
8564b832e8dSAlasdair G Kergon 
8574b832e8dSAlasdair G Kergon 	/*
8584b832e8dSAlasdair G Kergon 	 * If this pe is involved in a write to the origin and
8594b832e8dSAlasdair G Kergon 	 * it is the last sibling to complete then release
8604b832e8dSAlasdair G Kergon 	 * the bios for the original write to the origin.
8614b832e8dSAlasdair G Kergon 	 */
8624b832e8dSAlasdair G Kergon 	if (primary_pe &&
8637c5f78b9SMikulas Patocka 	    atomic_dec_and_test(&primary_pe->ref_count)) {
8644b832e8dSAlasdair G Kergon 		origin_bios = bio_list_get(&primary_pe->origin_bios);
8657c5f78b9SMikulas Patocka 		free_pending_exception(primary_pe);
8667c5f78b9SMikulas Patocka 	}
8674b832e8dSAlasdair G Kergon 
8684b832e8dSAlasdair G Kergon 	/*
8694b832e8dSAlasdair G Kergon 	 * Free the pe if it's not linked to an origin write or if
8704b832e8dSAlasdair G Kergon 	 * it's not itself a primary pe.
8714b832e8dSAlasdair G Kergon 	 */
8724b832e8dSAlasdair G Kergon 	if (!primary_pe || primary_pe != pe)
8734b832e8dSAlasdair G Kergon 		free_pending_exception(pe);
8744b832e8dSAlasdair G Kergon 
8754b832e8dSAlasdair G Kergon 	return origin_bios;
8764b832e8dSAlasdair G Kergon }
8774b832e8dSAlasdair G Kergon 
878028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success)
8791da177e4SLinus Torvalds {
8801d4989c8SJon Brassow 	struct dm_exception *e;
8811da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
8829d493fa8SAlasdair G Kergon 	struct bio *origin_bios = NULL;
8839d493fa8SAlasdair G Kergon 	struct bio *snapshot_bios = NULL;
8849d493fa8SAlasdair G Kergon 	int error = 0;
8851da177e4SLinus Torvalds 
88676df1c65SAlasdair G Kergon 	if (!success) {
88776df1c65SAlasdair G Kergon 		/* Read/write error - snapshot is unusable */
8881da177e4SLinus Torvalds 		down_write(&s->lock);
889695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -EIO);
8909d493fa8SAlasdair G Kergon 		error = 1;
89176df1c65SAlasdair G Kergon 		goto out;
89276df1c65SAlasdair G Kergon 	}
89376df1c65SAlasdair G Kergon 
89476df1c65SAlasdair G Kergon 	e = alloc_exception();
89576df1c65SAlasdair G Kergon 	if (!e) {
89676df1c65SAlasdair G Kergon 		down_write(&s->lock);
897695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -ENOMEM);
8989d493fa8SAlasdair G Kergon 		error = 1;
8991da177e4SLinus Torvalds 		goto out;
9001da177e4SLinus Torvalds 	}
9011da177e4SLinus Torvalds 	*e = pe->e;
9021da177e4SLinus Torvalds 
9039d493fa8SAlasdair G Kergon 	down_write(&s->lock);
9049d493fa8SAlasdair G Kergon 	if (!s->valid) {
9059d493fa8SAlasdair G Kergon 		free_exception(e);
9069d493fa8SAlasdair G Kergon 		error = 1;
9079d493fa8SAlasdair G Kergon 		goto out;
9089d493fa8SAlasdair G Kergon 	}
9099d493fa8SAlasdair G Kergon 
9101da177e4SLinus Torvalds 	/*
911a8d41b59SMikulas Patocka 	 * Check for conflicting reads. This is extremely improbable,
91290fa1527SMikulas Patocka 	 * so msleep(1) is sufficient and there is no need for a wait queue.
913a8d41b59SMikulas Patocka 	 */
914a8d41b59SMikulas Patocka 	while (__chunk_is_tracked(s, pe->e.old_chunk))
91590fa1527SMikulas Patocka 		msleep(1);
916a8d41b59SMikulas Patocka 
917a8d41b59SMikulas Patocka 	/*
9181da177e4SLinus Torvalds 	 * Add a proper exception, and remove the
9191da177e4SLinus Torvalds 	 * in-flight exception from the list.
9201da177e4SLinus Torvalds 	 */
921d32a6ea6SJon Brassow 	insert_exception(&s->complete, e);
9221da177e4SLinus Torvalds 
9231da177e4SLinus Torvalds  out:
924695368acSAlasdair G Kergon 	remove_exception(&pe->e);
9259d493fa8SAlasdair G Kergon 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
9264b832e8dSAlasdair G Kergon 	origin_bios = put_pending_exception(pe);
927b4b610f6SAlasdair G Kergon 
9289d493fa8SAlasdair G Kergon 	up_write(&s->lock);
9299d493fa8SAlasdair G Kergon 
9309d493fa8SAlasdair G Kergon 	/* Submit any pending write bios */
9319d493fa8SAlasdair G Kergon 	if (error)
9329d493fa8SAlasdair G Kergon 		error_bios(snapshot_bios);
9339d493fa8SAlasdair G Kergon 	else
9349d493fa8SAlasdair G Kergon 		flush_bios(snapshot_bios);
9359d493fa8SAlasdair G Kergon 
9369d493fa8SAlasdair G Kergon 	flush_bios(origin_bios);
9371da177e4SLinus Torvalds }
9381da177e4SLinus Torvalds 
/*
 * Completion callback handed to the exception store's commit_exception.
 * @context is the pending exception; it is passed straight on to
 * pending_complete() together with the success flag.
 */
static void commit_callback(void *context, int success)
{
	pending_complete(context, success);
}
9451da177e4SLinus Torvalds 
9461da177e4SLinus Torvalds /*
9471da177e4SLinus Torvalds  * Called when the copy I/O has finished.  kcopyd actually runs
9481da177e4SLinus Torvalds  * this code so don't block.
9491da177e4SLinus Torvalds  */
9504cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context)
9511da177e4SLinus Torvalds {
952028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = context;
9531da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
9541da177e4SLinus Torvalds 
9551da177e4SLinus Torvalds 	if (read_err || write_err)
9561da177e4SLinus Torvalds 		pending_complete(pe, 0);
9571da177e4SLinus Torvalds 
9581da177e4SLinus Torvalds 	else
9591da177e4SLinus Torvalds 		/* Update the metadata if we are persistent */
960493df71cSJonathan Brassow 		s->store->type->commit_exception(s->store, &pe->e,
961b2a11465SJonathan Brassow 						 commit_callback, pe);
9621da177e4SLinus Torvalds }
9631da177e4SLinus Torvalds 
9641da177e4SLinus Torvalds /*
9651da177e4SLinus Torvalds  * Dispatches the copy operation to kcopyd.
9661da177e4SLinus Torvalds  */
967028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe)
9681da177e4SLinus Torvalds {
9691da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
97022a1ceb1SHeinz Mauelshagen 	struct dm_io_region src, dest;
9711da177e4SLinus Torvalds 	struct block_device *bdev = s->origin->bdev;
9721da177e4SLinus Torvalds 	sector_t dev_size;
9731da177e4SLinus Torvalds 
9741da177e4SLinus Torvalds 	dev_size = get_dev_size(bdev);
9751da177e4SLinus Torvalds 
9761da177e4SLinus Torvalds 	src.bdev = bdev;
97771fab00aSJonathan Brassow 	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
978df96eee6SMikulas Patocka 	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
9791da177e4SLinus Torvalds 
98049beb2b8SJonathan Brassow 	dest.bdev = s->store->cow->bdev;
98171fab00aSJonathan Brassow 	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
9821da177e4SLinus Torvalds 	dest.count = src.count;
9831da177e4SLinus Torvalds 
9841da177e4SLinus Torvalds 	/* Hand over to kcopyd */
985eb69aca5SHeinz Mauelshagen 	dm_kcopyd_copy(s->kcopyd_client,
9861da177e4SLinus Torvalds 		    &src, 1, &dest, 0, copy_callback, pe);
9871da177e4SLinus Torvalds }
9881da177e4SLinus Torvalds 
9892913808eSMikulas Patocka static struct dm_snap_pending_exception *
9902913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
9912913808eSMikulas Patocka {
9921d4989c8SJon Brassow 	struct dm_exception *e = lookup_exception(&s->pending, chunk);
9932913808eSMikulas Patocka 
9942913808eSMikulas Patocka 	if (!e)
9952913808eSMikulas Patocka 		return NULL;
9962913808eSMikulas Patocka 
9972913808eSMikulas Patocka 	return container_of(e, struct dm_snap_pending_exception, e);
9982913808eSMikulas Patocka }
9992913808eSMikulas Patocka 
10001da177e4SLinus Torvalds /*
10011da177e4SLinus Torvalds  * Looks to see if this snapshot already has a pending exception
10021da177e4SLinus Torvalds  * for this chunk, otherwise it allocates a new one and inserts
10031da177e4SLinus Torvalds  * it into the pending table.
10041da177e4SLinus Torvalds  *
10051da177e4SLinus Torvalds  * NOTE: a write lock must be held on snap->lock before calling
10061da177e4SLinus Torvalds  * this.
10071da177e4SLinus Torvalds  */
1008028867acSAlasdair G Kergon static struct dm_snap_pending_exception *
1009c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s,
1010c6621392SMikulas Patocka 			 struct dm_snap_pending_exception *pe, chunk_t chunk)
10111da177e4SLinus Torvalds {
1012c6621392SMikulas Patocka 	struct dm_snap_pending_exception *pe2;
101376df1c65SAlasdair G Kergon 
10142913808eSMikulas Patocka 	pe2 = __lookup_pending_exception(s, chunk);
10152913808eSMikulas Patocka 	if (pe2) {
10161da177e4SLinus Torvalds 		free_pending_exception(pe);
10172913808eSMikulas Patocka 		return pe2;
101876df1c65SAlasdair G Kergon 	}
101976df1c65SAlasdair G Kergon 
10201da177e4SLinus Torvalds 	pe->e.old_chunk = chunk;
10211da177e4SLinus Torvalds 	bio_list_init(&pe->origin_bios);
10221da177e4SLinus Torvalds 	bio_list_init(&pe->snapshot_bios);
1023b4b610f6SAlasdair G Kergon 	pe->primary_pe = NULL;
10244b832e8dSAlasdair G Kergon 	atomic_set(&pe->ref_count, 0);
10251da177e4SLinus Torvalds 	pe->started = 0;
10261da177e4SLinus Torvalds 
1027493df71cSJonathan Brassow 	if (s->store->type->prepare_exception(s->store, &pe->e)) {
10281da177e4SLinus Torvalds 		free_pending_exception(pe);
10291da177e4SLinus Torvalds 		return NULL;
10301da177e4SLinus Torvalds 	}
10311da177e4SLinus Torvalds 
10324b832e8dSAlasdair G Kergon 	get_pending_exception(pe);
10331da177e4SLinus Torvalds 	insert_exception(&s->pending, &pe->e);
10341da177e4SLinus Torvalds 
10351da177e4SLinus Torvalds 	return pe;
10361da177e4SLinus Torvalds }
10371da177e4SLinus Torvalds 
10381d4989c8SJon Brassow static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
1039d74f81f8SMilan Broz 			    struct bio *bio, chunk_t chunk)
10401da177e4SLinus Torvalds {
104149beb2b8SJonathan Brassow 	bio->bi_bdev = s->store->cow->bdev;
104271fab00aSJonathan Brassow 	bio->bi_sector = chunk_to_sector(s->store,
104371fab00aSJonathan Brassow 					 dm_chunk_number(e->new_chunk) +
1044d74f81f8SMilan Broz 					 (chunk - e->old_chunk)) +
104571fab00aSJonathan Brassow 					 (bio->bi_sector &
104671fab00aSJonathan Brassow 					  s->store->chunk_mask);
10471da177e4SLinus Torvalds }
10481da177e4SLinus Torvalds 
10491da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio,
10501da177e4SLinus Torvalds 			union map_info *map_context)
10511da177e4SLinus Torvalds {
10521d4989c8SJon Brassow 	struct dm_exception *e;
1053028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
1054d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
10551da177e4SLinus Torvalds 	chunk_t chunk;
1056028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = NULL;
10571da177e4SLinus Torvalds 
1058494b3ee7SMikulas Patocka 	if (unlikely(bio_empty_barrier(bio))) {
1059494b3ee7SMikulas Patocka 		bio->bi_bdev = s->store->cow->bdev;
1060494b3ee7SMikulas Patocka 		return DM_MAPIO_REMAPPED;
1061494b3ee7SMikulas Patocka 	}
1062494b3ee7SMikulas Patocka 
106371fab00aSJonathan Brassow 	chunk = sector_to_chunk(s->store, bio->bi_sector);
10641da177e4SLinus Torvalds 
10651da177e4SLinus Torvalds 	/* Full snapshots are not usable */
106676df1c65SAlasdair G Kergon 	/* To get here the table must be live so s->active is always set. */
10671da177e4SLinus Torvalds 	if (!s->valid)
1068f6a80ea8SAlasdair G Kergon 		return -EIO;
10691da177e4SLinus Torvalds 
10701da177e4SLinus Torvalds 	/* FIXME: should only take write lock if we need
10711da177e4SLinus Torvalds 	 * to copy an exception */
10721da177e4SLinus Torvalds 	down_write(&s->lock);
10731da177e4SLinus Torvalds 
107476df1c65SAlasdair G Kergon 	if (!s->valid) {
107576df1c65SAlasdair G Kergon 		r = -EIO;
107676df1c65SAlasdair G Kergon 		goto out_unlock;
107776df1c65SAlasdair G Kergon 	}
107876df1c65SAlasdair G Kergon 
10791da177e4SLinus Torvalds 	/* If the block is already remapped - use that, else remap it */
10801da177e4SLinus Torvalds 	e = lookup_exception(&s->complete, chunk);
10811da177e4SLinus Torvalds 	if (e) {
1082d74f81f8SMilan Broz 		remap_exception(s, e, bio, chunk);
108376df1c65SAlasdair G Kergon 		goto out_unlock;
108476df1c65SAlasdair G Kergon 	}
10851da177e4SLinus Torvalds 
1086ba40a2aaSAlasdair G Kergon 	/*
1087ba40a2aaSAlasdair G Kergon 	 * Write to snapshot - higher level takes care of RW/RO
1088ba40a2aaSAlasdair G Kergon 	 * flags so we should only get this if we are
1089ba40a2aaSAlasdair G Kergon 	 * writeable.
1090ba40a2aaSAlasdair G Kergon 	 */
1091ba40a2aaSAlasdair G Kergon 	if (bio_rw(bio) == WRITE) {
10922913808eSMikulas Patocka 		pe = __lookup_pending_exception(s, chunk);
10932913808eSMikulas Patocka 		if (!pe) {
1094c6621392SMikulas Patocka 			up_write(&s->lock);
1095c6621392SMikulas Patocka 			pe = alloc_pending_exception(s);
1096c6621392SMikulas Patocka 			down_write(&s->lock);
1097c6621392SMikulas Patocka 
1098c6621392SMikulas Patocka 			if (!s->valid) {
1099c6621392SMikulas Patocka 				free_pending_exception(pe);
1100c6621392SMikulas Patocka 				r = -EIO;
1101c6621392SMikulas Patocka 				goto out_unlock;
1102c6621392SMikulas Patocka 			}
1103c6621392SMikulas Patocka 
110435bf659bSMikulas Patocka 			e = lookup_exception(&s->complete, chunk);
110535bf659bSMikulas Patocka 			if (e) {
110635bf659bSMikulas Patocka 				free_pending_exception(pe);
110735bf659bSMikulas Patocka 				remap_exception(s, e, bio, chunk);
110835bf659bSMikulas Patocka 				goto out_unlock;
110935bf659bSMikulas Patocka 			}
111035bf659bSMikulas Patocka 
1111c6621392SMikulas Patocka 			pe = __find_pending_exception(s, pe, chunk);
11121da177e4SLinus Torvalds 			if (!pe) {
1113695368acSAlasdair G Kergon 				__invalidate_snapshot(s, -ENOMEM);
11141da177e4SLinus Torvalds 				r = -EIO;
111576df1c65SAlasdair G Kergon 				goto out_unlock;
111676df1c65SAlasdair G Kergon 			}
11172913808eSMikulas Patocka 		}
111876df1c65SAlasdair G Kergon 
1119d74f81f8SMilan Broz 		remap_exception(s, &pe->e, bio, chunk);
11201da177e4SLinus Torvalds 		bio_list_add(&pe->snapshot_bios, bio);
11211da177e4SLinus Torvalds 
1122d2a7ad29SKiyoshi Ueda 		r = DM_MAPIO_SUBMITTED;
1123ba40a2aaSAlasdair G Kergon 
11241da177e4SLinus Torvalds 		if (!pe->started) {
11251da177e4SLinus Torvalds 			/* this is protected by snap->lock */
11261da177e4SLinus Torvalds 			pe->started = 1;
112776df1c65SAlasdair G Kergon 			up_write(&s->lock);
112876df1c65SAlasdair G Kergon 			start_copy(pe);
1129ba40a2aaSAlasdair G Kergon 			goto out;
1130ba40a2aaSAlasdair G Kergon 		}
1131cd45daffSMikulas Patocka 	} else {
11321da177e4SLinus Torvalds 		bio->bi_bdev = s->origin->bdev;
1133cd45daffSMikulas Patocka 		map_context->ptr = track_chunk(s, chunk);
1134cd45daffSMikulas Patocka 	}
11351da177e4SLinus Torvalds 
1136ba40a2aaSAlasdair G Kergon  out_unlock:
1137ba40a2aaSAlasdair G Kergon 	up_write(&s->lock);
1138ba40a2aaSAlasdair G Kergon  out:
11391da177e4SLinus Torvalds 	return r;
11401da177e4SLinus Torvalds }
11411da177e4SLinus Torvalds 
1142cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1143cd45daffSMikulas Patocka 			   int error, union map_info *map_context)
1144cd45daffSMikulas Patocka {
1145cd45daffSMikulas Patocka 	struct dm_snapshot *s = ti->private;
1146cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = map_context->ptr;
1147cd45daffSMikulas Patocka 
1148cd45daffSMikulas Patocka 	if (c)
1149cd45daffSMikulas Patocka 		stop_tracking_chunk(s, c);
1150cd45daffSMikulas Patocka 
1151cd45daffSMikulas Patocka 	return 0;
1152cd45daffSMikulas Patocka }
1153cd45daffSMikulas Patocka 
11541da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti)
11551da177e4SLinus Torvalds {
1156028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
11571da177e4SLinus Torvalds 
1158aa14edebSAlasdair G Kergon 	down_write(&s->lock);
1159aa14edebSAlasdair G Kergon 	s->active = 1;
1160aa14edebSAlasdair G Kergon 	up_write(&s->lock);
11611da177e4SLinus Torvalds }
11621da177e4SLinus Torvalds 
11631da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type,
11641da177e4SLinus Torvalds 			   char *result, unsigned int maxlen)
11651da177e4SLinus Torvalds {
11662e4a31dfSJonathan Brassow 	unsigned sz = 0;
1167028867acSAlasdair G Kergon 	struct dm_snapshot *snap = ti->private;
11681da177e4SLinus Torvalds 
11691da177e4SLinus Torvalds 	switch (type) {
11701da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
117194e76572SMikulas Patocka 
117294e76572SMikulas Patocka 		down_write(&snap->lock);
117394e76572SMikulas Patocka 
11741da177e4SLinus Torvalds 		if (!snap->valid)
11752e4a31dfSJonathan Brassow 			DMEMIT("Invalid");
11761da177e4SLinus Torvalds 		else {
1177493df71cSJonathan Brassow 			if (snap->store->type->fraction_full) {
11781da177e4SLinus Torvalds 				sector_t numerator, denominator;
1179493df71cSJonathan Brassow 				snap->store->type->fraction_full(snap->store,
11801da177e4SLinus Torvalds 								 &numerator,
11811da177e4SLinus Torvalds 								 &denominator);
11822e4a31dfSJonathan Brassow 				DMEMIT("%llu/%llu",
11834ee218cdSAndrew Morton 				       (unsigned long long)numerator,
11844ee218cdSAndrew Morton 				       (unsigned long long)denominator);
11851da177e4SLinus Torvalds 			}
11861da177e4SLinus Torvalds 			else
11872e4a31dfSJonathan Brassow 				DMEMIT("Unknown");
11881da177e4SLinus Torvalds 		}
118994e76572SMikulas Patocka 
119094e76572SMikulas Patocka 		up_write(&snap->lock);
119194e76572SMikulas Patocka 
11921da177e4SLinus Torvalds 		break;
11931da177e4SLinus Torvalds 
11941da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
11951da177e4SLinus Torvalds 		/*
11961da177e4SLinus Torvalds 		 * kdevname returns a static pointer so we need
11971da177e4SLinus Torvalds 		 * to make private copies if the output is to
11981da177e4SLinus Torvalds 		 * make sense.
11991da177e4SLinus Torvalds 		 */
12002e4a31dfSJonathan Brassow 		DMEMIT("%s", snap->origin->name);
12011e302a92SJonathan Brassow 		snap->store->type->status(snap->store, type, result + sz,
12021e302a92SJonathan Brassow 					  maxlen - sz);
12031da177e4SLinus Torvalds 		break;
12041da177e4SLinus Torvalds 	}
12051da177e4SLinus Torvalds 
12061da177e4SLinus Torvalds 	return 0;
12071da177e4SLinus Torvalds }
12081da177e4SLinus Torvalds 
12098811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti,
12108811f46cSMike Snitzer 				    iterate_devices_callout_fn fn, void *data)
12118811f46cSMike Snitzer {
12128811f46cSMike Snitzer 	struct dm_snapshot *snap = ti->private;
12138811f46cSMike Snitzer 
12148811f46cSMike Snitzer 	return fn(ti, snap->origin, 0, ti->len, data);
12158811f46cSMike Snitzer }
12168811f46cSMike Snitzer 
12178811f46cSMike Snitzer 
12181da177e4SLinus Torvalds /*-----------------------------------------------------------------
12191da177e4SLinus Torvalds  * Origin methods
12201da177e4SLinus Torvalds  *---------------------------------------------------------------*/
12211da177e4SLinus Torvalds static int __origin_write(struct list_head *snapshots, struct bio *bio)
12221da177e4SLinus Torvalds {
1223d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED, first = 0;
12241da177e4SLinus Torvalds 	struct dm_snapshot *snap;
12251d4989c8SJon Brassow 	struct dm_exception *e;
1226028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
12271da177e4SLinus Torvalds 	chunk_t chunk;
1228eccf0817SAlasdair G Kergon 	LIST_HEAD(pe_queue);
12291da177e4SLinus Torvalds 
12301da177e4SLinus Torvalds 	/* Do all the snapshots on this origin */
12311da177e4SLinus Torvalds 	list_for_each_entry (snap, snapshots, list) {
12321da177e4SLinus Torvalds 
123376df1c65SAlasdair G Kergon 		down_write(&snap->lock);
123476df1c65SAlasdair G Kergon 
1235aa14edebSAlasdair G Kergon 		/* Only deal with valid and active snapshots */
1236aa14edebSAlasdair G Kergon 		if (!snap->valid || !snap->active)
123776df1c65SAlasdair G Kergon 			goto next_snapshot;
12381da177e4SLinus Torvalds 
1239d5e404c1SAlasdair G Kergon 		/* Nothing to do if writing beyond end of snapshot */
12400cea9c78SJonathan Brassow 		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
124176df1c65SAlasdair G Kergon 			goto next_snapshot;
12421da177e4SLinus Torvalds 
12431da177e4SLinus Torvalds 		/*
12441da177e4SLinus Torvalds 		 * Remember, different snapshots can have
12451da177e4SLinus Torvalds 		 * different chunk sizes.
12461da177e4SLinus Torvalds 		 */
124771fab00aSJonathan Brassow 		chunk = sector_to_chunk(snap->store, bio->bi_sector);
12481da177e4SLinus Torvalds 
12491da177e4SLinus Torvalds 		/*
12501da177e4SLinus Torvalds 		 * Check exception table to see if block
12511da177e4SLinus Torvalds 		 * is already remapped in this snapshot
12521da177e4SLinus Torvalds 		 * and trigger an exception if not.
1253b4b610f6SAlasdair G Kergon 		 *
12544b832e8dSAlasdair G Kergon 		 * ref_count is initialised to 1 so pending_complete()
1255b4b610f6SAlasdair G Kergon 		 * won't destroy the primary_pe while we're inside this loop.
12561da177e4SLinus Torvalds 		 */
12571da177e4SLinus Torvalds 		e = lookup_exception(&snap->complete, chunk);
125876df1c65SAlasdair G Kergon 		if (e)
125976df1c65SAlasdair G Kergon 			goto next_snapshot;
126076df1c65SAlasdair G Kergon 
12612913808eSMikulas Patocka 		pe = __lookup_pending_exception(snap, chunk);
12622913808eSMikulas Patocka 		if (!pe) {
1263c6621392SMikulas Patocka 			up_write(&snap->lock);
1264c6621392SMikulas Patocka 			pe = alloc_pending_exception(snap);
1265c6621392SMikulas Patocka 			down_write(&snap->lock);
1266c6621392SMikulas Patocka 
1267c6621392SMikulas Patocka 			if (!snap->valid) {
1268c6621392SMikulas Patocka 				free_pending_exception(pe);
1269c6621392SMikulas Patocka 				goto next_snapshot;
1270c6621392SMikulas Patocka 			}
1271c6621392SMikulas Patocka 
127235bf659bSMikulas Patocka 			e = lookup_exception(&snap->complete, chunk);
127335bf659bSMikulas Patocka 			if (e) {
127435bf659bSMikulas Patocka 				free_pending_exception(pe);
127535bf659bSMikulas Patocka 				goto next_snapshot;
127635bf659bSMikulas Patocka 			}
127735bf659bSMikulas Patocka 
1278c6621392SMikulas Patocka 			pe = __find_pending_exception(snap, pe, chunk);
12791da177e4SLinus Torvalds 			if (!pe) {
1280695368acSAlasdair G Kergon 				__invalidate_snapshot(snap, -ENOMEM);
128176df1c65SAlasdair G Kergon 				goto next_snapshot;
128276df1c65SAlasdair G Kergon 			}
12832913808eSMikulas Patocka 		}
12841da177e4SLinus Torvalds 
1285b4b610f6SAlasdair G Kergon 		if (!primary_pe) {
1286b4b610f6SAlasdair G Kergon 			/*
1287b4b610f6SAlasdair G Kergon 			 * Either every pe here has same
1288b4b610f6SAlasdair G Kergon 			 * primary_pe or none has one yet.
1289b4b610f6SAlasdair G Kergon 			 */
1290b4b610f6SAlasdair G Kergon 			if (pe->primary_pe)
1291b4b610f6SAlasdair G Kergon 				primary_pe = pe->primary_pe;
1292b4b610f6SAlasdair G Kergon 			else {
1293b4b610f6SAlasdair G Kergon 				primary_pe = pe;
1294b4b610f6SAlasdair G Kergon 				first = 1;
1295eccf0817SAlasdair G Kergon 			}
1296b4b610f6SAlasdair G Kergon 
129776df1c65SAlasdair G Kergon 			bio_list_add(&primary_pe->origin_bios, bio);
129876df1c65SAlasdair G Kergon 
1299d2a7ad29SKiyoshi Ueda 			r = DM_MAPIO_SUBMITTED;
1300b4b610f6SAlasdair G Kergon 		}
130176df1c65SAlasdair G Kergon 
1302b4b610f6SAlasdair G Kergon 		if (!pe->primary_pe) {
1303b4b610f6SAlasdair G Kergon 			pe->primary_pe = primary_pe;
13044b832e8dSAlasdair G Kergon 			get_pending_exception(primary_pe);
1305b4b610f6SAlasdair G Kergon 		}
130676df1c65SAlasdair G Kergon 
1307eccf0817SAlasdair G Kergon 		if (!pe->started) {
1308eccf0817SAlasdair G Kergon 			pe->started = 1;
1309eccf0817SAlasdair G Kergon 			list_add_tail(&pe->list, &pe_queue);
1310eccf0817SAlasdair G Kergon 		}
13111da177e4SLinus Torvalds 
131276df1c65SAlasdair G Kergon  next_snapshot:
13131da177e4SLinus Torvalds 		up_write(&snap->lock);
13141da177e4SLinus Torvalds 	}
13151da177e4SLinus Torvalds 
1316b4b610f6SAlasdair G Kergon 	if (!primary_pe)
13174b832e8dSAlasdair G Kergon 		return r;
1318b4b610f6SAlasdair G Kergon 
1319b4b610f6SAlasdair G Kergon 	/*
1320b4b610f6SAlasdair G Kergon 	 * If this is the first time we're processing this chunk and
13214b832e8dSAlasdair G Kergon 	 * ref_count is now 1 it means all the pending exceptions
1322b4b610f6SAlasdair G Kergon 	 * got completed while we were in the loop above, so it falls to
1323b4b610f6SAlasdair G Kergon 	 * us here to remove the primary_pe and submit any origin_bios.
1324b4b610f6SAlasdair G Kergon 	 */
1325b4b610f6SAlasdair G Kergon 
13264b832e8dSAlasdair G Kergon 	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
1327b4b610f6SAlasdair G Kergon 		flush_bios(bio_list_get(&primary_pe->origin_bios));
1328b4b610f6SAlasdair G Kergon 		free_pending_exception(primary_pe);
1329b4b610f6SAlasdair G Kergon 		/* If we got here, pe_queue is necessarily empty. */
13304b832e8dSAlasdair G Kergon 		return r;
1331b4b610f6SAlasdair G Kergon 	}
1332b4b610f6SAlasdair G Kergon 
13331da177e4SLinus Torvalds 	/*
13341da177e4SLinus Torvalds 	 * Now that we have a complete pe list we can start the copying.
13351da177e4SLinus Torvalds 	 */
1336eccf0817SAlasdair G Kergon 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
13371da177e4SLinus Torvalds 		start_copy(pe);
13381da177e4SLinus Torvalds 
13391da177e4SLinus Torvalds 	return r;
13401da177e4SLinus Torvalds }
13411da177e4SLinus Torvalds 
13421da177e4SLinus Torvalds /*
13431da177e4SLinus Torvalds  * Called on a write from the origin driver.
13441da177e4SLinus Torvalds  */
13451da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio)
13461da177e4SLinus Torvalds {
13471da177e4SLinus Torvalds 	struct origin *o;
1348d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
13491da177e4SLinus Torvalds 
13501da177e4SLinus Torvalds 	down_read(&_origins_lock);
13511da177e4SLinus Torvalds 	o = __lookup_origin(origin->bdev);
13521da177e4SLinus Torvalds 	if (o)
13531da177e4SLinus Torvalds 		r = __origin_write(&o->snapshots, bio);
13541da177e4SLinus Torvalds 	up_read(&_origins_lock);
13551da177e4SLinus Torvalds 
13561da177e4SLinus Torvalds 	return r;
13571da177e4SLinus Torvalds }
13581da177e4SLinus Torvalds 
13591da177e4SLinus Torvalds /*
13601da177e4SLinus Torvalds  * Origin: maps a linear range of a device, with hooks for snapshotting.
13611da177e4SLinus Torvalds  */
13621da177e4SLinus Torvalds 
13631da177e4SLinus Torvalds /*
13641da177e4SLinus Torvalds  * Construct an origin mapping: <dev_path>
13651da177e4SLinus Torvalds  * The context for an origin is merely a 'struct dm_dev *'
13661da177e4SLinus Torvalds  * pointing to the real device.
13671da177e4SLinus Torvalds  */
13681da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
13691da177e4SLinus Torvalds {
13701da177e4SLinus Torvalds 	int r;
13711da177e4SLinus Torvalds 	struct dm_dev *dev;
13721da177e4SLinus Torvalds 
13731da177e4SLinus Torvalds 	if (argc != 1) {
137472d94861SAlasdair G Kergon 		ti->error = "origin: incorrect number of arguments";
13751da177e4SLinus Torvalds 		return -EINVAL;
13761da177e4SLinus Torvalds 	}
13771da177e4SLinus Torvalds 
13781da177e4SLinus Torvalds 	r = dm_get_device(ti, argv[0], 0, ti->len,
13791da177e4SLinus Torvalds 			  dm_table_get_mode(ti->table), &dev);
13801da177e4SLinus Torvalds 	if (r) {
13811da177e4SLinus Torvalds 		ti->error = "Cannot get target device";
13821da177e4SLinus Torvalds 		return r;
13831da177e4SLinus Torvalds 	}
13841da177e4SLinus Torvalds 
13851da177e4SLinus Torvalds 	ti->private = dev;
1386494b3ee7SMikulas Patocka 	ti->num_flush_requests = 1;
1387494b3ee7SMikulas Patocka 
13881da177e4SLinus Torvalds 	return 0;
13891da177e4SLinus Torvalds }
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti)
13921da177e4SLinus Torvalds {
1393028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
13941da177e4SLinus Torvalds 	dm_put_device(ti, dev);
13951da177e4SLinus Torvalds }
13961da177e4SLinus Torvalds 
13971da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio,
13981da177e4SLinus Torvalds 		      union map_info *map_context)
13991da177e4SLinus Torvalds {
1400028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14011da177e4SLinus Torvalds 	bio->bi_bdev = dev->bdev;
14021da177e4SLinus Torvalds 
1403494b3ee7SMikulas Patocka 	if (unlikely(bio_empty_barrier(bio)))
1404494b3ee7SMikulas Patocka 		return DM_MAPIO_REMAPPED;
1405494b3ee7SMikulas Patocka 
14061da177e4SLinus Torvalds 	/* Only tell snapshots if this is a write */
1407d2a7ad29SKiyoshi Ueda 	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
14081da177e4SLinus Torvalds }
14091da177e4SLinus Torvalds 
14101da177e4SLinus Torvalds /*
14111da177e4SLinus Torvalds  * Set the target "split_io" field to the minimum of all the snapshots'
14121da177e4SLinus Torvalds  * chunk sizes.
14131da177e4SLinus Torvalds  */
14141da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti)
14151da177e4SLinus Torvalds {
1416028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds 	down_read(&_origins_lock);
14191da177e4SLinus Torvalds 
14207e201b35SMikulas Patocka 	ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev));
14217e201b35SMikulas Patocka 
14227e201b35SMikulas Patocka 	up_read(&_origins_lock);
14231da177e4SLinus Torvalds }
14241da177e4SLinus Torvalds 
14251da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result,
14261da177e4SLinus Torvalds 			 unsigned int maxlen)
14271da177e4SLinus Torvalds {
1428028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14291da177e4SLinus Torvalds 
14301da177e4SLinus Torvalds 	switch (type) {
14311da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
14321da177e4SLinus Torvalds 		result[0] = '\0';
14331da177e4SLinus Torvalds 		break;
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
14361da177e4SLinus Torvalds 		snprintf(result, maxlen, "%s", dev->name);
14371da177e4SLinus Torvalds 		break;
14381da177e4SLinus Torvalds 	}
14391da177e4SLinus Torvalds 
14401da177e4SLinus Torvalds 	return 0;
14411da177e4SLinus Torvalds }
14421da177e4SLinus Torvalds 
14438811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti,
14448811f46cSMike Snitzer 				  iterate_devices_callout_fn fn, void *data)
14458811f46cSMike Snitzer {
14468811f46cSMike Snitzer 	struct dm_dev *dev = ti->private;
14478811f46cSMike Snitzer 
14488811f46cSMike Snitzer 	return fn(ti, dev, 0, ti->len, data);
14498811f46cSMike Snitzer }
14508811f46cSMike Snitzer 
14511da177e4SLinus Torvalds static struct target_type origin_target = {
14521da177e4SLinus Torvalds 	.name    = "snapshot-origin",
14538811f46cSMike Snitzer 	.version = {1, 7, 0},
14541da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14551da177e4SLinus Torvalds 	.ctr     = origin_ctr,
14561da177e4SLinus Torvalds 	.dtr     = origin_dtr,
14571da177e4SLinus Torvalds 	.map     = origin_map,
14581da177e4SLinus Torvalds 	.resume  = origin_resume,
14591da177e4SLinus Torvalds 	.status  = origin_status,
14608811f46cSMike Snitzer 	.iterate_devices = origin_iterate_devices,
14611da177e4SLinus Torvalds };
14621da177e4SLinus Torvalds 
14631da177e4SLinus Torvalds static struct target_type snapshot_target = {
14641da177e4SLinus Torvalds 	.name    = "snapshot",
14658811f46cSMike Snitzer 	.version = {1, 7, 0},
14661da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14671da177e4SLinus Torvalds 	.ctr     = snapshot_ctr,
14681da177e4SLinus Torvalds 	.dtr     = snapshot_dtr,
14691da177e4SLinus Torvalds 	.map     = snapshot_map,
1470cd45daffSMikulas Patocka 	.end_io  = snapshot_end_io,
14711da177e4SLinus Torvalds 	.resume  = snapshot_resume,
14721da177e4SLinus Torvalds 	.status  = snapshot_status,
14738811f46cSMike Snitzer 	.iterate_devices = snapshot_iterate_devices,
14741da177e4SLinus Torvalds };
14751da177e4SLinus Torvalds 
14761da177e4SLinus Torvalds static int __init dm_snapshot_init(void)
14771da177e4SLinus Torvalds {
14781da177e4SLinus Torvalds 	int r;
14791da177e4SLinus Torvalds 
14804db6bfe0SAlasdair G Kergon 	r = dm_exception_store_init();
14814db6bfe0SAlasdair G Kergon 	if (r) {
14824db6bfe0SAlasdair G Kergon 		DMERR("Failed to initialize exception stores");
14834db6bfe0SAlasdair G Kergon 		return r;
14844db6bfe0SAlasdair G Kergon 	}
14854db6bfe0SAlasdair G Kergon 
14861da177e4SLinus Torvalds 	r = dm_register_target(&snapshot_target);
14871da177e4SLinus Torvalds 	if (r) {
14881da177e4SLinus Torvalds 		DMERR("snapshot target register failed %d", r);
1489034a186dSJonathan Brassow 		goto bad_register_snapshot_target;
14901da177e4SLinus Torvalds 	}
14911da177e4SLinus Torvalds 
14921da177e4SLinus Torvalds 	r = dm_register_target(&origin_target);
14931da177e4SLinus Torvalds 	if (r < 0) {
149472d94861SAlasdair G Kergon 		DMERR("Origin target register failed %d", r);
14951da177e4SLinus Torvalds 		goto bad1;
14961da177e4SLinus Torvalds 	}
14971da177e4SLinus Torvalds 
14981da177e4SLinus Torvalds 	r = init_origin_hash();
14991da177e4SLinus Torvalds 	if (r) {
15001da177e4SLinus Torvalds 		DMERR("init_origin_hash failed.");
15011da177e4SLinus Torvalds 		goto bad2;
15021da177e4SLinus Torvalds 	}
15031da177e4SLinus Torvalds 
15041d4989c8SJon Brassow 	exception_cache = KMEM_CACHE(dm_exception, 0);
15051da177e4SLinus Torvalds 	if (!exception_cache) {
15061da177e4SLinus Torvalds 		DMERR("Couldn't create exception cache.");
15071da177e4SLinus Torvalds 		r = -ENOMEM;
15081da177e4SLinus Torvalds 		goto bad3;
15091da177e4SLinus Torvalds 	}
15101da177e4SLinus Torvalds 
1511028867acSAlasdair G Kergon 	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
15121da177e4SLinus Torvalds 	if (!pending_cache) {
15131da177e4SLinus Torvalds 		DMERR("Couldn't create pending cache.");
15141da177e4SLinus Torvalds 		r = -ENOMEM;
15151da177e4SLinus Torvalds 		goto bad4;
15161da177e4SLinus Torvalds 	}
15171da177e4SLinus Torvalds 
1518cd45daffSMikulas Patocka 	tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
1519cd45daffSMikulas Patocka 	if (!tracked_chunk_cache) {
1520cd45daffSMikulas Patocka 		DMERR("Couldn't create cache to track chunks in use.");
1521cd45daffSMikulas Patocka 		r = -ENOMEM;
1522cd45daffSMikulas Patocka 		goto bad5;
1523cd45daffSMikulas Patocka 	}
1524cd45daffSMikulas Patocka 
1525ca3a931fSAlasdair G Kergon 	ksnapd = create_singlethread_workqueue("ksnapd");
1526ca3a931fSAlasdair G Kergon 	if (!ksnapd) {
1527ca3a931fSAlasdair G Kergon 		DMERR("Failed to create ksnapd workqueue.");
1528ca3a931fSAlasdair G Kergon 		r = -ENOMEM;
152992e86812SMikulas Patocka 		goto bad_pending_pool;
1530ca3a931fSAlasdair G Kergon 	}
1531ca3a931fSAlasdair G Kergon 
15321da177e4SLinus Torvalds 	return 0;
15331da177e4SLinus Torvalds 
1534cd45daffSMikulas Patocka bad_pending_pool:
1535cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15361da177e4SLinus Torvalds bad5:
15371da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15381da177e4SLinus Torvalds bad4:
15391da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
15401da177e4SLinus Torvalds bad3:
15411da177e4SLinus Torvalds 	exit_origin_hash();
15421da177e4SLinus Torvalds bad2:
15431da177e4SLinus Torvalds 	dm_unregister_target(&origin_target);
15441da177e4SLinus Torvalds bad1:
15451da177e4SLinus Torvalds 	dm_unregister_target(&snapshot_target);
1546034a186dSJonathan Brassow 
1547034a186dSJonathan Brassow bad_register_snapshot_target:
1548034a186dSJonathan Brassow 	dm_exception_store_exit();
15491da177e4SLinus Torvalds 	return r;
15501da177e4SLinus Torvalds }
15511da177e4SLinus Torvalds 
15521da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void)
15531da177e4SLinus Torvalds {
1554ca3a931fSAlasdair G Kergon 	destroy_workqueue(ksnapd);
1555ca3a931fSAlasdair G Kergon 
155610d3bd09SMikulas Patocka 	dm_unregister_target(&snapshot_target);
155710d3bd09SMikulas Patocka 	dm_unregister_target(&origin_target);
15581da177e4SLinus Torvalds 
15591da177e4SLinus Torvalds 	exit_origin_hash();
15601da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15611da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
1562cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15634db6bfe0SAlasdair G Kergon 
15644db6bfe0SAlasdair G Kergon 	dm_exception_store_exit();
15651da177e4SLinus Torvalds }
15661da177e4SLinus Torvalds 
15671da177e4SLinus Torvalds /* Module hooks */
15681da177e4SLinus Torvalds module_init(dm_snapshot_init);
15691da177e4SLinus Torvalds module_exit(dm_snapshot_exit);
15701da177e4SLinus Torvalds 
15711da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target");
15721da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber");
15731da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1574