xref: /openbmc/linux/drivers/md/dm-snap.c (revision 7e201b35)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * dm-snapshot.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * This file is released under the GPL.
71da177e4SLinus Torvalds  */
81da177e4SLinus Torvalds 
91da177e4SLinus Torvalds #include <linux/blkdev.h>
101da177e4SLinus Torvalds #include <linux/device-mapper.h>
1190fa1527SMikulas Patocka #include <linux/delay.h>
121da177e4SLinus Torvalds #include <linux/fs.h>
131da177e4SLinus Torvalds #include <linux/init.h>
141da177e4SLinus Torvalds #include <linux/kdev_t.h>
151da177e4SLinus Torvalds #include <linux/list.h>
161da177e4SLinus Torvalds #include <linux/mempool.h>
171da177e4SLinus Torvalds #include <linux/module.h>
181da177e4SLinus Torvalds #include <linux/slab.h>
191da177e4SLinus Torvalds #include <linux/vmalloc.h>
206f3c3f0aSvignesh babu #include <linux/log2.h>
21a765e20eSAlasdair G Kergon #include <linux/dm-kcopyd.h>
22ccc45ea8SJonathan Brassow #include <linux/workqueue.h>
231da177e4SLinus Torvalds 
24aea53d92SJonathan Brassow #include "dm-exception-store.h"
251da177e4SLinus Torvalds 
2672d94861SAlasdair G Kergon #define DM_MSG_PREFIX "snapshots"
2772d94861SAlasdair G Kergon 
/*
 * Percentage step at which users are woken up.
 */
#define WAKE_UP_PERCENT 5

/*
 * Priority used for snapshot operations handed to kcopyd.
 */
#define SNAPSHOT_COPY_PRIORITY 2

/*
 * Initial reservation per snapshot: 1MB expressed in pages, but never
 * fewer than one page.
 */
#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1)

/*
 * Number of elements reserved in the mempool used to track chunks in use.
 */
#define MIN_IOS 256

/*
 * Tracked-chunk hash: number of buckets, and the mapping from a chunk
 * number to its bucket (the low bits of the chunk number).
 */
#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
51ccc45ea8SJonathan Brassow 
/*
 * Hash table of exceptions: an array of list heads indexed by
 * (chunk >> hash_shift) & hash_mask.
 */
struct exception_table {
	/* Number of buckets minus one; used as the index mask */
	uint32_t hash_mask;

	/* Low-order chunk bits discarded before masking */
	unsigned hash_shift;

	/* Bucket array holding hash_mask + 1 list heads */
	struct list_head *table;
};
57ccc45ea8SJonathan Brassow 
58ccc45ea8SJonathan Brassow struct dm_snapshot {
59ccc45ea8SJonathan Brassow 	struct rw_semaphore lock;
60ccc45ea8SJonathan Brassow 
61ccc45ea8SJonathan Brassow 	struct dm_dev *origin;
62ccc45ea8SJonathan Brassow 
63ccc45ea8SJonathan Brassow 	/* List of snapshots per Origin */
64ccc45ea8SJonathan Brassow 	struct list_head list;
65ccc45ea8SJonathan Brassow 
66ccc45ea8SJonathan Brassow 	/* You can't use a snapshot if this is 0 (e.g. if full) */
67ccc45ea8SJonathan Brassow 	int valid;
68ccc45ea8SJonathan Brassow 
69ccc45ea8SJonathan Brassow 	/* Origin writes don't trigger exceptions until this is set */
70ccc45ea8SJonathan Brassow 	int active;
71ccc45ea8SJonathan Brassow 
72ccc45ea8SJonathan Brassow 	mempool_t *pending_pool;
73ccc45ea8SJonathan Brassow 
74ccc45ea8SJonathan Brassow 	atomic_t pending_exceptions_count;
75ccc45ea8SJonathan Brassow 
76ccc45ea8SJonathan Brassow 	struct exception_table pending;
77ccc45ea8SJonathan Brassow 	struct exception_table complete;
78ccc45ea8SJonathan Brassow 
79ccc45ea8SJonathan Brassow 	/*
80ccc45ea8SJonathan Brassow 	 * pe_lock protects all pending_exception operations and access
81ccc45ea8SJonathan Brassow 	 * as well as the snapshot_bios list.
82ccc45ea8SJonathan Brassow 	 */
83ccc45ea8SJonathan Brassow 	spinlock_t pe_lock;
84ccc45ea8SJonathan Brassow 
85ccc45ea8SJonathan Brassow 	/* The on disk metadata handler */
86ccc45ea8SJonathan Brassow 	struct dm_exception_store *store;
87ccc45ea8SJonathan Brassow 
88ccc45ea8SJonathan Brassow 	struct dm_kcopyd_client *kcopyd_client;
89ccc45ea8SJonathan Brassow 
90ccc45ea8SJonathan Brassow 	/* Queue of snapshot writes for ksnapd to flush */
91ccc45ea8SJonathan Brassow 	struct bio_list queued_bios;
92ccc45ea8SJonathan Brassow 	struct work_struct queued_bios_work;
93ccc45ea8SJonathan Brassow 
94ccc45ea8SJonathan Brassow 	/* Chunks with outstanding reads */
95ccc45ea8SJonathan Brassow 	mempool_t *tracked_chunk_pool;
96ccc45ea8SJonathan Brassow 	spinlock_t tracked_chunk_lock;
97ccc45ea8SJonathan Brassow 	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
98ccc45ea8SJonathan Brassow };
99ccc45ea8SJonathan Brassow 
/* Workqueue used to flush queued snapshot bios */
static struct workqueue_struct *ksnapd;

/* Work handler, defined later in this file */
static void flush_queued_bios(struct work_struct *work);
102ca3a931fSAlasdair G Kergon 
103ccc45ea8SJonathan Brassow static sector_t chunk_to_sector(struct dm_exception_store *store,
104ccc45ea8SJonathan Brassow 				chunk_t chunk)
105ccc45ea8SJonathan Brassow {
106ccc45ea8SJonathan Brassow 	return chunk << store->chunk_shift;
107ccc45ea8SJonathan Brassow }
108ccc45ea8SJonathan Brassow 
/*
 * Return 1 if both arguments refer to the same block device, 0 otherwise.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * A given block device only ever has one instance, so pointer
	 * identity is a sufficient equality test.
	 */
	if (lhs != rhs)
		return 0;

	return 1;
}
117ccc45ea8SJonathan Brassow 
118028867acSAlasdair G Kergon struct dm_snap_pending_exception {
119028867acSAlasdair G Kergon 	struct dm_snap_exception e;
1201da177e4SLinus Torvalds 
1211da177e4SLinus Torvalds 	/*
1221da177e4SLinus Torvalds 	 * Origin buffers waiting for this to complete are held
1231da177e4SLinus Torvalds 	 * in a bio list
1241da177e4SLinus Torvalds 	 */
1251da177e4SLinus Torvalds 	struct bio_list origin_bios;
1261da177e4SLinus Torvalds 	struct bio_list snapshot_bios;
1271da177e4SLinus Torvalds 
1281da177e4SLinus Torvalds 	/*
129eccf0817SAlasdair G Kergon 	 * Short-term queue of pending exceptions prior to submission.
130eccf0817SAlasdair G Kergon 	 */
131eccf0817SAlasdair G Kergon 	struct list_head list;
132eccf0817SAlasdair G Kergon 
133eccf0817SAlasdair G Kergon 	/*
134b4b610f6SAlasdair G Kergon 	 * The primary pending_exception is the one that holds
1354b832e8dSAlasdair G Kergon 	 * the ref_count and the list of origin_bios for a
136b4b610f6SAlasdair G Kergon 	 * group of pending_exceptions.  It is always last to get freed.
137b4b610f6SAlasdair G Kergon 	 * These fields get set up when writing to the origin.
1381da177e4SLinus Torvalds 	 */
139028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *primary_pe;
140b4b610f6SAlasdair G Kergon 
141b4b610f6SAlasdair G Kergon 	/*
142b4b610f6SAlasdair G Kergon 	 * Number of pending_exceptions processing this chunk.
143b4b610f6SAlasdair G Kergon 	 * When this drops to zero we must complete the origin bios.
144b4b610f6SAlasdair G Kergon 	 * If incrementing or decrementing this, hold pe->snap->lock for
145b4b610f6SAlasdair G Kergon 	 * the sibling concerned and not pe->primary_pe->snap->lock unless
146b4b610f6SAlasdair G Kergon 	 * they are the same.
147b4b610f6SAlasdair G Kergon 	 */
1484b832e8dSAlasdair G Kergon 	atomic_t ref_count;
1491da177e4SLinus Torvalds 
1501da177e4SLinus Torvalds 	/* Pointer back to snapshot context */
1511da177e4SLinus Torvalds 	struct dm_snapshot *snap;
1521da177e4SLinus Torvalds 
1531da177e4SLinus Torvalds 	/*
1541da177e4SLinus Torvalds 	 * 1 indicates the exception has already been sent to
1551da177e4SLinus Torvalds 	 * kcopyd.
1561da177e4SLinus Torvalds 	 */
1571da177e4SLinus Torvalds 	int started;
1581da177e4SLinus Torvalds };
1591da177e4SLinus Torvalds 
/*
 * Slab caches: exception_cache backs struct dm_snap_exception
 * allocations, pending_cache backs each snapshot's pending_pool.
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
1661da177e4SLinus Torvalds 
167cd45daffSMikulas Patocka struct dm_snap_tracked_chunk {
168cd45daffSMikulas Patocka 	struct hlist_node node;
169cd45daffSMikulas Patocka 	chunk_t chunk;
170cd45daffSMikulas Patocka };
171cd45daffSMikulas Patocka 
172cd45daffSMikulas Patocka static struct kmem_cache *tracked_chunk_cache;
173cd45daffSMikulas Patocka 
174cd45daffSMikulas Patocka static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s,
175cd45daffSMikulas Patocka 						 chunk_t chunk)
176cd45daffSMikulas Patocka {
177cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool,
178cd45daffSMikulas Patocka 							GFP_NOIO);
179cd45daffSMikulas Patocka 	unsigned long flags;
180cd45daffSMikulas Patocka 
181cd45daffSMikulas Patocka 	c->chunk = chunk;
182cd45daffSMikulas Patocka 
183cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
184cd45daffSMikulas Patocka 	hlist_add_head(&c->node,
185cd45daffSMikulas Patocka 		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
186cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
187cd45daffSMikulas Patocka 
188cd45daffSMikulas Patocka 	return c;
189cd45daffSMikulas Patocka }
190cd45daffSMikulas Patocka 
191cd45daffSMikulas Patocka static void stop_tracking_chunk(struct dm_snapshot *s,
192cd45daffSMikulas Patocka 				struct dm_snap_tracked_chunk *c)
193cd45daffSMikulas Patocka {
194cd45daffSMikulas Patocka 	unsigned long flags;
195cd45daffSMikulas Patocka 
196cd45daffSMikulas Patocka 	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
197cd45daffSMikulas Patocka 	hlist_del(&c->node);
198cd45daffSMikulas Patocka 	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
199cd45daffSMikulas Patocka 
200cd45daffSMikulas Patocka 	mempool_free(c, s->tracked_chunk_pool);
201cd45daffSMikulas Patocka }
202cd45daffSMikulas Patocka 
203a8d41b59SMikulas Patocka static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
204a8d41b59SMikulas Patocka {
205a8d41b59SMikulas Patocka 	struct dm_snap_tracked_chunk *c;
206a8d41b59SMikulas Patocka 	struct hlist_node *hn;
207a8d41b59SMikulas Patocka 	int found = 0;
208a8d41b59SMikulas Patocka 
209a8d41b59SMikulas Patocka 	spin_lock_irq(&s->tracked_chunk_lock);
210a8d41b59SMikulas Patocka 
211a8d41b59SMikulas Patocka 	hlist_for_each_entry(c, hn,
212a8d41b59SMikulas Patocka 	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
213a8d41b59SMikulas Patocka 		if (c->chunk == chunk) {
214a8d41b59SMikulas Patocka 			found = 1;
215a8d41b59SMikulas Patocka 			break;
216a8d41b59SMikulas Patocka 		}
217a8d41b59SMikulas Patocka 	}
218a8d41b59SMikulas Patocka 
219a8d41b59SMikulas Patocka 	spin_unlock_irq(&s->tracked_chunk_lock);
220a8d41b59SMikulas Patocka 
221a8d41b59SMikulas Patocka 	return found;
222a8d41b59SMikulas Patocka }
223a8d41b59SMikulas Patocka 
2241da177e4SLinus Torvalds /*
2251da177e4SLinus Torvalds  * One of these per registered origin, held in the snapshot_origins hash
2261da177e4SLinus Torvalds  */
2271da177e4SLinus Torvalds struct origin {
2281da177e4SLinus Torvalds 	/* The origin device */
2291da177e4SLinus Torvalds 	struct block_device *bdev;
2301da177e4SLinus Torvalds 
2311da177e4SLinus Torvalds 	struct list_head hash_list;
2321da177e4SLinus Torvalds 
2331da177e4SLinus Torvalds 	/* List of snapshots for this origin */
2341da177e4SLinus Torvalds 	struct list_head snapshots;
2351da177e4SLinus Torvalds };
2361da177e4SLinus Torvalds 
2371da177e4SLinus Torvalds /*
2381da177e4SLinus Torvalds  * Size of the hash table for origin volumes. If we make this
2391da177e4SLinus Torvalds  * the size of the minors list then it should be nearly perfect
2401da177e4SLinus Torvalds  */
2411da177e4SLinus Torvalds #define ORIGIN_HASH_SIZE 256
2421da177e4SLinus Torvalds #define ORIGIN_MASK      0xFF
2431da177e4SLinus Torvalds static struct list_head *_origins;
2441da177e4SLinus Torvalds static struct rw_semaphore _origins_lock;
2451da177e4SLinus Torvalds 
2461da177e4SLinus Torvalds static int init_origin_hash(void)
2471da177e4SLinus Torvalds {
2481da177e4SLinus Torvalds 	int i;
2491da177e4SLinus Torvalds 
2501da177e4SLinus Torvalds 	_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
2511da177e4SLinus Torvalds 			   GFP_KERNEL);
2521da177e4SLinus Torvalds 	if (!_origins) {
25372d94861SAlasdair G Kergon 		DMERR("unable to allocate memory");
2541da177e4SLinus Torvalds 		return -ENOMEM;
2551da177e4SLinus Torvalds 	}
2561da177e4SLinus Torvalds 
2571da177e4SLinus Torvalds 	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
2581da177e4SLinus Torvalds 		INIT_LIST_HEAD(_origins + i);
2591da177e4SLinus Torvalds 	init_rwsem(&_origins_lock);
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds 	return 0;
2621da177e4SLinus Torvalds }
2631da177e4SLinus Torvalds 
2641da177e4SLinus Torvalds static void exit_origin_hash(void)
2651da177e4SLinus Torvalds {
2661da177e4SLinus Torvalds 	kfree(_origins);
2671da177e4SLinus Torvalds }
2681da177e4SLinus Torvalds 
269028867acSAlasdair G Kergon static unsigned origin_hash(struct block_device *bdev)
2701da177e4SLinus Torvalds {
2711da177e4SLinus Torvalds 	return bdev->bd_dev & ORIGIN_MASK;
2721da177e4SLinus Torvalds }
2731da177e4SLinus Torvalds 
2741da177e4SLinus Torvalds static struct origin *__lookup_origin(struct block_device *origin)
2751da177e4SLinus Torvalds {
2761da177e4SLinus Torvalds 	struct list_head *ol;
2771da177e4SLinus Torvalds 	struct origin *o;
2781da177e4SLinus Torvalds 
2791da177e4SLinus Torvalds 	ol = &_origins[origin_hash(origin)];
2801da177e4SLinus Torvalds 	list_for_each_entry (o, ol, hash_list)
2811da177e4SLinus Torvalds 		if (bdev_equal(o->bdev, origin))
2821da177e4SLinus Torvalds 			return o;
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds 	return NULL;
2851da177e4SLinus Torvalds }
2861da177e4SLinus Torvalds 
2871da177e4SLinus Torvalds static void __insert_origin(struct origin *o)
2881da177e4SLinus Torvalds {
2891da177e4SLinus Torvalds 	struct list_head *sl = &_origins[origin_hash(o->bdev)];
2901da177e4SLinus Torvalds 	list_add_tail(&o->hash_list, sl);
2911da177e4SLinus Torvalds }
2921da177e4SLinus Torvalds 
2931da177e4SLinus Torvalds /*
2941da177e4SLinus Torvalds  * Make a note of the snapshot and its origin so we can look it
2951da177e4SLinus Torvalds  * up when the origin has a write on it.
2961da177e4SLinus Torvalds  */
2971da177e4SLinus Torvalds static int register_snapshot(struct dm_snapshot *snap)
2981da177e4SLinus Torvalds {
2996d45d93eSMikulas Patocka 	struct dm_snapshot *l;
30060c856c8SMikulas Patocka 	struct origin *o, *new_o;
3011da177e4SLinus Torvalds 	struct block_device *bdev = snap->origin->bdev;
3021da177e4SLinus Torvalds 
30360c856c8SMikulas Patocka 	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
30460c856c8SMikulas Patocka 	if (!new_o)
30560c856c8SMikulas Patocka 		return -ENOMEM;
30660c856c8SMikulas Patocka 
3071da177e4SLinus Torvalds 	down_write(&_origins_lock);
3081da177e4SLinus Torvalds 	o = __lookup_origin(bdev);
3091da177e4SLinus Torvalds 
31060c856c8SMikulas Patocka 	if (o)
31160c856c8SMikulas Patocka 		kfree(new_o);
31260c856c8SMikulas Patocka 	else {
3131da177e4SLinus Torvalds 		/* New origin */
31460c856c8SMikulas Patocka 		o = new_o;
3151da177e4SLinus Torvalds 
3161da177e4SLinus Torvalds 		/* Initialise the struct */
3171da177e4SLinus Torvalds 		INIT_LIST_HEAD(&o->snapshots);
3181da177e4SLinus Torvalds 		o->bdev = bdev;
3191da177e4SLinus Torvalds 
3201da177e4SLinus Torvalds 		__insert_origin(o);
3211da177e4SLinus Torvalds 	}
3221da177e4SLinus Torvalds 
3236d45d93eSMikulas Patocka 	/* Sort the list according to chunk size, largest-first smallest-last */
3246d45d93eSMikulas Patocka 	list_for_each_entry(l, &o->snapshots, list)
3256d45d93eSMikulas Patocka 		if (l->store->chunk_size < snap->store->chunk_size)
3266d45d93eSMikulas Patocka 			break;
3276d45d93eSMikulas Patocka 	list_add_tail(&snap->list, &l->list);
3281da177e4SLinus Torvalds 
3291da177e4SLinus Torvalds 	up_write(&_origins_lock);
3301da177e4SLinus Torvalds 	return 0;
3311da177e4SLinus Torvalds }
3321da177e4SLinus Torvalds 
3331da177e4SLinus Torvalds static void unregister_snapshot(struct dm_snapshot *s)
3341da177e4SLinus Torvalds {
3351da177e4SLinus Torvalds 	struct origin *o;
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	down_write(&_origins_lock);
3381da177e4SLinus Torvalds 	o = __lookup_origin(s->origin->bdev);
3391da177e4SLinus Torvalds 
3401da177e4SLinus Torvalds 	list_del(&s->list);
3411da177e4SLinus Torvalds 	if (list_empty(&o->snapshots)) {
3421da177e4SLinus Torvalds 		list_del(&o->hash_list);
3431da177e4SLinus Torvalds 		kfree(o);
3441da177e4SLinus Torvalds 	}
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 	up_write(&_origins_lock);
3471da177e4SLinus Torvalds }
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds /*
3501da177e4SLinus Torvalds  * Implementation of the exception hash tables.
351d74f81f8SMilan Broz  * The lowest hash_shift bits of the chunk number are ignored, allowing
352d74f81f8SMilan Broz  * some consecutive chunks to be grouped together.
3531da177e4SLinus Torvalds  */
354d74f81f8SMilan Broz static int init_exception_table(struct exception_table *et, uint32_t size,
355d74f81f8SMilan Broz 				unsigned hash_shift)
3561da177e4SLinus Torvalds {
3571da177e4SLinus Torvalds 	unsigned int i;
3581da177e4SLinus Torvalds 
359d74f81f8SMilan Broz 	et->hash_shift = hash_shift;
3601da177e4SLinus Torvalds 	et->hash_mask = size - 1;
3611da177e4SLinus Torvalds 	et->table = dm_vcalloc(size, sizeof(struct list_head));
3621da177e4SLinus Torvalds 	if (!et->table)
3631da177e4SLinus Torvalds 		return -ENOMEM;
3641da177e4SLinus Torvalds 
3651da177e4SLinus Torvalds 	for (i = 0; i < size; i++)
3661da177e4SLinus Torvalds 		INIT_LIST_HEAD(et->table + i);
3671da177e4SLinus Torvalds 
3681da177e4SLinus Torvalds 	return 0;
3691da177e4SLinus Torvalds }
3701da177e4SLinus Torvalds 
371e18b890bSChristoph Lameter static void exit_exception_table(struct exception_table *et, struct kmem_cache *mem)
3721da177e4SLinus Torvalds {
3731da177e4SLinus Torvalds 	struct list_head *slot;
374028867acSAlasdair G Kergon 	struct dm_snap_exception *ex, *next;
3751da177e4SLinus Torvalds 	int i, size;
3761da177e4SLinus Torvalds 
3771da177e4SLinus Torvalds 	size = et->hash_mask + 1;
3781da177e4SLinus Torvalds 	for (i = 0; i < size; i++) {
3791da177e4SLinus Torvalds 		slot = et->table + i;
3801da177e4SLinus Torvalds 
3811da177e4SLinus Torvalds 		list_for_each_entry_safe (ex, next, slot, hash_list)
3821da177e4SLinus Torvalds 			kmem_cache_free(mem, ex);
3831da177e4SLinus Torvalds 	}
3841da177e4SLinus Torvalds 
3851da177e4SLinus Torvalds 	vfree(et->table);
3861da177e4SLinus Torvalds }
3871da177e4SLinus Torvalds 
388028867acSAlasdair G Kergon static uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
3891da177e4SLinus Torvalds {
390d74f81f8SMilan Broz 	return (chunk >> et->hash_shift) & et->hash_mask;
3911da177e4SLinus Torvalds }
3921da177e4SLinus Torvalds 
393028867acSAlasdair G Kergon static void insert_exception(struct exception_table *eh,
394028867acSAlasdair G Kergon 			     struct dm_snap_exception *e)
3951da177e4SLinus Torvalds {
3961da177e4SLinus Torvalds 	struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
3971da177e4SLinus Torvalds 	list_add(&e->hash_list, l);
3981da177e4SLinus Torvalds }
3991da177e4SLinus Torvalds 
400028867acSAlasdair G Kergon static void remove_exception(struct dm_snap_exception *e)
4011da177e4SLinus Torvalds {
4021da177e4SLinus Torvalds 	list_del(&e->hash_list);
4031da177e4SLinus Torvalds }
4041da177e4SLinus Torvalds 
4051da177e4SLinus Torvalds /*
4061da177e4SLinus Torvalds  * Return the exception data for a sector, or NULL if not
4071da177e4SLinus Torvalds  * remapped.
4081da177e4SLinus Torvalds  */
409028867acSAlasdair G Kergon static struct dm_snap_exception *lookup_exception(struct exception_table *et,
4101da177e4SLinus Torvalds 						  chunk_t chunk)
4111da177e4SLinus Torvalds {
4121da177e4SLinus Torvalds 	struct list_head *slot;
413028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
4141da177e4SLinus Torvalds 
4151da177e4SLinus Torvalds 	slot = &et->table[exception_hash(et, chunk)];
4161da177e4SLinus Torvalds 	list_for_each_entry (e, slot, hash_list)
417d74f81f8SMilan Broz 		if (chunk >= e->old_chunk &&
418d74f81f8SMilan Broz 		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
4191da177e4SLinus Torvalds 			return e;
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds 	return NULL;
4221da177e4SLinus Torvalds }
4231da177e4SLinus Torvalds 
424028867acSAlasdair G Kergon static struct dm_snap_exception *alloc_exception(void)
4251da177e4SLinus Torvalds {
426028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
4271da177e4SLinus Torvalds 
4281da177e4SLinus Torvalds 	e = kmem_cache_alloc(exception_cache, GFP_NOIO);
4291da177e4SLinus Torvalds 	if (!e)
4301da177e4SLinus Torvalds 		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
4311da177e4SLinus Torvalds 
4321da177e4SLinus Torvalds 	return e;
4331da177e4SLinus Torvalds }
4341da177e4SLinus Torvalds 
435028867acSAlasdair G Kergon static void free_exception(struct dm_snap_exception *e)
4361da177e4SLinus Torvalds {
4371da177e4SLinus Torvalds 	kmem_cache_free(exception_cache, e);
4381da177e4SLinus Torvalds }
4391da177e4SLinus Torvalds 
44092e86812SMikulas Patocka static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
4411da177e4SLinus Torvalds {
44292e86812SMikulas Patocka 	struct dm_snap_pending_exception *pe = mempool_alloc(s->pending_pool,
44392e86812SMikulas Patocka 							     GFP_NOIO);
44492e86812SMikulas Patocka 
445879129d2SMikulas Patocka 	atomic_inc(&s->pending_exceptions_count);
44692e86812SMikulas Patocka 	pe->snap = s;
44792e86812SMikulas Patocka 
44892e86812SMikulas Patocka 	return pe;
4491da177e4SLinus Torvalds }
4501da177e4SLinus Torvalds 
451028867acSAlasdair G Kergon static void free_pending_exception(struct dm_snap_pending_exception *pe)
4521da177e4SLinus Torvalds {
453879129d2SMikulas Patocka 	struct dm_snapshot *s = pe->snap;
454879129d2SMikulas Patocka 
455879129d2SMikulas Patocka 	mempool_free(pe, s->pending_pool);
456879129d2SMikulas Patocka 	smp_mb__before_atomic_dec();
457879129d2SMikulas Patocka 	atomic_dec(&s->pending_exceptions_count);
4581da177e4SLinus Torvalds }
4591da177e4SLinus Torvalds 
460d74f81f8SMilan Broz static void insert_completed_exception(struct dm_snapshot *s,
461d74f81f8SMilan Broz 				       struct dm_snap_exception *new_e)
462d74f81f8SMilan Broz {
463d74f81f8SMilan Broz 	struct exception_table *eh = &s->complete;
464d74f81f8SMilan Broz 	struct list_head *l;
465d74f81f8SMilan Broz 	struct dm_snap_exception *e = NULL;
466d74f81f8SMilan Broz 
467d74f81f8SMilan Broz 	l = &eh->table[exception_hash(eh, new_e->old_chunk)];
468d74f81f8SMilan Broz 
469d74f81f8SMilan Broz 	/* Add immediately if this table doesn't support consecutive chunks */
470d74f81f8SMilan Broz 	if (!eh->hash_shift)
471d74f81f8SMilan Broz 		goto out;
472d74f81f8SMilan Broz 
473d74f81f8SMilan Broz 	/* List is ordered by old_chunk */
474d74f81f8SMilan Broz 	list_for_each_entry_reverse(e, l, hash_list) {
475d74f81f8SMilan Broz 		/* Insert after an existing chunk? */
476d74f81f8SMilan Broz 		if (new_e->old_chunk == (e->old_chunk +
477d74f81f8SMilan Broz 					 dm_consecutive_chunk_count(e) + 1) &&
478d74f81f8SMilan Broz 		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
479d74f81f8SMilan Broz 					 dm_consecutive_chunk_count(e) + 1)) {
480d74f81f8SMilan Broz 			dm_consecutive_chunk_count_inc(e);
481d74f81f8SMilan Broz 			free_exception(new_e);
482d74f81f8SMilan Broz 			return;
483d74f81f8SMilan Broz 		}
484d74f81f8SMilan Broz 
485d74f81f8SMilan Broz 		/* Insert before an existing chunk? */
486d74f81f8SMilan Broz 		if (new_e->old_chunk == (e->old_chunk - 1) &&
487d74f81f8SMilan Broz 		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
488d74f81f8SMilan Broz 			dm_consecutive_chunk_count_inc(e);
489d74f81f8SMilan Broz 			e->old_chunk--;
490d74f81f8SMilan Broz 			e->new_chunk--;
491d74f81f8SMilan Broz 			free_exception(new_e);
492d74f81f8SMilan Broz 			return;
493d74f81f8SMilan Broz 		}
494d74f81f8SMilan Broz 
495d74f81f8SMilan Broz 		if (new_e->old_chunk > e->old_chunk)
496d74f81f8SMilan Broz 			break;
497d74f81f8SMilan Broz 	}
498d74f81f8SMilan Broz 
499d74f81f8SMilan Broz out:
500d74f81f8SMilan Broz 	list_add(&new_e->hash_list, e ? &e->hash_list : l);
501d74f81f8SMilan Broz }
502d74f81f8SMilan Broz 
503a159c1acSJonathan Brassow /*
504a159c1acSJonathan Brassow  * Callback used by the exception stores to load exceptions when
505a159c1acSJonathan Brassow  * initialising.
506a159c1acSJonathan Brassow  */
507a159c1acSJonathan Brassow static int dm_add_exception(void *context, chunk_t old, chunk_t new)
5081da177e4SLinus Torvalds {
509a159c1acSJonathan Brassow 	struct dm_snapshot *s = context;
510028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	e = alloc_exception();
5131da177e4SLinus Torvalds 	if (!e)
5141da177e4SLinus Torvalds 		return -ENOMEM;
5151da177e4SLinus Torvalds 
5161da177e4SLinus Torvalds 	e->old_chunk = old;
517d74f81f8SMilan Broz 
518d74f81f8SMilan Broz 	/* Consecutive_count is implicitly initialised to zero */
5191da177e4SLinus Torvalds 	e->new_chunk = new;
520d74f81f8SMilan Broz 
521d74f81f8SMilan Broz 	insert_completed_exception(s, e);
522d74f81f8SMilan Broz 
5231da177e4SLinus Torvalds 	return 0;
5241da177e4SLinus Torvalds }
5251da177e4SLinus Torvalds 
/*
 * Minimum of two values where zero means "unset": if either argument is
 * zero the other one is the result.  Arguments may be evaluated more
 * than once, so they must be free of side effects.
 */
#define min_not_zero(l, r)				\
	(((l) == 0) ? (r) :				\
	 (((r) == 0) ? (l) : min(l, r)))
5277e201b35SMikulas Patocka 
5287e201b35SMikulas Patocka /*
5297e201b35SMikulas Patocka  * Return a minimum chunk size of all snapshots that have the specified origin.
5307e201b35SMikulas Patocka  * Return zero if the origin has no snapshots.
5317e201b35SMikulas Patocka  */
5327e201b35SMikulas Patocka static sector_t __minimum_chunk_size(struct origin *o)
5337e201b35SMikulas Patocka {
5347e201b35SMikulas Patocka 	struct dm_snapshot *snap;
5357e201b35SMikulas Patocka 	unsigned chunk_size = 0;
5367e201b35SMikulas Patocka 
5377e201b35SMikulas Patocka 	if (o)
5387e201b35SMikulas Patocka 		list_for_each_entry(snap, &o->snapshots, list)
5397e201b35SMikulas Patocka 			chunk_size = min_not_zero(chunk_size,
5407e201b35SMikulas Patocka 						  snap->store->chunk_size);
5417e201b35SMikulas Patocka 
5427e201b35SMikulas Patocka 	return chunk_size;
5437e201b35SMikulas Patocka }
5447e201b35SMikulas Patocka 
5451da177e4SLinus Torvalds /*
5461da177e4SLinus Torvalds  * Hard coded magic.
5471da177e4SLinus Torvalds  */
5481da177e4SLinus Torvalds static int calc_max_buckets(void)
5491da177e4SLinus Torvalds {
5501da177e4SLinus Torvalds 	/* use a fixed size of 2MB */
5511da177e4SLinus Torvalds 	unsigned long mem = 2 * 1024 * 1024;
5521da177e4SLinus Torvalds 	mem /= sizeof(struct list_head);
5531da177e4SLinus Torvalds 
5541da177e4SLinus Torvalds 	return mem;
5551da177e4SLinus Torvalds }
5561da177e4SLinus Torvalds 
5571da177e4SLinus Torvalds /*
5581da177e4SLinus Torvalds  * Allocate room for a suitable hash table.
5591da177e4SLinus Torvalds  */
560fee1998eSJonathan Brassow static int init_hash_tables(struct dm_snapshot *s)
5611da177e4SLinus Torvalds {
5621da177e4SLinus Torvalds 	sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds 	/*
5651da177e4SLinus Torvalds 	 * Calculate based on the size of the original volume or
5661da177e4SLinus Torvalds 	 * the COW volume...
5671da177e4SLinus Torvalds 	 */
568fee1998eSJonathan Brassow 	cow_dev_size = get_dev_size(s->store->cow->bdev);
5691da177e4SLinus Torvalds 	origin_dev_size = get_dev_size(s->origin->bdev);
5701da177e4SLinus Torvalds 	max_buckets = calc_max_buckets();
5711da177e4SLinus Torvalds 
572fee1998eSJonathan Brassow 	hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift;
5731da177e4SLinus Torvalds 	hash_size = min(hash_size, max_buckets);
5741da177e4SLinus Torvalds 
5758e87b9b8SMikulas Patocka 	if (hash_size < 64)
5768e87b9b8SMikulas Patocka 		hash_size = 64;
5778defd830SRobert P. J. Day 	hash_size = rounddown_pow_of_two(hash_size);
578d74f81f8SMilan Broz 	if (init_exception_table(&s->complete, hash_size,
579d74f81f8SMilan Broz 				 DM_CHUNK_CONSECUTIVE_BITS))
5801da177e4SLinus Torvalds 		return -ENOMEM;
5811da177e4SLinus Torvalds 
5821da177e4SLinus Torvalds 	/*
5831da177e4SLinus Torvalds 	 * Allocate hash table for in-flight exceptions
5841da177e4SLinus Torvalds 	 * Make this smaller than the real hash table
5851da177e4SLinus Torvalds 	 */
5861da177e4SLinus Torvalds 	hash_size >>= 3;
5871da177e4SLinus Torvalds 	if (hash_size < 64)
5881da177e4SLinus Torvalds 		hash_size = 64;
5891da177e4SLinus Torvalds 
590d74f81f8SMilan Broz 	if (init_exception_table(&s->pending, hash_size, 0)) {
5911da177e4SLinus Torvalds 		exit_exception_table(&s->complete, exception_cache);
5921da177e4SLinus Torvalds 		return -ENOMEM;
5931da177e4SLinus Torvalds 	}
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	return 0;
5961da177e4SLinus Torvalds }
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds /*
5991da177e4SLinus Torvalds  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
6001da177e4SLinus Torvalds  */
6011da177e4SLinus Torvalds static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
6021da177e4SLinus Torvalds {
6031da177e4SLinus Torvalds 	struct dm_snapshot *s;
604cd45daffSMikulas Patocka 	int i;
6051da177e4SLinus Torvalds 	int r = -EINVAL;
6061da177e4SLinus Torvalds 	char *origin_path;
607fee1998eSJonathan Brassow 	struct dm_exception_store *store;
608fee1998eSJonathan Brassow 	unsigned args_used;
6091da177e4SLinus Torvalds 
6104c7e3bf4SMark McLoughlin 	if (argc != 4) {
61172d94861SAlasdair G Kergon 		ti->error = "requires exactly 4 arguments";
6121da177e4SLinus Torvalds 		r = -EINVAL;
613fee1998eSJonathan Brassow 		goto bad_args;
6141da177e4SLinus Torvalds 	}
6151da177e4SLinus Torvalds 
6161da177e4SLinus Torvalds 	origin_path = argv[0];
617fee1998eSJonathan Brassow 	argv++;
618fee1998eSJonathan Brassow 	argc--;
6191da177e4SLinus Torvalds 
620fee1998eSJonathan Brassow 	r = dm_exception_store_create(ti, argc, argv, &args_used, &store);
621fee1998eSJonathan Brassow 	if (r) {
622fee1998eSJonathan Brassow 		ti->error = "Couldn't create exception store";
6231da177e4SLinus Torvalds 		r = -EINVAL;
624fee1998eSJonathan Brassow 		goto bad_args;
6251da177e4SLinus Torvalds 	}
6261da177e4SLinus Torvalds 
627fee1998eSJonathan Brassow 	argv += args_used;
628fee1998eSJonathan Brassow 	argc -= args_used;
629fee1998eSJonathan Brassow 
6301da177e4SLinus Torvalds 	s = kmalloc(sizeof(*s), GFP_KERNEL);
631fee1998eSJonathan Brassow 	if (!s) {
6321da177e4SLinus Torvalds 		ti->error = "Cannot allocate snapshot context private "
6331da177e4SLinus Torvalds 		    "structure";
6341da177e4SLinus Torvalds 		r = -ENOMEM;
635fee1998eSJonathan Brassow 		goto bad_snap;
6361da177e4SLinus Torvalds 	}
6371da177e4SLinus Torvalds 
6381da177e4SLinus Torvalds 	r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
6391da177e4SLinus Torvalds 	if (r) {
6401da177e4SLinus Torvalds 		ti->error = "Cannot get origin device";
641fee1998eSJonathan Brassow 		goto bad_origin;
6421da177e4SLinus Torvalds 	}
6431da177e4SLinus Torvalds 
644fee1998eSJonathan Brassow 	s->store = store;
6451da177e4SLinus Torvalds 	s->valid = 1;
646aa14edebSAlasdair G Kergon 	s->active = 0;
647879129d2SMikulas Patocka 	atomic_set(&s->pending_exceptions_count, 0);
6481da177e4SLinus Torvalds 	init_rwsem(&s->lock);
649ca3a931fSAlasdair G Kergon 	spin_lock_init(&s->pe_lock);
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds 	/* Allocate hash table for COW data */
652fee1998eSJonathan Brassow 	if (init_hash_tables(s)) {
6531da177e4SLinus Torvalds 		ti->error = "Unable to allocate hash table space";
6541da177e4SLinus Torvalds 		r = -ENOMEM;
655fee1998eSJonathan Brassow 		goto bad_hash_tables;
6561da177e4SLinus Torvalds 	}
6571da177e4SLinus Torvalds 
658eb69aca5SHeinz Mauelshagen 	r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
6591da177e4SLinus Torvalds 	if (r) {
6601da177e4SLinus Torvalds 		ti->error = "Could not create kcopyd client";
661fee1998eSJonathan Brassow 		goto bad_kcopyd;
6621da177e4SLinus Torvalds 	}
6631da177e4SLinus Torvalds 
66492e86812SMikulas Patocka 	s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache);
66592e86812SMikulas Patocka 	if (!s->pending_pool) {
66692e86812SMikulas Patocka 		ti->error = "Could not allocate mempool for pending exceptions";
667fee1998eSJonathan Brassow 		goto bad_pending_pool;
66892e86812SMikulas Patocka 	}
66992e86812SMikulas Patocka 
670cd45daffSMikulas Patocka 	s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS,
671cd45daffSMikulas Patocka 							 tracked_chunk_cache);
672cd45daffSMikulas Patocka 	if (!s->tracked_chunk_pool) {
673cd45daffSMikulas Patocka 		ti->error = "Could not allocate tracked_chunk mempool for "
674cd45daffSMikulas Patocka 			    "tracking reads";
67592e86812SMikulas Patocka 		goto bad_tracked_chunk_pool;
676cd45daffSMikulas Patocka 	}
677cd45daffSMikulas Patocka 
678cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
679cd45daffSMikulas Patocka 		INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
680cd45daffSMikulas Patocka 
681cd45daffSMikulas Patocka 	spin_lock_init(&s->tracked_chunk_lock);
682cd45daffSMikulas Patocka 
683aa14edebSAlasdair G Kergon 	/* Metadata must only be loaded into one table at once */
684493df71cSJonathan Brassow 	r = s->store->type->read_metadata(s->store, dm_add_exception,
685493df71cSJonathan Brassow 					  (void *)s);
6860764147bSMilan Broz 	if (r < 0) {
687f9cea4f7SMark McLoughlin 		ti->error = "Failed to read snapshot metadata";
688cd45daffSMikulas Patocka 		goto bad_load_and_register;
6890764147bSMilan Broz 	} else if (r > 0) {
6900764147bSMilan Broz 		s->valid = 0;
6910764147bSMilan Broz 		DMWARN("Snapshot is marked invalid.");
692f9cea4f7SMark McLoughlin 	}
693aa14edebSAlasdair G Kergon 
694ca3a931fSAlasdair G Kergon 	bio_list_init(&s->queued_bios);
695c4028958SDavid Howells 	INIT_WORK(&s->queued_bios_work, flush_queued_bios);
696ca3a931fSAlasdair G Kergon 
6973f2412dcSMikulas Patocka 	if (!s->store->chunk_size) {
6983f2412dcSMikulas Patocka 		ti->error = "Chunk size not set";
6993f2412dcSMikulas Patocka 		goto bad_load_and_register;
7003f2412dcSMikulas Patocka 	}
7013f2412dcSMikulas Patocka 
7021da177e4SLinus Torvalds 	/* Add snapshot to the list of snapshots for this origin */
703aa14edebSAlasdair G Kergon 	/* Exceptions aren't triggered till snapshot_resume() is called */
7041da177e4SLinus Torvalds 	if (register_snapshot(s)) {
7051da177e4SLinus Torvalds 		r = -EINVAL;
7061da177e4SLinus Torvalds 		ti->error = "Cannot register snapshot origin";
707cd45daffSMikulas Patocka 		goto bad_load_and_register;
7081da177e4SLinus Torvalds 	}
7091da177e4SLinus Torvalds 
7101da177e4SLinus Torvalds 	ti->private = s;
711d0216849SJonathan Brassow 	ti->split_io = s->store->chunk_size;
712494b3ee7SMikulas Patocka 	ti->num_flush_requests = 1;
7131da177e4SLinus Torvalds 
7141da177e4SLinus Torvalds 	return 0;
7151da177e4SLinus Torvalds 
716cd45daffSMikulas Patocka bad_load_and_register:
717cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
718cd45daffSMikulas Patocka 
71992e86812SMikulas Patocka bad_tracked_chunk_pool:
72092e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
72192e86812SMikulas Patocka 
722fee1998eSJonathan Brassow bad_pending_pool:
723eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
7241da177e4SLinus Torvalds 
725fee1998eSJonathan Brassow bad_kcopyd:
7261da177e4SLinus Torvalds 	exit_exception_table(&s->pending, pending_cache);
7271da177e4SLinus Torvalds 	exit_exception_table(&s->complete, exception_cache);
7281da177e4SLinus Torvalds 
729fee1998eSJonathan Brassow bad_hash_tables:
7301da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
7311da177e4SLinus Torvalds 
732fee1998eSJonathan Brassow bad_origin:
7331da177e4SLinus Torvalds 	kfree(s);
7341da177e4SLinus Torvalds 
735fee1998eSJonathan Brassow bad_snap:
736fee1998eSJonathan Brassow 	dm_exception_store_destroy(store);
737fee1998eSJonathan Brassow 
738fee1998eSJonathan Brassow bad_args:
7391da177e4SLinus Torvalds 	return r;
7401da177e4SLinus Torvalds }
7411da177e4SLinus Torvalds 
74231c93a0cSMilan Broz static void __free_exceptions(struct dm_snapshot *s)
74331c93a0cSMilan Broz {
744eb69aca5SHeinz Mauelshagen 	dm_kcopyd_client_destroy(s->kcopyd_client);
74531c93a0cSMilan Broz 	s->kcopyd_client = NULL;
74631c93a0cSMilan Broz 
74731c93a0cSMilan Broz 	exit_exception_table(&s->pending, pending_cache);
74831c93a0cSMilan Broz 	exit_exception_table(&s->complete, exception_cache);
74931c93a0cSMilan Broz }
75031c93a0cSMilan Broz 
7511da177e4SLinus Torvalds static void snapshot_dtr(struct dm_target *ti)
7521da177e4SLinus Torvalds {
753cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
754cd45daffSMikulas Patocka 	int i;
755cd45daffSMikulas Patocka #endif
756028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
7571da177e4SLinus Torvalds 
758ca3a931fSAlasdair G Kergon 	flush_workqueue(ksnapd);
759ca3a931fSAlasdair G Kergon 
760138728dcSAlasdair G Kergon 	/* Prevent further origin writes from using this snapshot. */
761138728dcSAlasdair G Kergon 	/* After this returns there can be no new kcopyd jobs. */
7621da177e4SLinus Torvalds 	unregister_snapshot(s);
7631da177e4SLinus Torvalds 
764879129d2SMikulas Patocka 	while (atomic_read(&s->pending_exceptions_count))
76590fa1527SMikulas Patocka 		msleep(1);
766879129d2SMikulas Patocka 	/*
767879129d2SMikulas Patocka 	 * Ensure instructions in mempool_destroy aren't reordered
768879129d2SMikulas Patocka 	 * before atomic_read.
769879129d2SMikulas Patocka 	 */
770879129d2SMikulas Patocka 	smp_mb();
771879129d2SMikulas Patocka 
772cd45daffSMikulas Patocka #ifdef CONFIG_DM_DEBUG
773cd45daffSMikulas Patocka 	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
774cd45daffSMikulas Patocka 		BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
775cd45daffSMikulas Patocka #endif
776cd45daffSMikulas Patocka 
777cd45daffSMikulas Patocka 	mempool_destroy(s->tracked_chunk_pool);
778cd45daffSMikulas Patocka 
77931c93a0cSMilan Broz 	__free_exceptions(s);
7801da177e4SLinus Torvalds 
78192e86812SMikulas Patocka 	mempool_destroy(s->pending_pool);
78292e86812SMikulas Patocka 
7831da177e4SLinus Torvalds 	dm_put_device(ti, s->origin);
784fee1998eSJonathan Brassow 
785fee1998eSJonathan Brassow 	dm_exception_store_destroy(s->store);
786138728dcSAlasdair G Kergon 
7871da177e4SLinus Torvalds 	kfree(s);
7881da177e4SLinus Torvalds }
7891da177e4SLinus Torvalds 
7901da177e4SLinus Torvalds /*
7911da177e4SLinus Torvalds  * Flush a list of buffers.
7921da177e4SLinus Torvalds  */
7931da177e4SLinus Torvalds static void flush_bios(struct bio *bio)
7941da177e4SLinus Torvalds {
7951da177e4SLinus Torvalds 	struct bio *n;
7961da177e4SLinus Torvalds 
7971da177e4SLinus Torvalds 	while (bio) {
7981da177e4SLinus Torvalds 		n = bio->bi_next;
7991da177e4SLinus Torvalds 		bio->bi_next = NULL;
8001da177e4SLinus Torvalds 		generic_make_request(bio);
8011da177e4SLinus Torvalds 		bio = n;
8021da177e4SLinus Torvalds 	}
8031da177e4SLinus Torvalds }
8041da177e4SLinus Torvalds 
805c4028958SDavid Howells static void flush_queued_bios(struct work_struct *work)
806ca3a931fSAlasdair G Kergon {
807c4028958SDavid Howells 	struct dm_snapshot *s =
808c4028958SDavid Howells 		container_of(work, struct dm_snapshot, queued_bios_work);
809ca3a931fSAlasdair G Kergon 	struct bio *queued_bios;
810ca3a931fSAlasdair G Kergon 	unsigned long flags;
811ca3a931fSAlasdair G Kergon 
812ca3a931fSAlasdair G Kergon 	spin_lock_irqsave(&s->pe_lock, flags);
813ca3a931fSAlasdair G Kergon 	queued_bios = bio_list_get(&s->queued_bios);
814ca3a931fSAlasdair G Kergon 	spin_unlock_irqrestore(&s->pe_lock, flags);
815ca3a931fSAlasdair G Kergon 
816ca3a931fSAlasdair G Kergon 	flush_bios(queued_bios);
817ca3a931fSAlasdair G Kergon }
818ca3a931fSAlasdair G Kergon 
8191da177e4SLinus Torvalds /*
8201da177e4SLinus Torvalds  * Error a list of buffers.
8211da177e4SLinus Torvalds  */
8221da177e4SLinus Torvalds static void error_bios(struct bio *bio)
8231da177e4SLinus Torvalds {
8241da177e4SLinus Torvalds 	struct bio *n;
8251da177e4SLinus Torvalds 
8261da177e4SLinus Torvalds 	while (bio) {
8271da177e4SLinus Torvalds 		n = bio->bi_next;
8281da177e4SLinus Torvalds 		bio->bi_next = NULL;
8296712ecf8SNeilBrown 		bio_io_error(bio);
8301da177e4SLinus Torvalds 		bio = n;
8311da177e4SLinus Torvalds 	}
8321da177e4SLinus Torvalds }
8331da177e4SLinus Torvalds 
834695368acSAlasdair G Kergon static void __invalidate_snapshot(struct dm_snapshot *s, int err)
83576df1c65SAlasdair G Kergon {
83676df1c65SAlasdair G Kergon 	if (!s->valid)
83776df1c65SAlasdair G Kergon 		return;
83876df1c65SAlasdair G Kergon 
83976df1c65SAlasdair G Kergon 	if (err == -EIO)
84076df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Error reading/writing.");
84176df1c65SAlasdair G Kergon 	else if (err == -ENOMEM)
84276df1c65SAlasdair G Kergon 		DMERR("Invalidating snapshot: Unable to allocate exception.");
84376df1c65SAlasdair G Kergon 
844493df71cSJonathan Brassow 	if (s->store->type->drop_snapshot)
845493df71cSJonathan Brassow 		s->store->type->drop_snapshot(s->store);
84676df1c65SAlasdair G Kergon 
84776df1c65SAlasdair G Kergon 	s->valid = 0;
84876df1c65SAlasdair G Kergon 
8490cea9c78SJonathan Brassow 	dm_table_event(s->store->ti->table);
85076df1c65SAlasdair G Kergon }
85176df1c65SAlasdair G Kergon 
852028867acSAlasdair G Kergon static void get_pending_exception(struct dm_snap_pending_exception *pe)
8534b832e8dSAlasdair G Kergon {
8544b832e8dSAlasdair G Kergon 	atomic_inc(&pe->ref_count);
8554b832e8dSAlasdair G Kergon }
8564b832e8dSAlasdair G Kergon 
857028867acSAlasdair G Kergon static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
8584b832e8dSAlasdair G Kergon {
859028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *primary_pe;
8604b832e8dSAlasdair G Kergon 	struct bio *origin_bios = NULL;
8614b832e8dSAlasdair G Kergon 
8624b832e8dSAlasdair G Kergon 	primary_pe = pe->primary_pe;
8634b832e8dSAlasdair G Kergon 
8644b832e8dSAlasdair G Kergon 	/*
8654b832e8dSAlasdair G Kergon 	 * If this pe is involved in a write to the origin and
8664b832e8dSAlasdair G Kergon 	 * it is the last sibling to complete then release
8674b832e8dSAlasdair G Kergon 	 * the bios for the original write to the origin.
8684b832e8dSAlasdair G Kergon 	 */
8694b832e8dSAlasdair G Kergon 	if (primary_pe &&
8707c5f78b9SMikulas Patocka 	    atomic_dec_and_test(&primary_pe->ref_count)) {
8714b832e8dSAlasdair G Kergon 		origin_bios = bio_list_get(&primary_pe->origin_bios);
8727c5f78b9SMikulas Patocka 		free_pending_exception(primary_pe);
8737c5f78b9SMikulas Patocka 	}
8744b832e8dSAlasdair G Kergon 
8754b832e8dSAlasdair G Kergon 	/*
8764b832e8dSAlasdair G Kergon 	 * Free the pe if it's not linked to an origin write or if
8774b832e8dSAlasdair G Kergon 	 * it's not itself a primary pe.
8784b832e8dSAlasdair G Kergon 	 */
8794b832e8dSAlasdair G Kergon 	if (!primary_pe || primary_pe != pe)
8804b832e8dSAlasdair G Kergon 		free_pending_exception(pe);
8814b832e8dSAlasdair G Kergon 
8824b832e8dSAlasdair G Kergon 	return origin_bios;
8834b832e8dSAlasdair G Kergon }
8844b832e8dSAlasdair G Kergon 
/*
 * Finish a pending exception after its copy (and metadata commit) ended.
 *
 * @pe:      the pending exception that completed.
 * @success: non-zero if the copy/commit worked, zero on failure.
 *
 * On success a completed exception is inserted into the snapshot; on any
 * failure the bios queued on @pe against the snapshot are errored instead
 * of submitted.  Origin bios released by put_pending_exception() are
 * submitted in either case.
 *
 * Every path that reaches the "out" label has taken s->lock for write;
 * the lock is released once after the label.
 */
885028867acSAlasdair G Kergon static void pending_complete(struct dm_snap_pending_exception *pe, int success)
8861da177e4SLinus Torvalds {
887028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
8881da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
8899d493fa8SAlasdair G Kergon 	struct bio *origin_bios = NULL;
8909d493fa8SAlasdair G Kergon 	struct bio *snapshot_bios = NULL;
8919d493fa8SAlasdair G Kergon 	int error = 0;
8921da177e4SLinus Torvalds 
89376df1c65SAlasdair G Kergon 	if (!success) {
89476df1c65SAlasdair G Kergon 		/* Read/write error - snapshot is unusable */
8951da177e4SLinus Torvalds 		down_write(&s->lock);
896695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -EIO);
8979d493fa8SAlasdair G Kergon 		error = 1;
89876df1c65SAlasdair G Kergon 		goto out;
89976df1c65SAlasdair G Kergon 	}
90076df1c65SAlasdair G Kergon 
	/* Allocate the completed exception before taking the lock. */
90176df1c65SAlasdair G Kergon 	e = alloc_exception();
90276df1c65SAlasdair G Kergon 	if (!e) {
90376df1c65SAlasdair G Kergon 		down_write(&s->lock);
904695368acSAlasdair G Kergon 		__invalidate_snapshot(s, -ENOMEM);
9059d493fa8SAlasdair G Kergon 		error = 1;
9061da177e4SLinus Torvalds 		goto out;
9071da177e4SLinus Torvalds 	}
	/* The completed exception starts as a copy of the pending one. */
9081da177e4SLinus Torvalds 	*e = pe->e;
9091da177e4SLinus Torvalds 
9109d493fa8SAlasdair G Kergon 	down_write(&s->lock);
	/* The snapshot may have been invalidated before the lock was taken. */
9119d493fa8SAlasdair G Kergon 	if (!s->valid) {
9129d493fa8SAlasdair G Kergon 		free_exception(e);
9139d493fa8SAlasdair G Kergon 		error = 1;
9149d493fa8SAlasdair G Kergon 		goto out;
9159d493fa8SAlasdair G Kergon 	}
9169d493fa8SAlasdair G Kergon 
9171da177e4SLinus Torvalds 	/*
918a8d41b59SMikulas Patocka 	 * Check for conflicting reads. This is extremely improbable,
91990fa1527SMikulas Patocka 	 * so msleep(1) is sufficient and there is no need for a wait queue.
920a8d41b59SMikulas Patocka 	 */
	/* NOTE: this polling loop sleeps with s->lock held for write. */
921a8d41b59SMikulas Patocka 	while (__chunk_is_tracked(s, pe->e.old_chunk))
92290fa1527SMikulas Patocka 		msleep(1);
923a8d41b59SMikulas Patocka 
924a8d41b59SMikulas Patocka 	/*
9251da177e4SLinus Torvalds 	 * Add a proper exception, and remove the
9261da177e4SLinus Torvalds 	 * in-flight exception from the list.
9271da177e4SLinus Torvalds 	 */
928d74f81f8SMilan Broz 	insert_completed_exception(s, e);
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds  out:
931695368acSAlasdair G Kergon 	remove_exception(&pe->e);
	/*
	 * Detach the snapshot bios first: put_pending_exception() may free
	 * pe, so pe must not be touched after that call.
	 */
9329d493fa8SAlasdair G Kergon 	snapshot_bios = bio_list_get(&pe->snapshot_bios);
9334b832e8dSAlasdair G Kergon 	origin_bios = put_pending_exception(pe);
934b4b610f6SAlasdair G Kergon 
9359d493fa8SAlasdair G Kergon 	up_write(&s->lock);
9369d493fa8SAlasdair G Kergon 
9379d493fa8SAlasdair G Kergon 	/* Submit any pending write bios */
9389d493fa8SAlasdair G Kergon 	if (error)
9399d493fa8SAlasdair G Kergon 		error_bios(snapshot_bios);
9409d493fa8SAlasdair G Kergon 	else
9419d493fa8SAlasdair G Kergon 		flush_bios(snapshot_bios);
9429d493fa8SAlasdair G Kergon 
	/* Origin bios proceed whether or not this snapshot's copy worked. */
9439d493fa8SAlasdair G Kergon 	flush_bios(origin_bios);
9441da177e4SLinus Torvalds }
9451da177e4SLinus Torvalds 
/*
 * Completion callback handed to the exception store's commit_exception().
 *
 * @context is the pending exception that was committed; the result is
 * forwarded unchanged to pending_complete().
 */
static void commit_callback(void *context, int success)
{
	pending_complete(context, success);
}
9521da177e4SLinus Torvalds 
9531da177e4SLinus Torvalds /*
9541da177e4SLinus Torvalds  * Called when the copy I/O has finished.  kcopyd actually runs
9551da177e4SLinus Torvalds  * this code so don't block.
9561da177e4SLinus Torvalds  */
9574cdc1d1fSAlasdair G Kergon static void copy_callback(int read_err, unsigned long write_err, void *context)
9581da177e4SLinus Torvalds {
959028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = context;
9601da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
9611da177e4SLinus Torvalds 
9621da177e4SLinus Torvalds 	if (read_err || write_err)
9631da177e4SLinus Torvalds 		pending_complete(pe, 0);
9641da177e4SLinus Torvalds 
9651da177e4SLinus Torvalds 	else
9661da177e4SLinus Torvalds 		/* Update the metadata if we are persistent */
967493df71cSJonathan Brassow 		s->store->type->commit_exception(s->store, &pe->e,
968b2a11465SJonathan Brassow 						 commit_callback, pe);
9691da177e4SLinus Torvalds }
9701da177e4SLinus Torvalds 
9711da177e4SLinus Torvalds /*
9721da177e4SLinus Torvalds  * Dispatches the copy operation to kcopyd.
9731da177e4SLinus Torvalds  */
974028867acSAlasdair G Kergon static void start_copy(struct dm_snap_pending_exception *pe)
9751da177e4SLinus Torvalds {
9761da177e4SLinus Torvalds 	struct dm_snapshot *s = pe->snap;
97722a1ceb1SHeinz Mauelshagen 	struct dm_io_region src, dest;
9781da177e4SLinus Torvalds 	struct block_device *bdev = s->origin->bdev;
9791da177e4SLinus Torvalds 	sector_t dev_size;
9801da177e4SLinus Torvalds 
9811da177e4SLinus Torvalds 	dev_size = get_dev_size(bdev);
9821da177e4SLinus Torvalds 
9831da177e4SLinus Torvalds 	src.bdev = bdev;
98471fab00aSJonathan Brassow 	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
985df96eee6SMikulas Patocka 	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
9861da177e4SLinus Torvalds 
98749beb2b8SJonathan Brassow 	dest.bdev = s->store->cow->bdev;
98871fab00aSJonathan Brassow 	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
9891da177e4SLinus Torvalds 	dest.count = src.count;
9901da177e4SLinus Torvalds 
9911da177e4SLinus Torvalds 	/* Hand over to kcopyd */
992eb69aca5SHeinz Mauelshagen 	dm_kcopyd_copy(s->kcopyd_client,
9931da177e4SLinus Torvalds 		    &src, 1, &dest, 0, copy_callback, pe);
9941da177e4SLinus Torvalds }
9951da177e4SLinus Torvalds 
9962913808eSMikulas Patocka static struct dm_snap_pending_exception *
9972913808eSMikulas Patocka __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
9982913808eSMikulas Patocka {
9992913808eSMikulas Patocka 	struct dm_snap_exception *e = lookup_exception(&s->pending, chunk);
10002913808eSMikulas Patocka 
10012913808eSMikulas Patocka 	if (!e)
10022913808eSMikulas Patocka 		return NULL;
10032913808eSMikulas Patocka 
10042913808eSMikulas Patocka 	return container_of(e, struct dm_snap_pending_exception, e);
10052913808eSMikulas Patocka }
10062913808eSMikulas Patocka 
10071da177e4SLinus Torvalds /*
10081da177e4SLinus Torvalds  * Looks to see if this snapshot already has a pending exception
10091da177e4SLinus Torvalds  * for this chunk, otherwise it allocates a new one and inserts
10101da177e4SLinus Torvalds  * it into the pending table.
10111da177e4SLinus Torvalds  *
10121da177e4SLinus Torvalds  * NOTE: a write lock must be held on snap->lock before calling
10131da177e4SLinus Torvalds  * this.
10141da177e4SLinus Torvalds  */
1015028867acSAlasdair G Kergon static struct dm_snap_pending_exception *
1016c6621392SMikulas Patocka __find_pending_exception(struct dm_snapshot *s,
1017c6621392SMikulas Patocka 			 struct dm_snap_pending_exception *pe, chunk_t chunk)
10181da177e4SLinus Torvalds {
1019c6621392SMikulas Patocka 	struct dm_snap_pending_exception *pe2;
102076df1c65SAlasdair G Kergon 
10212913808eSMikulas Patocka 	pe2 = __lookup_pending_exception(s, chunk);
10222913808eSMikulas Patocka 	if (pe2) {
10231da177e4SLinus Torvalds 		free_pending_exception(pe);
10242913808eSMikulas Patocka 		return pe2;
102576df1c65SAlasdair G Kergon 	}
102676df1c65SAlasdair G Kergon 
10271da177e4SLinus Torvalds 	pe->e.old_chunk = chunk;
10281da177e4SLinus Torvalds 	bio_list_init(&pe->origin_bios);
10291da177e4SLinus Torvalds 	bio_list_init(&pe->snapshot_bios);
1030b4b610f6SAlasdair G Kergon 	pe->primary_pe = NULL;
10314b832e8dSAlasdair G Kergon 	atomic_set(&pe->ref_count, 0);
10321da177e4SLinus Torvalds 	pe->started = 0;
10331da177e4SLinus Torvalds 
1034493df71cSJonathan Brassow 	if (s->store->type->prepare_exception(s->store, &pe->e)) {
10351da177e4SLinus Torvalds 		free_pending_exception(pe);
10361da177e4SLinus Torvalds 		return NULL;
10371da177e4SLinus Torvalds 	}
10381da177e4SLinus Torvalds 
10394b832e8dSAlasdair G Kergon 	get_pending_exception(pe);
10401da177e4SLinus Torvalds 	insert_exception(&s->pending, &pe->e);
10411da177e4SLinus Torvalds 
10421da177e4SLinus Torvalds 	return pe;
10431da177e4SLinus Torvalds }
10441da177e4SLinus Torvalds 
1045028867acSAlasdair G Kergon static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e,
1046d74f81f8SMilan Broz 			    struct bio *bio, chunk_t chunk)
10471da177e4SLinus Torvalds {
104849beb2b8SJonathan Brassow 	bio->bi_bdev = s->store->cow->bdev;
104971fab00aSJonathan Brassow 	bio->bi_sector = chunk_to_sector(s->store,
105071fab00aSJonathan Brassow 					 dm_chunk_number(e->new_chunk) +
1051d74f81f8SMilan Broz 					 (chunk - e->old_chunk)) +
105271fab00aSJonathan Brassow 					 (bio->bi_sector &
105371fab00aSJonathan Brassow 					  s->store->chunk_mask);
10541da177e4SLinus Torvalds }
10551da177e4SLinus Torvalds 
/*
 * Map a bio submitted to the snapshot target.
 *
 * Returns:
 *  DM_MAPIO_REMAPPED  - the bio was retargeted (to the COW device for an
 *                       empty barrier or an already-remapped chunk, to the
 *                       origin for a read of an unremapped chunk);
 *  DM_MAPIO_SUBMITTED - a write was queued on a pending exception and will
 *                       be submitted once that exception completes;
 *  -EIO               - the snapshot is (or just became) invalid.
 *
 * The "out_unlock" label is reached with s->lock held for write; the
 * "out" label is reached with it already released.
 */
10561da177e4SLinus Torvalds static int snapshot_map(struct dm_target *ti, struct bio *bio,
10571da177e4SLinus Torvalds 			union map_info *map_context)
10581da177e4SLinus Torvalds {
1059028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
1060028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
1061d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
10621da177e4SLinus Torvalds 	chunk_t chunk;
1063028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe = NULL;
10641da177e4SLinus Torvalds 
	/* Empty barriers carry no data: just point them at the COW device. */
1065494b3ee7SMikulas Patocka 	if (unlikely(bio_empty_barrier(bio))) {
1066494b3ee7SMikulas Patocka 		bio->bi_bdev = s->store->cow->bdev;
1067494b3ee7SMikulas Patocka 		return DM_MAPIO_REMAPPED;
1068494b3ee7SMikulas Patocka 	}
1069494b3ee7SMikulas Patocka 
107071fab00aSJonathan Brassow 	chunk = sector_to_chunk(s->store, bio->bi_sector);
10711da177e4SLinus Torvalds 
10721da177e4SLinus Torvalds 	/* Full snapshots are not usable */
107376df1c65SAlasdair G Kergon 	/* To get here the table must be live so s->active is always set. */
	/* Unlocked early-out; validity is re-checked under the lock below. */
10741da177e4SLinus Torvalds 	if (!s->valid)
1075f6a80ea8SAlasdair G Kergon 		return -EIO;
10761da177e4SLinus Torvalds 
10771da177e4SLinus Torvalds 	/* FIXME: should only take write lock if we need
10781da177e4SLinus Torvalds 	 * to copy an exception */
10791da177e4SLinus Torvalds 	down_write(&s->lock);
10801da177e4SLinus Torvalds 
108176df1c65SAlasdair G Kergon 	if (!s->valid) {
108276df1c65SAlasdair G Kergon 		r = -EIO;
108376df1c65SAlasdair G Kergon 		goto out_unlock;
108476df1c65SAlasdair G Kergon 	}
108576df1c65SAlasdair G Kergon 
10861da177e4SLinus Torvalds 	/* If the block is already remapped - use that, else remap it */
10871da177e4SLinus Torvalds 	e = lookup_exception(&s->complete, chunk);
10881da177e4SLinus Torvalds 	if (e) {
1089d74f81f8SMilan Broz 		remap_exception(s, e, bio, chunk);
109076df1c65SAlasdair G Kergon 		goto out_unlock;
109176df1c65SAlasdair G Kergon 	}
10921da177e4SLinus Torvalds 
1093ba40a2aaSAlasdair G Kergon 	/*
1094ba40a2aaSAlasdair G Kergon 	 * Write to snapshot - higher level takes care of RW/RO
1095ba40a2aaSAlasdair G Kergon 	 * flags so we should only get this if we are
1096ba40a2aaSAlasdair G Kergon 	 * writeable.
1097ba40a2aaSAlasdair G Kergon 	 */
1098ba40a2aaSAlasdair G Kergon 	if (bio_rw(bio) == WRITE) {
10992913808eSMikulas Patocka 		pe = __lookup_pending_exception(s, chunk);
11002913808eSMikulas Patocka 		if (!pe) {
			/*
			 * The allocation is done with s->lock dropped
			 * (presumably because it may sleep - confirm against
			 * alloc_pending_exception()).  Everything checked
			 * above may have changed meanwhile, so validity and
			 * the completed table are checked again afterwards.
			 */
1101c6621392SMikulas Patocka 			up_write(&s->lock);
1102c6621392SMikulas Patocka 			pe = alloc_pending_exception(s);
1103c6621392SMikulas Patocka 			down_write(&s->lock);
1104c6621392SMikulas Patocka 
1105c6621392SMikulas Patocka 			if (!s->valid) {
1106c6621392SMikulas Patocka 				free_pending_exception(pe);
1107c6621392SMikulas Patocka 				r = -EIO;
1108c6621392SMikulas Patocka 				goto out_unlock;
1109c6621392SMikulas Patocka 			}
1110c6621392SMikulas Patocka 
111135bf659bSMikulas Patocka 			e = lookup_exception(&s->complete, chunk);
111235bf659bSMikulas Patocka 			if (e) {
111335bf659bSMikulas Patocka 				free_pending_exception(pe);
111435bf659bSMikulas Patocka 				remap_exception(s, e, bio, chunk);
111535bf659bSMikulas Patocka 				goto out_unlock;
111635bf659bSMikulas Patocka 			}
111735bf659bSMikulas Patocka 
			/*
			 * Installs pe, or frees it and returns the pending
			 * exception someone else inserted in the meantime.
			 * NULL means the store could not prepare the
			 * exception; pe has already been freed then.
			 */
1118c6621392SMikulas Patocka 			pe = __find_pending_exception(s, pe, chunk);
11191da177e4SLinus Torvalds 			if (!pe) {
1120695368acSAlasdair G Kergon 				__invalidate_snapshot(s, -ENOMEM);
11211da177e4SLinus Torvalds 				r = -EIO;
112276df1c65SAlasdair G Kergon 				goto out_unlock;
112376df1c65SAlasdair G Kergon 			}
11242913808eSMikulas Patocka 		}
112576df1c65SAlasdair G Kergon 
		/* Retarget the bio now, but hold it until the copy is done. */
1126d74f81f8SMilan Broz 		remap_exception(s, &pe->e, bio, chunk);
11271da177e4SLinus Torvalds 		bio_list_add(&pe->snapshot_bios, bio);
11281da177e4SLinus Torvalds 
1129d2a7ad29SKiyoshi Ueda 		r = DM_MAPIO_SUBMITTED;
1130ba40a2aaSAlasdair G Kergon 
11311da177e4SLinus Torvalds 		if (!pe->started) {
11321da177e4SLinus Torvalds 			/* this is protected by snap->lock */
11331da177e4SLinus Torvalds 			pe->started = 1;
			/* The copy is started with the lock released. */
113476df1c65SAlasdair G Kergon 			up_write(&s->lock);
113576df1c65SAlasdair G Kergon 			start_copy(pe);
1136ba40a2aaSAlasdair G Kergon 			goto out;
1137ba40a2aaSAlasdair G Kergon 		}
1138cd45daffSMikulas Patocka 	} else {
		/*
		 * Read of a chunk with no completed exception: serve it from
		 * the origin and record the chunk as tracked;
		 * snapshot_end_io() stops the tracking via map_context->ptr.
		 */
11391da177e4SLinus Torvalds 		bio->bi_bdev = s->origin->bdev;
1140cd45daffSMikulas Patocka 		map_context->ptr = track_chunk(s, chunk);
1141cd45daffSMikulas Patocka 	}
11421da177e4SLinus Torvalds 
1143ba40a2aaSAlasdair G Kergon  out_unlock:
1144ba40a2aaSAlasdair G Kergon 	up_write(&s->lock);
1145ba40a2aaSAlasdair G Kergon  out:
11461da177e4SLinus Torvalds 	return r;
11471da177e4SLinus Torvalds }
11481da177e4SLinus Torvalds 
1149cd45daffSMikulas Patocka static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
1150cd45daffSMikulas Patocka 			   int error, union map_info *map_context)
1151cd45daffSMikulas Patocka {
1152cd45daffSMikulas Patocka 	struct dm_snapshot *s = ti->private;
1153cd45daffSMikulas Patocka 	struct dm_snap_tracked_chunk *c = map_context->ptr;
1154cd45daffSMikulas Patocka 
1155cd45daffSMikulas Patocka 	if (c)
1156cd45daffSMikulas Patocka 		stop_tracking_chunk(s, c);
1157cd45daffSMikulas Patocka 
1158cd45daffSMikulas Patocka 	return 0;
1159cd45daffSMikulas Patocka }
1160cd45daffSMikulas Patocka 
11611da177e4SLinus Torvalds static void snapshot_resume(struct dm_target *ti)
11621da177e4SLinus Torvalds {
1163028867acSAlasdair G Kergon 	struct dm_snapshot *s = ti->private;
11641da177e4SLinus Torvalds 
1165aa14edebSAlasdair G Kergon 	down_write(&s->lock);
1166aa14edebSAlasdair G Kergon 	s->active = 1;
1167aa14edebSAlasdair G Kergon 	up_write(&s->lock);
11681da177e4SLinus Torvalds }
11691da177e4SLinus Torvalds 
11701da177e4SLinus Torvalds static int snapshot_status(struct dm_target *ti, status_type_t type,
11711da177e4SLinus Torvalds 			   char *result, unsigned int maxlen)
11721da177e4SLinus Torvalds {
11732e4a31dfSJonathan Brassow 	unsigned sz = 0;
1174028867acSAlasdair G Kergon 	struct dm_snapshot *snap = ti->private;
11751da177e4SLinus Torvalds 
11761da177e4SLinus Torvalds 	switch (type) {
11771da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
117894e76572SMikulas Patocka 
117994e76572SMikulas Patocka 		down_write(&snap->lock);
118094e76572SMikulas Patocka 
11811da177e4SLinus Torvalds 		if (!snap->valid)
11822e4a31dfSJonathan Brassow 			DMEMIT("Invalid");
11831da177e4SLinus Torvalds 		else {
1184493df71cSJonathan Brassow 			if (snap->store->type->fraction_full) {
11851da177e4SLinus Torvalds 				sector_t numerator, denominator;
1186493df71cSJonathan Brassow 				snap->store->type->fraction_full(snap->store,
11871da177e4SLinus Torvalds 								 &numerator,
11881da177e4SLinus Torvalds 								 &denominator);
11892e4a31dfSJonathan Brassow 				DMEMIT("%llu/%llu",
11904ee218cdSAndrew Morton 				       (unsigned long long)numerator,
11914ee218cdSAndrew Morton 				       (unsigned long long)denominator);
11921da177e4SLinus Torvalds 			}
11931da177e4SLinus Torvalds 			else
11942e4a31dfSJonathan Brassow 				DMEMIT("Unknown");
11951da177e4SLinus Torvalds 		}
119694e76572SMikulas Patocka 
119794e76572SMikulas Patocka 		up_write(&snap->lock);
119894e76572SMikulas Patocka 
11991da177e4SLinus Torvalds 		break;
12001da177e4SLinus Torvalds 
12011da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
12021da177e4SLinus Torvalds 		/*
12031da177e4SLinus Torvalds 		 * kdevname returns a static pointer so we need
12041da177e4SLinus Torvalds 		 * to make private copies if the output is to
12051da177e4SLinus Torvalds 		 * make sense.
12061da177e4SLinus Torvalds 		 */
12072e4a31dfSJonathan Brassow 		DMEMIT("%s", snap->origin->name);
12081e302a92SJonathan Brassow 		snap->store->type->status(snap->store, type, result + sz,
12091e302a92SJonathan Brassow 					  maxlen - sz);
12101da177e4SLinus Torvalds 		break;
12111da177e4SLinus Torvalds 	}
12121da177e4SLinus Torvalds 
12131da177e4SLinus Torvalds 	return 0;
12141da177e4SLinus Torvalds }
12151da177e4SLinus Torvalds 
12168811f46cSMike Snitzer static int snapshot_iterate_devices(struct dm_target *ti,
12178811f46cSMike Snitzer 				    iterate_devices_callout_fn fn, void *data)
12188811f46cSMike Snitzer {
12198811f46cSMike Snitzer 	struct dm_snapshot *snap = ti->private;
12208811f46cSMike Snitzer 
12218811f46cSMike Snitzer 	return fn(ti, snap->origin, 0, ti->len, data);
12228811f46cSMike Snitzer }
12238811f46cSMike Snitzer 
12248811f46cSMike Snitzer 
12251da177e4SLinus Torvalds /*-----------------------------------------------------------------
12261da177e4SLinus Torvalds  * Origin methods
12271da177e4SLinus Torvalds  *---------------------------------------------------------------*/
12281da177e4SLinus Torvalds static int __origin_write(struct list_head *snapshots, struct bio *bio)
12291da177e4SLinus Torvalds {
1230d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED, first = 0;
12311da177e4SLinus Torvalds 	struct dm_snapshot *snap;
1232028867acSAlasdair G Kergon 	struct dm_snap_exception *e;
1233028867acSAlasdair G Kergon 	struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
12341da177e4SLinus Torvalds 	chunk_t chunk;
1235eccf0817SAlasdair G Kergon 	LIST_HEAD(pe_queue);
12361da177e4SLinus Torvalds 
12371da177e4SLinus Torvalds 	/* Do all the snapshots on this origin */
12381da177e4SLinus Torvalds 	list_for_each_entry (snap, snapshots, list) {
12391da177e4SLinus Torvalds 
124076df1c65SAlasdair G Kergon 		down_write(&snap->lock);
124176df1c65SAlasdair G Kergon 
1242aa14edebSAlasdair G Kergon 		/* Only deal with valid and active snapshots */
1243aa14edebSAlasdair G Kergon 		if (!snap->valid || !snap->active)
124476df1c65SAlasdair G Kergon 			goto next_snapshot;
12451da177e4SLinus Torvalds 
1246d5e404c1SAlasdair G Kergon 		/* Nothing to do if writing beyond end of snapshot */
12470cea9c78SJonathan Brassow 		if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table))
124876df1c65SAlasdair G Kergon 			goto next_snapshot;
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds 		/*
12511da177e4SLinus Torvalds 		 * Remember, different snapshots can have
12521da177e4SLinus Torvalds 		 * different chunk sizes.
12531da177e4SLinus Torvalds 		 */
125471fab00aSJonathan Brassow 		chunk = sector_to_chunk(snap->store, bio->bi_sector);
12551da177e4SLinus Torvalds 
12561da177e4SLinus Torvalds 		/*
12571da177e4SLinus Torvalds 		 * Check exception table to see if block
12581da177e4SLinus Torvalds 		 * is already remapped in this snapshot
12591da177e4SLinus Torvalds 		 * and trigger an exception if not.
1260b4b610f6SAlasdair G Kergon 		 *
12614b832e8dSAlasdair G Kergon 		 * ref_count is initialised to 1 so pending_complete()
1262b4b610f6SAlasdair G Kergon 		 * won't destroy the primary_pe while we're inside this loop.
12631da177e4SLinus Torvalds 		 */
12641da177e4SLinus Torvalds 		e = lookup_exception(&snap->complete, chunk);
126576df1c65SAlasdair G Kergon 		if (e)
126676df1c65SAlasdair G Kergon 			goto next_snapshot;
126776df1c65SAlasdair G Kergon 
12682913808eSMikulas Patocka 		pe = __lookup_pending_exception(snap, chunk);
12692913808eSMikulas Patocka 		if (!pe) {
1270c6621392SMikulas Patocka 			up_write(&snap->lock);
1271c6621392SMikulas Patocka 			pe = alloc_pending_exception(snap);
1272c6621392SMikulas Patocka 			down_write(&snap->lock);
1273c6621392SMikulas Patocka 
1274c6621392SMikulas Patocka 			if (!snap->valid) {
1275c6621392SMikulas Patocka 				free_pending_exception(pe);
1276c6621392SMikulas Patocka 				goto next_snapshot;
1277c6621392SMikulas Patocka 			}
1278c6621392SMikulas Patocka 
127935bf659bSMikulas Patocka 			e = lookup_exception(&snap->complete, chunk);
128035bf659bSMikulas Patocka 			if (e) {
128135bf659bSMikulas Patocka 				free_pending_exception(pe);
128235bf659bSMikulas Patocka 				goto next_snapshot;
128335bf659bSMikulas Patocka 			}
128435bf659bSMikulas Patocka 
1285c6621392SMikulas Patocka 			pe = __find_pending_exception(snap, pe, chunk);
12861da177e4SLinus Torvalds 			if (!pe) {
1287695368acSAlasdair G Kergon 				__invalidate_snapshot(snap, -ENOMEM);
128876df1c65SAlasdair G Kergon 				goto next_snapshot;
128976df1c65SAlasdair G Kergon 			}
12902913808eSMikulas Patocka 		}
12911da177e4SLinus Torvalds 
1292b4b610f6SAlasdair G Kergon 		if (!primary_pe) {
1293b4b610f6SAlasdair G Kergon 			/*
1294b4b610f6SAlasdair G Kergon 			 * Either every pe here has same
1295b4b610f6SAlasdair G Kergon 			 * primary_pe or none has one yet.
1296b4b610f6SAlasdair G Kergon 			 */
1297b4b610f6SAlasdair G Kergon 			if (pe->primary_pe)
1298b4b610f6SAlasdair G Kergon 				primary_pe = pe->primary_pe;
1299b4b610f6SAlasdair G Kergon 			else {
1300b4b610f6SAlasdair G Kergon 				primary_pe = pe;
1301b4b610f6SAlasdair G Kergon 				first = 1;
1302eccf0817SAlasdair G Kergon 			}
1303b4b610f6SAlasdair G Kergon 
130476df1c65SAlasdair G Kergon 			bio_list_add(&primary_pe->origin_bios, bio);
130576df1c65SAlasdair G Kergon 
1306d2a7ad29SKiyoshi Ueda 			r = DM_MAPIO_SUBMITTED;
1307b4b610f6SAlasdair G Kergon 		}
130876df1c65SAlasdair G Kergon 
1309b4b610f6SAlasdair G Kergon 		if (!pe->primary_pe) {
1310b4b610f6SAlasdair G Kergon 			pe->primary_pe = primary_pe;
13114b832e8dSAlasdair G Kergon 			get_pending_exception(primary_pe);
1312b4b610f6SAlasdair G Kergon 		}
131376df1c65SAlasdair G Kergon 
1314eccf0817SAlasdair G Kergon 		if (!pe->started) {
1315eccf0817SAlasdair G Kergon 			pe->started = 1;
1316eccf0817SAlasdair G Kergon 			list_add_tail(&pe->list, &pe_queue);
1317eccf0817SAlasdair G Kergon 		}
13181da177e4SLinus Torvalds 
131976df1c65SAlasdair G Kergon  next_snapshot:
13201da177e4SLinus Torvalds 		up_write(&snap->lock);
13211da177e4SLinus Torvalds 	}
13221da177e4SLinus Torvalds 
1323b4b610f6SAlasdair G Kergon 	if (!primary_pe)
13244b832e8dSAlasdair G Kergon 		return r;
1325b4b610f6SAlasdair G Kergon 
1326b4b610f6SAlasdair G Kergon 	/*
1327b4b610f6SAlasdair G Kergon 	 * If this is the first time we're processing this chunk and
13284b832e8dSAlasdair G Kergon 	 * ref_count is now 1 it means all the pending exceptions
1329b4b610f6SAlasdair G Kergon 	 * got completed while we were in the loop above, so it falls to
1330b4b610f6SAlasdair G Kergon 	 * us here to remove the primary_pe and submit any origin_bios.
1331b4b610f6SAlasdair G Kergon 	 */
1332b4b610f6SAlasdair G Kergon 
13334b832e8dSAlasdair G Kergon 	if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
1334b4b610f6SAlasdair G Kergon 		flush_bios(bio_list_get(&primary_pe->origin_bios));
1335b4b610f6SAlasdair G Kergon 		free_pending_exception(primary_pe);
1336b4b610f6SAlasdair G Kergon 		/* If we got here, pe_queue is necessarily empty. */
13374b832e8dSAlasdair G Kergon 		return r;
1338b4b610f6SAlasdair G Kergon 	}
1339b4b610f6SAlasdair G Kergon 
13401da177e4SLinus Torvalds 	/*
13411da177e4SLinus Torvalds 	 * Now that we have a complete pe list we can start the copying.
13421da177e4SLinus Torvalds 	 */
1343eccf0817SAlasdair G Kergon 	list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
13441da177e4SLinus Torvalds 		start_copy(pe);
13451da177e4SLinus Torvalds 
13461da177e4SLinus Torvalds 	return r;
13471da177e4SLinus Torvalds }
13481da177e4SLinus Torvalds 
13491da177e4SLinus Torvalds /*
13501da177e4SLinus Torvalds  * Called on a write from the origin driver.
13511da177e4SLinus Torvalds  */
13521da177e4SLinus Torvalds static int do_origin(struct dm_dev *origin, struct bio *bio)
13531da177e4SLinus Torvalds {
13541da177e4SLinus Torvalds 	struct origin *o;
1355d2a7ad29SKiyoshi Ueda 	int r = DM_MAPIO_REMAPPED;
13561da177e4SLinus Torvalds 
13571da177e4SLinus Torvalds 	down_read(&_origins_lock);
13581da177e4SLinus Torvalds 	o = __lookup_origin(origin->bdev);
13591da177e4SLinus Torvalds 	if (o)
13601da177e4SLinus Torvalds 		r = __origin_write(&o->snapshots, bio);
13611da177e4SLinus Torvalds 	up_read(&_origins_lock);
13621da177e4SLinus Torvalds 
13631da177e4SLinus Torvalds 	return r;
13641da177e4SLinus Torvalds }
13651da177e4SLinus Torvalds 
/*
 * Origin: maps a linear range of a device, with hooks for snapshotting.
 */
13691da177e4SLinus Torvalds 
13701da177e4SLinus Torvalds /*
13711da177e4SLinus Torvalds  * Construct an origin mapping: <dev_path>
13721da177e4SLinus Torvalds  * The context for an origin is merely a 'struct dm_dev *'
13731da177e4SLinus Torvalds  * pointing to the real device.
13741da177e4SLinus Torvalds  */
13751da177e4SLinus Torvalds static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
13761da177e4SLinus Torvalds {
13771da177e4SLinus Torvalds 	int r;
13781da177e4SLinus Torvalds 	struct dm_dev *dev;
13791da177e4SLinus Torvalds 
13801da177e4SLinus Torvalds 	if (argc != 1) {
138172d94861SAlasdair G Kergon 		ti->error = "origin: incorrect number of arguments";
13821da177e4SLinus Torvalds 		return -EINVAL;
13831da177e4SLinus Torvalds 	}
13841da177e4SLinus Torvalds 
13851da177e4SLinus Torvalds 	r = dm_get_device(ti, argv[0], 0, ti->len,
13861da177e4SLinus Torvalds 			  dm_table_get_mode(ti->table), &dev);
13871da177e4SLinus Torvalds 	if (r) {
13881da177e4SLinus Torvalds 		ti->error = "Cannot get target device";
13891da177e4SLinus Torvalds 		return r;
13901da177e4SLinus Torvalds 	}
13911da177e4SLinus Torvalds 
13921da177e4SLinus Torvalds 	ti->private = dev;
1393494b3ee7SMikulas Patocka 	ti->num_flush_requests = 1;
1394494b3ee7SMikulas Patocka 
13951da177e4SLinus Torvalds 	return 0;
13961da177e4SLinus Torvalds }
13971da177e4SLinus Torvalds 
13981da177e4SLinus Torvalds static void origin_dtr(struct dm_target *ti)
13991da177e4SLinus Torvalds {
1400028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14011da177e4SLinus Torvalds 	dm_put_device(ti, dev);
14021da177e4SLinus Torvalds }
14031da177e4SLinus Torvalds 
14041da177e4SLinus Torvalds static int origin_map(struct dm_target *ti, struct bio *bio,
14051da177e4SLinus Torvalds 		      union map_info *map_context)
14061da177e4SLinus Torvalds {
1407028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14081da177e4SLinus Torvalds 	bio->bi_bdev = dev->bdev;
14091da177e4SLinus Torvalds 
1410494b3ee7SMikulas Patocka 	if (unlikely(bio_empty_barrier(bio)))
1411494b3ee7SMikulas Patocka 		return DM_MAPIO_REMAPPED;
1412494b3ee7SMikulas Patocka 
14131da177e4SLinus Torvalds 	/* Only tell snapshots if this is a write */
1414d2a7ad29SKiyoshi Ueda 	return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED;
14151da177e4SLinus Torvalds }
14161da177e4SLinus Torvalds 
14171da177e4SLinus Torvalds /*
14181da177e4SLinus Torvalds  * Set the target "split_io" field to the minimum of all the snapshots'
14191da177e4SLinus Torvalds  * chunk sizes.
14201da177e4SLinus Torvalds  */
14211da177e4SLinus Torvalds static void origin_resume(struct dm_target *ti)
14221da177e4SLinus Torvalds {
1423028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14241da177e4SLinus Torvalds 
14251da177e4SLinus Torvalds 	down_read(&_origins_lock);
14261da177e4SLinus Torvalds 
14277e201b35SMikulas Patocka 	ti->split_io = __minimum_chunk_size(__lookup_origin(dev->bdev));
14287e201b35SMikulas Patocka 
14297e201b35SMikulas Patocka 	up_read(&_origins_lock);
14301da177e4SLinus Torvalds }
14311da177e4SLinus Torvalds 
14321da177e4SLinus Torvalds static int origin_status(struct dm_target *ti, status_type_t type, char *result,
14331da177e4SLinus Torvalds 			 unsigned int maxlen)
14341da177e4SLinus Torvalds {
1435028867acSAlasdair G Kergon 	struct dm_dev *dev = ti->private;
14361da177e4SLinus Torvalds 
14371da177e4SLinus Torvalds 	switch (type) {
14381da177e4SLinus Torvalds 	case STATUSTYPE_INFO:
14391da177e4SLinus Torvalds 		result[0] = '\0';
14401da177e4SLinus Torvalds 		break;
14411da177e4SLinus Torvalds 
14421da177e4SLinus Torvalds 	case STATUSTYPE_TABLE:
14431da177e4SLinus Torvalds 		snprintf(result, maxlen, "%s", dev->name);
14441da177e4SLinus Torvalds 		break;
14451da177e4SLinus Torvalds 	}
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds 	return 0;
14481da177e4SLinus Torvalds }
14491da177e4SLinus Torvalds 
14508811f46cSMike Snitzer static int origin_iterate_devices(struct dm_target *ti,
14518811f46cSMike Snitzer 				  iterate_devices_callout_fn fn, void *data)
14528811f46cSMike Snitzer {
14538811f46cSMike Snitzer 	struct dm_dev *dev = ti->private;
14548811f46cSMike Snitzer 
14558811f46cSMike Snitzer 	return fn(ti, dev, 0, ti->len, data);
14568811f46cSMike Snitzer }
14578811f46cSMike Snitzer 
14581da177e4SLinus Torvalds static struct target_type origin_target = {
14591da177e4SLinus Torvalds 	.name    = "snapshot-origin",
14608811f46cSMike Snitzer 	.version = {1, 7, 0},
14611da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14621da177e4SLinus Torvalds 	.ctr     = origin_ctr,
14631da177e4SLinus Torvalds 	.dtr     = origin_dtr,
14641da177e4SLinus Torvalds 	.map     = origin_map,
14651da177e4SLinus Torvalds 	.resume  = origin_resume,
14661da177e4SLinus Torvalds 	.status  = origin_status,
14678811f46cSMike Snitzer 	.iterate_devices = origin_iterate_devices,
14681da177e4SLinus Torvalds };
14691da177e4SLinus Torvalds 
14701da177e4SLinus Torvalds static struct target_type snapshot_target = {
14711da177e4SLinus Torvalds 	.name    = "snapshot",
14728811f46cSMike Snitzer 	.version = {1, 7, 0},
14731da177e4SLinus Torvalds 	.module  = THIS_MODULE,
14741da177e4SLinus Torvalds 	.ctr     = snapshot_ctr,
14751da177e4SLinus Torvalds 	.dtr     = snapshot_dtr,
14761da177e4SLinus Torvalds 	.map     = snapshot_map,
1477cd45daffSMikulas Patocka 	.end_io  = snapshot_end_io,
14781da177e4SLinus Torvalds 	.resume  = snapshot_resume,
14791da177e4SLinus Torvalds 	.status  = snapshot_status,
14808811f46cSMike Snitzer 	.iterate_devices = snapshot_iterate_devices,
14811da177e4SLinus Torvalds };
14821da177e4SLinus Torvalds 
14831da177e4SLinus Torvalds static int __init dm_snapshot_init(void)
14841da177e4SLinus Torvalds {
14851da177e4SLinus Torvalds 	int r;
14861da177e4SLinus Torvalds 
14874db6bfe0SAlasdair G Kergon 	r = dm_exception_store_init();
14884db6bfe0SAlasdair G Kergon 	if (r) {
14894db6bfe0SAlasdair G Kergon 		DMERR("Failed to initialize exception stores");
14904db6bfe0SAlasdair G Kergon 		return r;
14914db6bfe0SAlasdair G Kergon 	}
14924db6bfe0SAlasdair G Kergon 
14931da177e4SLinus Torvalds 	r = dm_register_target(&snapshot_target);
14941da177e4SLinus Torvalds 	if (r) {
14951da177e4SLinus Torvalds 		DMERR("snapshot target register failed %d", r);
1496034a186dSJonathan Brassow 		goto bad_register_snapshot_target;
14971da177e4SLinus Torvalds 	}
14981da177e4SLinus Torvalds 
14991da177e4SLinus Torvalds 	r = dm_register_target(&origin_target);
15001da177e4SLinus Torvalds 	if (r < 0) {
150172d94861SAlasdair G Kergon 		DMERR("Origin target register failed %d", r);
15021da177e4SLinus Torvalds 		goto bad1;
15031da177e4SLinus Torvalds 	}
15041da177e4SLinus Torvalds 
15051da177e4SLinus Torvalds 	r = init_origin_hash();
15061da177e4SLinus Torvalds 	if (r) {
15071da177e4SLinus Torvalds 		DMERR("init_origin_hash failed.");
15081da177e4SLinus Torvalds 		goto bad2;
15091da177e4SLinus Torvalds 	}
15101da177e4SLinus Torvalds 
1511028867acSAlasdair G Kergon 	exception_cache = KMEM_CACHE(dm_snap_exception, 0);
15121da177e4SLinus Torvalds 	if (!exception_cache) {
15131da177e4SLinus Torvalds 		DMERR("Couldn't create exception cache.");
15141da177e4SLinus Torvalds 		r = -ENOMEM;
15151da177e4SLinus Torvalds 		goto bad3;
15161da177e4SLinus Torvalds 	}
15171da177e4SLinus Torvalds 
1518028867acSAlasdair G Kergon 	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
15191da177e4SLinus Torvalds 	if (!pending_cache) {
15201da177e4SLinus Torvalds 		DMERR("Couldn't create pending cache.");
15211da177e4SLinus Torvalds 		r = -ENOMEM;
15221da177e4SLinus Torvalds 		goto bad4;
15231da177e4SLinus Torvalds 	}
15241da177e4SLinus Torvalds 
1525cd45daffSMikulas Patocka 	tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
1526cd45daffSMikulas Patocka 	if (!tracked_chunk_cache) {
1527cd45daffSMikulas Patocka 		DMERR("Couldn't create cache to track chunks in use.");
1528cd45daffSMikulas Patocka 		r = -ENOMEM;
1529cd45daffSMikulas Patocka 		goto bad5;
1530cd45daffSMikulas Patocka 	}
1531cd45daffSMikulas Patocka 
1532ca3a931fSAlasdair G Kergon 	ksnapd = create_singlethread_workqueue("ksnapd");
1533ca3a931fSAlasdair G Kergon 	if (!ksnapd) {
1534ca3a931fSAlasdair G Kergon 		DMERR("Failed to create ksnapd workqueue.");
1535ca3a931fSAlasdair G Kergon 		r = -ENOMEM;
153692e86812SMikulas Patocka 		goto bad_pending_pool;
1537ca3a931fSAlasdair G Kergon 	}
1538ca3a931fSAlasdair G Kergon 
15391da177e4SLinus Torvalds 	return 0;
15401da177e4SLinus Torvalds 
1541cd45daffSMikulas Patocka bad_pending_pool:
1542cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15431da177e4SLinus Torvalds bad5:
15441da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15451da177e4SLinus Torvalds bad4:
15461da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
15471da177e4SLinus Torvalds bad3:
15481da177e4SLinus Torvalds 	exit_origin_hash();
15491da177e4SLinus Torvalds bad2:
15501da177e4SLinus Torvalds 	dm_unregister_target(&origin_target);
15511da177e4SLinus Torvalds bad1:
15521da177e4SLinus Torvalds 	dm_unregister_target(&snapshot_target);
1553034a186dSJonathan Brassow 
1554034a186dSJonathan Brassow bad_register_snapshot_target:
1555034a186dSJonathan Brassow 	dm_exception_store_exit();
15561da177e4SLinus Torvalds 	return r;
15571da177e4SLinus Torvalds }
15581da177e4SLinus Torvalds 
15591da177e4SLinus Torvalds static void __exit dm_snapshot_exit(void)
15601da177e4SLinus Torvalds {
1561ca3a931fSAlasdair G Kergon 	destroy_workqueue(ksnapd);
1562ca3a931fSAlasdair G Kergon 
156310d3bd09SMikulas Patocka 	dm_unregister_target(&snapshot_target);
156410d3bd09SMikulas Patocka 	dm_unregister_target(&origin_target);
15651da177e4SLinus Torvalds 
15661da177e4SLinus Torvalds 	exit_origin_hash();
15671da177e4SLinus Torvalds 	kmem_cache_destroy(pending_cache);
15681da177e4SLinus Torvalds 	kmem_cache_destroy(exception_cache);
1569cd45daffSMikulas Patocka 	kmem_cache_destroy(tracked_chunk_cache);
15704db6bfe0SAlasdair G Kergon 
15714db6bfe0SAlasdair G Kergon 	dm_exception_store_exit();
15721da177e4SLinus Torvalds }
15731da177e4SLinus Torvalds 
15741da177e4SLinus Torvalds /* Module hooks */
15751da177e4SLinus Torvalds module_init(dm_snapshot_init);
15761da177e4SLinus Torvalds module_exit(dm_snapshot_exit);
15771da177e4SLinus Torvalds 
15781da177e4SLinus Torvalds MODULE_DESCRIPTION(DM_NAME " snapshot target");
15791da177e4SLinus Torvalds MODULE_AUTHOR("Joe Thornber");
15801da177e4SLinus Torvalds MODULE_LICENSE("GPL");
1581