xref: /openbmc/linux/drivers/md/dm-thin.c (revision d32fd6bb9f2bc8178cdd65ebec1ad670a8bfa241)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011-2012 Red Hat UK.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"
#include "dm-bio-prison-v1.h"
#include "dm.h"

#include <linux/device-mapper.h>
#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/log2.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/rbtree.h>

#define	DM_MSG_PREFIX	"thin"

/*
 * Tunable constants
 */
#define ENDIO_HOOK_POOL_SIZE 1024
#define MAPPING_POOL_SIZE 1024
#define COMMIT_PERIOD HZ
#define NO_SPACE_TIMEOUT_SECS 60

static unsigned int no_space_timeout_secs = NO_SPACE_TIMEOUT_SECS;

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");

/*
 * The block size of the device holding pool data must be
 * between 64KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
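/*
 * With 512 byte sectors these work out to 128 and 2097152 sectors
 * per block respectively.
 */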

/*
 * Device id is restricted to 24 bits.
 */
#define MAX_DEV_ID ((1 << 24) - 1)

/*
 * How do we handle breaking sharing of data blocks?
 * =================================================
 *
 * We use a standard copy-on-write btree to store the mappings for the
 * devices (note I'm talking about copy-on-write of the metadata here, not
 * the data).  When you take an internal snapshot you clone the root node
 * of the origin btree.  After this there is no concept of an origin or a
 * snapshot.  They are just two device trees that happen to point to the
 * same data blocks.
 *
 * When we get a write in we decide if it's to a shared data block using
 * some timestamp magic.  If it is, we have to break sharing.
 *
 * Let's say we write to a shared block in what was the origin.  The
 * steps are:
 *
 * i) plug further io to this physical block (see bio_prison code).
 *
 * ii) quiesce any read io to that shared data block.  Obviously
 * including all devices that share this block.  (see dm_deferred_set code)
 *
 * iii) copy the data block to a newly allocated block.  This step can be
 * skipped if the io covers the whole block (schedule_copy).
 *
 * iv) insert the new mapping into the origin's btree
 * (process_prepared_mapping).  This act of inserting breaks some
 * sharing of btree nodes between the two devices.  Breaking sharing only
 * affects the btree of that specific device.  Btrees for the other
 * devices that share the block never change.  The btree for the origin
 * device as it was after the last commit is untouched, i.e. we're using
 * persistent data structures in the functional programming sense.
 *
 * v) unplug io to this physical block, including the io that triggered
 * the breaking of sharing.
 *
 * Steps (ii) and (iii) occur in parallel.
 *
 * The metadata _doesn't_ need to be committed before the io continues.  We
 * get away with this because the io is always written to a _new_ block.
 * If there's a crash, then:
 *
 * - The origin mapping will point to the old origin block (the shared
 * one).  This will contain the data as it was before the io that triggered
 * the breaking of sharing came in.
 *
 * - The snap mapping still points to the old block.  As it would after
 * the commit.
 *
 * The downside of this scheme is the timestamp magic isn't perfect, and
 * will continue to think that the data block in the snapshot device is
 * shared even after the write to the origin has broken sharing.  I suspect
 * data blocks will typically be shared by many different devices, so we're
 * breaking sharing n + 1 times, rather than n, where n is the number of
 * devices that reference this data block.  At the moment I think the
 * benefits far, far outweigh the disadvantages.
 */

/*----------------------------------------------------------------*/

/*
 * Key building.
 */
enum lock_space {
	VIRTUAL,
	PHYSICAL
};

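/*
 * Build a bio prison key covering the block range [b, e) in either the
 * thin device's virtual address space or the pool's physical data space.
 * Returns false if the range is not valid for a single cell key.
 */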
static bool build_key(struct dm_thin_device *td, enum lock_space ls,
		      dm_block_t b, dm_block_t e, struct dm_cell_key *key)
{
	key->virtual = (ls == VIRTUAL);
	key->dev = dm_thin_dev_id(td);
	key->block_begin = b;
	key->block_end = e;

	return dm_cell_key_has_valid_range(key);
}

static void build_data_key(struct dm_thin_device *td, dm_block_t b,
			   struct dm_cell_key *key)
{
	(void) build_key(td, PHYSICAL, b, b + 1llu, key);
}

static void build_virtual_key(struct dm_thin_device *td, dm_block_t b,
			      struct dm_cell_key *key)
{
	(void) build_key(td, VIRTUAL, b, b + 1llu, key);
}

/*----------------------------------------------------------------*/

#define THROTTLE_THRESHOLD (1 * HZ)

struct throttle {
	struct rw_semaphore lock;
	unsigned long threshold;
	bool throttle_applied;
};

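/*
 * The worker starts a throttle window with throttle_work_start() and
 * calls throttle_work_update() as it runs.  Once the window exceeds
 * THROTTLE_THRESHOLD the worker takes the rw_semaphore for write,
 * blocking callers of throttle_lock() until throttle_work_complete()
 * releases it.
 */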
static void throttle_init(struct throttle *t)
{
	init_rwsem(&t->lock);
	t->throttle_applied = false;
}

static void throttle_work_start(struct throttle *t)
{
	t->threshold = jiffies + THROTTLE_THRESHOLD;
}

static void throttle_work_update(struct throttle *t)
{
	if (!t->throttle_applied && time_is_before_jiffies(t->threshold)) {
		down_write(&t->lock);
		t->throttle_applied = true;
	}
}

static void throttle_work_complete(struct throttle *t)
{
	if (t->throttle_applied) {
		t->throttle_applied = false;
		up_write(&t->lock);
	}
}

static void throttle_lock(struct throttle *t)
{
	down_read(&t->lock);
}

static void throttle_unlock(struct throttle *t)
{
	up_read(&t->lock);
}

/*----------------------------------------------------------------*/

/*
 * A pool device ties together a metadata device and a data device.  It
 * also provides the interface for creating and destroying internal
 * devices.
 */
struct dm_thin_new_mapping;

/*
 * The pool runs in various modes.  Modes are ordered from least to most
 * degraded so they can be compared.
 */
enum pool_mode {
	PM_WRITE,		/* metadata may be changed */
	PM_OUT_OF_DATA_SPACE,	/* metadata may be changed, though data may not be allocated */

	/*
	 * Like READ_ONLY, except may switch back to WRITE on metadata resize. Reported as READ_ONLY.
	 */
	PM_OUT_OF_METADATA_SPACE,
	PM_READ_ONLY,		/* metadata may not be changed */

	PM_FAIL,		/* all I/O fails */
};

struct pool_features {
	enum pool_mode mode;

	bool zero_new_blocks:1;
	bool discard_enabled:1;
	bool discard_passdown:1;
	bool error_if_no_space:1;
};

struct thin_c;
typedef void (*process_bio_fn)(struct thin_c *tc, struct bio *bio);
typedef void (*process_cell_fn)(struct thin_c *tc, struct dm_bio_prison_cell *cell);
typedef void (*process_mapping_fn)(struct dm_thin_new_mapping *m);

#define CELL_SORT_ARRAY_SIZE 8192

struct pool {
	struct list_head list;
	struct dm_target *ti;	/* Only set if a pool target is bound */

	struct mapped_device *pool_md;
	struct block_device *data_dev;
	struct block_device *md_dev;
	struct dm_pool_metadata *pmd;

	dm_block_t low_water_blocks;
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	struct pool_features pf;
	bool low_water_triggered:1;	/* A dm event has been sent */
	bool suspended:1;
	bool out_of_data_space:1;

	struct dm_bio_prison *prison;
	struct dm_kcopyd_client *copier;

	struct work_struct worker;
	struct workqueue_struct *wq;
	struct throttle throttle;
	struct delayed_work waker;
	struct delayed_work no_space_timeout;

	unsigned long last_commit_jiffies;
	unsigned int ref_count;

	spinlock_t lock;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_flush_completions;
	struct list_head prepared_mappings;
	struct list_head prepared_discards;
	struct list_head prepared_discards_pt2;
	struct list_head active_thins;

	struct dm_deferred_set *shared_read_ds;
	struct dm_deferred_set *all_io_ds;

	struct dm_thin_new_mapping *next_mapping;

	process_bio_fn process_bio;
	process_bio_fn process_discard;

	process_cell_fn process_cell;
	process_cell_fn process_discard_cell;

	process_mapping_fn process_prepared_mapping;
	process_mapping_fn process_prepared_discard;
	process_mapping_fn process_prepared_discard_pt2;

	struct dm_bio_prison_cell **cell_sort_array;

	mempool_t mapping_pool;
};

static void metadata_operation_failed(struct pool *pool, const char *op, int r);

static enum pool_mode get_pool_mode(struct pool *pool)
{
	return pool->pf.mode;
}

static void notify_of_pool_mode_change(struct pool *pool)
{
	static const char *descs[] = {
		"write",
		"out-of-data-space",
		"read-only",
		"read-only",
		"fail"
	};
	const char *extra_desc = NULL;
	enum pool_mode mode = get_pool_mode(pool);

	if (mode == PM_OUT_OF_DATA_SPACE) {
		if (!pool->pf.error_if_no_space)
			extra_desc = " (queue IO)";
		else
			extra_desc = " (error IO)";
	}

	dm_table_event(pool->ti->table);
	DMINFO("%s: switching pool to %s%s mode",
	       dm_device_name(pool->pool_md),
	       descs[(int)mode], extra_desc ? : "");
}

/*
 * Target context for a pool.
 */
struct pool_c {
	struct dm_target *ti;
	struct pool *pool;
	struct dm_dev *data_dev;
	struct dm_dev *metadata_dev;

	dm_block_t low_water_blocks;
	struct pool_features requested_pf; /* Features requested during table load */
	struct pool_features adjusted_pf;  /* Features used after adjusting for constituent devices */
};

/*
 * Target context for a thin.
 */
struct thin_c {
	struct list_head list;
	struct dm_dev *pool_dev;
	struct dm_dev *origin_dev;
	sector_t origin_size;
	dm_thin_id dev_id;

	struct pool *pool;
	struct dm_thin_device *td;
	struct mapped_device *thin_md;

	bool requeue_mode:1;
	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bio_list;
	struct bio_list retry_on_resume_list;
	struct rb_root sort_bio_list; /* sorted list of deferred bios */

	/*
	 * Ensures the thin is not destroyed until the worker has finished
	 * iterating the active_thins list.
	 */
	refcount_t refcount;
	struct completion can_destroy;
};

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct pool *pool)
{
	return pool->sectors_per_block_shift >= 0;
}

static sector_t block_to_sectors(struct pool *pool, dm_block_t b)
{
	return block_size_is_power_of_two(pool) ?
		(b << pool->sectors_per_block_shift) :
		(b * pool->sectors_per_block);
}

/*----------------------------------------------------------------*/

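/*
 * A discard_op batches up the sub-discards issued for a single parent
 * discard bio.  Each issue_discard() call chains another bio onto
 * op->bio, and end_discard() chains the result to the parent so the
 * parent bio only completes once all sub-discards have finished.
 */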
struct discard_op {
	struct thin_c *tc;
	struct blk_plug plug;
	struct bio *parent_bio;
	struct bio *bio;
};

static void begin_discard(struct discard_op *op, struct thin_c *tc, struct bio *parent)
{
	BUG_ON(!parent);

	op->tc = tc;
	blk_start_plug(&op->plug);
	op->parent_bio = parent;
	op->bio = NULL;
}

static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t data_e)
{
	struct thin_c *tc = op->tc;
	sector_t s = block_to_sectors(tc->pool, data_b);
	sector_t len = block_to_sectors(tc->pool, data_e - data_b);

	return __blkdev_issue_discard(tc->pool_dev->bdev, s, len, GFP_NOIO, &op->bio);
}

static void end_discard(struct discard_op *op, int r)
{
	if (op->bio) {
		/*
		 * Even if one of the calls to issue_discard failed, we
		 * need to wait for the chain to complete.
		 */
		bio_chain(op->bio, op->parent_bio);
		op->bio->bi_opf = REQ_OP_DISCARD;
		submit_bio(op->bio);
	}

	blk_finish_plug(&op->plug);

	/*
	 * Even if r is set, there could be sub discards in flight that we
	 * need to wait for.
	 */
	if (r && !op->parent_bio->bi_status)
		op->parent_bio->bi_status = errno_to_blk_status(r);
	bio_endio(op->parent_bio);
}

/*----------------------------------------------------------------*/

/*
 * wake_worker() is used when new work is queued and when pool_resume is
 * ready to continue deferred IO processing.
 */
static void wake_worker(struct pool *pool)
{
	queue_work(pool->wq, &pool->worker);
}

/*----------------------------------------------------------------*/

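/*
 * Try to detain @bio in the cell identified by @key.  A cell is
 * allocated up front so the lookup itself never has to allocate.  If the
 * key was already detained, the preallocated cell is freed and the
 * existing cell is returned through @cell_result.
 */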
static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_bio_prison_cell *cell_prealloc;

	/*
	 * Allocate a cell from the prison's mempool.
	 * This might block but it can't fail.
	 */
	cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO);

	r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result);
	if (r)
		/*
		 * We reused an old cell; we can get rid of
		 * the new one.
		 */
		dm_bio_prison_free_cell(pool->prison, cell_prealloc);

	return r;
}

static void cell_release(struct pool *pool,
			 struct dm_bio_prison_cell *cell,
			 struct bio_list *bios)
{
	dm_cell_release(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_visit_release(struct pool *pool,
			       void (*fn)(void *, struct dm_bio_prison_cell *),
			       void *context,
			       struct dm_bio_prison_cell *cell)
{
	dm_cell_visit_release(pool->prison, fn, context, cell);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_release_no_holder(struct pool *pool,
				   struct dm_bio_prison_cell *cell,
				   struct bio_list *bios)
{
	dm_cell_release_no_holder(pool->prison, cell, bios);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static void cell_error_with_code(struct pool *pool,
		struct dm_bio_prison_cell *cell, blk_status_t error_code)
{
	dm_cell_error(pool->prison, cell, error_code);
	dm_bio_prison_free_cell(pool->prison, cell);
}

static blk_status_t get_pool_io_error_code(struct pool *pool)
{
	return pool->out_of_data_space ? BLK_STS_NOSPC : BLK_STS_IOERR;
}

static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, get_pool_io_error_code(pool));
}

static void cell_success(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, 0);
}

static void cell_requeue(struct pool *pool, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(pool, cell, BLK_STS_DM_REQUEUE);
}

/*----------------------------------------------------------------*/

/*
 * A global list of pools that uses a struct mapped_device as a key.
 */
static struct dm_thin_pool_table {
	struct mutex mutex;
	struct list_head pools;
} dm_thin_pool_table;

static void pool_table_init(void)
{
	mutex_init(&dm_thin_pool_table.mutex);
	INIT_LIST_HEAD(&dm_thin_pool_table.pools);
}

static void pool_table_exit(void)
{
	mutex_destroy(&dm_thin_pool_table.mutex);
}

static void __pool_table_insert(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_add(&pool->list, &dm_thin_pool_table.pools);
}

static void __pool_table_remove(struct pool *pool)
{
	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
	list_del(&pool->list);
}

static struct pool *__pool_table_lookup(struct mapped_device *md)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->pool_md == md) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev)
{
	struct pool *pool = NULL, *tmp;

	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));

	list_for_each_entry(tmp, &dm_thin_pool_table.pools, list) {
		if (tmp->md_dev == md_dev) {
			pool = tmp;
			break;
		}
	}

	return pool;
}

/*----------------------------------------------------------------*/

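/*
 * Per-bio private data.  It tracks the owning thin device, the deferred
 * set entries the bio holds, the mapping a whole-block overwrite is
 * completing, the cell the bio is detained in, and the rb_node used when
 * sorting deferred bios.
 */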
struct dm_thin_endio_hook {
	struct thin_c *tc;
	struct dm_deferred_entry *shared_read_entry;
	struct dm_deferred_entry *all_io_entry;
	struct dm_thin_new_mapping *overwrite_mapping;
	struct rb_node rb_node;
	struct dm_bio_prison_cell *cell;
};

static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
{
	bio_list_merge(bios, master);
	bio_list_init(master);
}

static void error_bio_list(struct bio_list *bios, blk_status_t error)
{
	struct bio *bio;

	while ((bio = bio_list_pop(bios))) {
		bio->bi_status = error;
		bio_endio(bio);
	}
}

static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
		blk_status_t error)
{
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&tc->lock);
	__merge_bio_list(&bios, master);
	spin_unlock_irq(&tc->lock);

	error_bio_list(&bios, error);
}

static void requeue_deferred_cells(struct thin_c *tc)
{
	struct pool *pool = tc->pool;
	struct list_head cells;
	struct dm_bio_prison_cell *cell, *tmp;

	INIT_LIST_HEAD(&cells);

	spin_lock_irq(&tc->lock);
	list_splice_init(&tc->deferred_cells, &cells);
	spin_unlock_irq(&tc->lock);

	list_for_each_entry_safe(cell, tmp, &cells, user_list)
		cell_requeue(pool, cell);
}

static void requeue_io(struct thin_c *tc)
{
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&tc->lock);
	__merge_bio_list(&bios, &tc->deferred_bio_list);
	__merge_bio_list(&bios, &tc->retry_on_resume_list);
	spin_unlock_irq(&tc->lock);

	error_bio_list(&bios, BLK_STS_DM_REQUEUE);
	requeue_deferred_cells(tc);
}

static void error_retry_list_with_code(struct pool *pool, blk_status_t error)
{
	struct thin_c *tc;

	rcu_read_lock();
	list_for_each_entry_rcu(tc, &pool->active_thins, list)
		error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
	rcu_read_unlock();
}

static void error_retry_list(struct pool *pool)
{
	error_retry_list_with_code(pool, get_pool_io_error_code(pool));
}

/*
 * This section of code contains the logic for processing a thin device's IO.
 * Much of the code depends on pool object resources (lists, workqueues, etc)
 * but most is exclusively called from the thin target rather than the thin-pool
 * target.
 */

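/*
 * For example, with 64KiB blocks (sectors_per_block == 128) a bio whose
 * bi_sector is 1000 maps to thin block 7.
 */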
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (block_size_is_power_of_two(pool))
		block_nr >>= pool->sectors_per_block_shift;
	else
		(void) sector_div(block_nr, pool->sectors_per_block);

	return block_nr;
}

/*
 * Returns the _complete_ blocks that this bio covers.
69234fbcf62SJoe Thornber  */
get_bio_block_range(struct thin_c * tc,struct bio * bio,dm_block_t * begin,dm_block_t * end)69334fbcf62SJoe Thornber static void get_bio_block_range(struct thin_c *tc, struct bio *bio,
69434fbcf62SJoe Thornber 				dm_block_t *begin, dm_block_t *end)
69534fbcf62SJoe Thornber {
69634fbcf62SJoe Thornber 	struct pool *pool = tc->pool;
69734fbcf62SJoe Thornber 	sector_t b = bio->bi_iter.bi_sector;
69834fbcf62SJoe Thornber 	sector_t e = b + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
69934fbcf62SJoe Thornber 
70034fbcf62SJoe Thornber 	b += pool->sectors_per_block - 1ull; /* so we round up */
70134fbcf62SJoe Thornber 
70234fbcf62SJoe Thornber 	if (block_size_is_power_of_two(pool)) {
70334fbcf62SJoe Thornber 		b >>= pool->sectors_per_block_shift;
70434fbcf62SJoe Thornber 		e >>= pool->sectors_per_block_shift;
70534fbcf62SJoe Thornber 	} else {
70634fbcf62SJoe Thornber 		(void) sector_div(b, pool->sectors_per_block);
70734fbcf62SJoe Thornber 		(void) sector_div(e, pool->sectors_per_block);
70834fbcf62SJoe Thornber 	}
70934fbcf62SJoe Thornber 
71034fbcf62SJoe Thornber 	if (e < b)
71134fbcf62SJoe Thornber 		/* Can happen if the bio is within a single block. */
71234fbcf62SJoe Thornber 		e = b;
71334fbcf62SJoe Thornber 
71434fbcf62SJoe Thornber 	*begin = b;
71534fbcf62SJoe Thornber 	*end = e;
71634fbcf62SJoe Thornber }
71734fbcf62SJoe Thornber 
static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
	struct pool *pool = tc->pool;
	sector_t bi_sector = bio->bi_iter.bi_sector;

	bio_set_dev(bio, tc->pool_dev->bdev);
	if (block_size_is_power_of_two(pool))
		bio->bi_iter.bi_sector =
			(block << pool->sectors_per_block_shift) |
			(bi_sector & (pool->sectors_per_block - 1));
	else
		bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
				 sector_div(bi_sector, pool->sectors_per_block);
}

static void remap_to_origin(struct thin_c *tc, struct bio *bio)
{
	bio_set_dev(bio, tc->origin_dev->bdev);
}

static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
{
	return op_is_flush(bio->bi_opf) &&
		dm_thin_changed_this_transaction(tc->td);
}

static void inc_all_io_entry(struct pool *pool, struct bio *bio)
{
	struct dm_thin_endio_hook *h;

	if (bio_op(bio) == REQ_OP_DISCARD)
		return;

	h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds);
}

static void issue(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;

	if (!bio_triggers_commit(tc, bio)) {
		dm_submit_bio_remap(bio, NULL);
		return;
	}

	/*
	 * Complete bio with an error if earlier I/O caused changes to
	 * the metadata that can't be committed, e.g. due to I/O errors
	 * on the metadata device.
	 */
	if (dm_thin_aborted_changes(tc->td)) {
		bio_io_error(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in process_deferred_bios().
	 */
	spin_lock_irq(&pool->lock);
	bio_list_add(&pool->deferred_flush_bios, bio);
	spin_unlock_irq(&pool->lock);
}

static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
{
	remap_to_origin(tc, bio);
	issue(tc, bio);
}

static void remap_and_issue(struct thin_c *tc, struct bio *bio,
			    dm_block_t block)
{
	remap(tc, bio, block);
	issue(tc, bio);
}

/*----------------------------------------------------------------*/

/*
 * Bio endio functions.
 */
struct dm_thin_new_mapping {
	struct list_head list;

	bool pass_discard:1;
	bool maybe_shared:1;

	/*
	 * Track quiescing, copying and zeroing preparation actions.  When this
	 * counter hits zero the block is prepared and can be inserted into the
	 * btree.
	 */
	atomic_t prepare_actions;

	blk_status_t status;
	struct thin_c *tc;
	dm_block_t virt_begin, virt_end;
	dm_block_t data_block;
	struct dm_bio_prison_cell *cell;

	/*
	 * If the bio covers the whole area of a block then we can avoid
	 * zeroing or copying.  Instead this bio is hooked.  The bio will
	 * still be in the cell, so care has to be taken to avoid issuing
	 * the bio twice.
	 */
	struct bio *bio;
	bio_end_io_t *saved_bi_end_io;
};

static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	struct pool *pool = m->tc->pool;

	if (atomic_dec_and_test(&m->prepare_actions)) {
		list_add_tail(&m->list, &pool->prepared_mappings);
		wake_worker(pool);
	}
}

static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
{
	unsigned long flags;
	struct pool *pool = m->tc->pool;

	spin_lock_irqsave(&pool->lock, flags);
	__complete_mapping_preparation(m);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	struct dm_thin_new_mapping *m = context;

	m->status = read_err || write_err ? BLK_STS_IOERR : 0;
	complete_mapping_preparation(m);
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
	struct dm_thin_new_mapping *m = h->overwrite_mapping;

	bio->bi_end_io = m->saved_bi_end_io;

	m->status = bio->bi_status;
	complete_mapping_preparation(m);
}

/*----------------------------------------------------------------*/

/*
 * Workqueue.
 */

/*
 * Prepared mapping jobs.
 */

/*
 * This sends the bios in the cell, except the original holder, back
 * to the thin device's deferred_bio_list.
 */
static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
	struct pool *pool = tc->pool;
	unsigned long flags;
	struct bio_list bios;

	bio_list_init(&bios);
	cell_release_no_holder(pool, cell, &bios);

	if (!bio_list_empty(&bios)) {
		spin_lock_irqsave(&tc->lock, flags);
		bio_list_merge(&tc->deferred_bio_list, &bios);
		spin_unlock_irqrestore(&tc->lock, flags);
		wake_worker(pool);
	}
}

static void thin_defer_bio(struct thin_c *tc, struct bio *bio);

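/*
 * Context for visiting the bios in a cell: flushes and discards are put
 * back on the defer list for the worker, anything else is remapped and
 * issued once the cell has been released.
 */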
struct remap_info {
	struct thin_c *tc;
	struct bio_list defer_bios;
	struct bio_list issue_bios;
};

static void __inc_remap_and_issue_cell(void *context,
				       struct dm_bio_prison_cell *cell)
{
	struct remap_info *info = context;
	struct bio *bio;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
			bio_list_add(&info->defer_bios, bio);
		else {
			inc_all_io_entry(info->tc->pool, bio);

			/*
			 * We can't issue the bios with the bio prison lock
			 * held, so we add them to a list to issue on
			 * return from this function.
			 */
			bio_list_add(&info->issue_bios, bio);
		}
	}
}

static void inc_remap_and_issue_cell(struct thin_c *tc,
				     struct dm_bio_prison_cell *cell,
				     dm_block_t block)
{
	struct bio *bio;
	struct remap_info info;

	info.tc = tc;
	bio_list_init(&info.defer_bios);
	bio_list_init(&info.issue_bios);

	/*
	 * We have to be careful to inc any bios we're about to issue
	 * before the cell is released, and avoid a race with new bios
	 * being added to the cell.
	 */
	cell_visit_release(tc->pool, __inc_remap_and_issue_cell,
			   &info, cell);

	while ((bio = bio_list_pop(&info.defer_bios)))
		thin_defer_bio(tc, bio);

	while ((bio = bio_list_pop(&info.issue_bios)))
		remap_and_issue(info.tc, bio, block);
}

static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
	cell_error(m->tc->pool, m->cell);
	list_del(&m->list);
	mempool_free(m, &m->tc->pool->mapping_pool);
}

static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
{
	struct pool *pool = tc->pool;

	/*
	 * If the bio has the REQ_FUA flag set we must commit the metadata
	 * before signaling its completion.
	 */
	if (!bio_triggers_commit(tc, bio)) {
		bio_endio(bio);
		return;
	}

	/*
	 * Complete bio with an error if earlier I/O caused changes to the
	 * metadata that can't be committed, e.g. due to I/O errors on the
9794ae280b4SNikos Tsironis 	 * metadata device.
9804ae280b4SNikos Tsironis 	 */
9814ae280b4SNikos Tsironis 	if (dm_thin_aborted_changes(tc->td)) {
9824ae280b4SNikos Tsironis 		bio_io_error(bio);
9834ae280b4SNikos Tsironis 		return;
9844ae280b4SNikos Tsironis 	}
9854ae280b4SNikos Tsironis 
9864ae280b4SNikos Tsironis 	/*
9874ae280b4SNikos Tsironis 	 * Batch together any bios that trigger commits and then issue a
9884ae280b4SNikos Tsironis 	 * single commit for them in process_deferred_bios().
9894ae280b4SNikos Tsironis 	 */
9908e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
9914ae280b4SNikos Tsironis 	bio_list_add(&pool->deferred_flush_completions, bio);
9928e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
9934ae280b4SNikos Tsironis }
9944ae280b4SNikos Tsironis 
process_prepared_mapping(struct dm_thin_new_mapping * m)995a24c2569SMike Snitzer static void process_prepared_mapping(struct dm_thin_new_mapping *m)
996991d9fa0SJoe Thornber {
997991d9fa0SJoe Thornber 	struct thin_c *tc = m->tc;
9986beca5ebSJoe Thornber 	struct pool *pool = tc->pool;
9998b908f8eSMike Snitzer 	struct bio *bio = m->bio;
1000991d9fa0SJoe Thornber 	int r;
1001991d9fa0SJoe Thornber 
10024e4cbee9SChristoph Hellwig 	if (m->status) {
10036beca5ebSJoe Thornber 		cell_error(pool, m->cell);
1004905386f8SJoe Thornber 		goto out;
1005991d9fa0SJoe Thornber 	}
1006991d9fa0SJoe Thornber 
1007991d9fa0SJoe Thornber 	/*
1008991d9fa0SJoe Thornber 	 * Commit the prepared block into the mapping btree.
1009991d9fa0SJoe Thornber 	 * Any I/O for this block arriving after this point will get
1010991d9fa0SJoe Thornber 	 * remapped to it directly.
1011991d9fa0SJoe Thornber 	 */
101234fbcf62SJoe Thornber 	r = dm_thin_insert_block(tc->td, m->virt_begin, m->data_block);
1013991d9fa0SJoe Thornber 	if (r) {
1014b5330655SJoe Thornber 		metadata_operation_failed(pool, "dm_thin_insert_block", r);
10156beca5ebSJoe Thornber 		cell_error(pool, m->cell);
1016905386f8SJoe Thornber 		goto out;
1017991d9fa0SJoe Thornber 	}
1018991d9fa0SJoe Thornber 
1019991d9fa0SJoe Thornber 	/*
1020991d9fa0SJoe Thornber 	 * Release any bios held while the block was being provisioned.
1021991d9fa0SJoe Thornber 	 * If we are processing a write bio that completely covers the block,
1022991d9fa0SJoe Thornber 	 * we already processed it so can ignore it now when processing
1023991d9fa0SJoe Thornber 	 * the bios in the cell.
1024991d9fa0SJoe Thornber 	 */
1025991d9fa0SJoe Thornber 	if (bio) {
10262d759a46SJoe Thornber 		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
10274ae280b4SNikos Tsironis 		complete_overwrite_bio(tc, bio);
10282d759a46SJoe Thornber 	} else {
10292d759a46SJoe Thornber 		inc_all_io_entry(tc->pool, m->cell->holder);
10302d759a46SJoe Thornber 		remap_and_issue(tc, m->cell->holder, m->data_block);
10312d759a46SJoe Thornber 		inc_remap_and_issue_cell(tc, m->cell, m->data_block);
10322d759a46SJoe Thornber 	}
1033991d9fa0SJoe Thornber 
1034905386f8SJoe Thornber out:
1035991d9fa0SJoe Thornber 	list_del(&m->list);
10366f1c819cSKent Overstreet 	mempool_free(m, &pool->mapping_pool);
1037991d9fa0SJoe Thornber }
1038991d9fa0SJoe Thornber 
103934fbcf62SJoe Thornber /*----------------------------------------------------------------*/
104034fbcf62SJoe Thornber 
free_discard_mapping(struct dm_thin_new_mapping * m)104134fbcf62SJoe Thornber static void free_discard_mapping(struct dm_thin_new_mapping *m)
104234fbcf62SJoe Thornber {
104334fbcf62SJoe Thornber 	struct thin_c *tc = m->tc;
10440ef0b471SHeinz Mauelshagen 
104534fbcf62SJoe Thornber 	if (m->cell)
104634fbcf62SJoe Thornber 		cell_defer_no_holder(tc, m->cell);
10476f1c819cSKent Overstreet 	mempool_free(m, &tc->pool->mapping_pool);
104834fbcf62SJoe Thornber }
104934fbcf62SJoe Thornber 
process_prepared_discard_fail(struct dm_thin_new_mapping * m)1050e49e5829SJoe Thornber static void process_prepared_discard_fail(struct dm_thin_new_mapping *m)
1051104655fdSJoe Thornber {
1052e49e5829SJoe Thornber 	bio_io_error(m->bio);
105334fbcf62SJoe Thornber 	free_discard_mapping(m);
1054e49e5829SJoe Thornber }
1055104655fdSJoe Thornber 
105634fbcf62SJoe Thornber static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
1057e49e5829SJoe Thornber {
10584246a0b6SChristoph Hellwig 	bio_endio(m->bio);
105934fbcf62SJoe Thornber 	free_discard_mapping(m);
1060104655fdSJoe Thornber }
1061104655fdSJoe Thornber 
106234fbcf62SJoe Thornber static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
1063e49e5829SJoe Thornber {
1064e49e5829SJoe Thornber 	int r;
1065e49e5829SJoe Thornber 	struct thin_c *tc = m->tc;
1066e49e5829SJoe Thornber 
106734fbcf62SJoe Thornber 	r = dm_thin_remove_range(tc->td, m->cell->key.block_begin, m->cell->key.block_end);
106834fbcf62SJoe Thornber 	if (r) {
106934fbcf62SJoe Thornber 		metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
107034fbcf62SJoe Thornber 		bio_io_error(m->bio);
107134fbcf62SJoe Thornber 	} else
10724246a0b6SChristoph Hellwig 		bio_endio(m->bio);
1073e49e5829SJoe Thornber 
107434fbcf62SJoe Thornber 	cell_defer_no_holder(tc, m->cell);
10756f1c819cSKent Overstreet 	mempool_free(m, &tc->pool->mapping_pool);
107634fbcf62SJoe Thornber }
107734fbcf62SJoe Thornber 
1078202bae52SJoe Thornber /*----------------------------------------------------------------*/
1079202bae52SJoe Thornber 
10802a0fbffbSJoe Thornber static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m,
10812a0fbffbSJoe Thornber 						   struct bio *discard_parent)
108234fbcf62SJoe Thornber {
108334fbcf62SJoe Thornber 	/*
108434fbcf62SJoe Thornber 	 * We've already unmapped this range of blocks, but before we
108534fbcf62SJoe Thornber 	 * passdown we have to check that these blocks are now unused.
108634fbcf62SJoe Thornber 	 */
1087202bae52SJoe Thornber 	int r = 0;
1088d445bd9cSJoe Thornber 	bool shared = true;
108934fbcf62SJoe Thornber 	struct thin_c *tc = m->tc;
109034fbcf62SJoe Thornber 	struct pool *pool = tc->pool;
109134fbcf62SJoe Thornber 	dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
1092202bae52SJoe Thornber 	struct discard_op op;
109334fbcf62SJoe Thornber 
10942a0fbffbSJoe Thornber 	begin_discard(&op, tc, discard_parent);
109534fbcf62SJoe Thornber 	while (b != end) {
109634fbcf62SJoe Thornber 		/* find start of unmapped run */
109734fbcf62SJoe Thornber 		for (; b < end; b++) {
1098d445bd9cSJoe Thornber 			r = dm_pool_block_is_shared(pool->pmd, b, &shared);
109934fbcf62SJoe Thornber 			if (r)
1100202bae52SJoe Thornber 				goto out;
110134fbcf62SJoe Thornber 
1102d445bd9cSJoe Thornber 			if (!shared)
110334fbcf62SJoe Thornber 				break;
110434fbcf62SJoe Thornber 		}
110534fbcf62SJoe Thornber 
110634fbcf62SJoe Thornber 		if (b == end)
110734fbcf62SJoe Thornber 			break;
110834fbcf62SJoe Thornber 
110934fbcf62SJoe Thornber 		/* find end of run */
111034fbcf62SJoe Thornber 		for (e = b + 1; e != end; e++) {
1111d445bd9cSJoe Thornber 			r = dm_pool_block_is_shared(pool->pmd, e, &shared);
111234fbcf62SJoe Thornber 			if (r)
1113202bae52SJoe Thornber 				goto out;
111434fbcf62SJoe Thornber 
1115d445bd9cSJoe Thornber 			if (shared)
111634fbcf62SJoe Thornber 				break;
111734fbcf62SJoe Thornber 		}
111834fbcf62SJoe Thornber 
1119202bae52SJoe Thornber 		r = issue_discard(&op, b, e);
112034fbcf62SJoe Thornber 		if (r)
1121202bae52SJoe Thornber 			goto out;
112234fbcf62SJoe Thornber 
112334fbcf62SJoe Thornber 		b = e;
112434fbcf62SJoe Thornber 	}
1125202bae52SJoe Thornber out:
1126202bae52SJoe Thornber 	end_discard(&op, r);
112734fbcf62SJoe Thornber }
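/*
 * Walk-through of the scan above (illustrative block numbers only): the
 * outer loop repeatedly finds a maximal run of blocks that
 * dm_pool_block_is_shared() reports as unshared and passes down a discard
 * for just that run.  For example, if blocks 10-13 are unshared but block
 * 14 is still referenced by a snapshot, the first inner loop leaves b at
 * 10, the second stops with e at 14, issue_discard(&op, 10, 14) covers
 * the run, and scanning resumes from block 14.
 */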
112834fbcf62SJoe Thornber 
11292a0fbffbSJoe Thornber static void queue_passdown_pt2(struct dm_thin_new_mapping *m)
11302a0fbffbSJoe Thornber {
11312a0fbffbSJoe Thornber 	unsigned long flags;
11322a0fbffbSJoe Thornber 	struct pool *pool = m->tc->pool;
11332a0fbffbSJoe Thornber 
11342a0fbffbSJoe Thornber 	spin_lock_irqsave(&pool->lock, flags);
11352a0fbffbSJoe Thornber 	list_add_tail(&m->list, &pool->prepared_discards_pt2);
11362a0fbffbSJoe Thornber 	spin_unlock_irqrestore(&pool->lock, flags);
11372a0fbffbSJoe Thornber 	wake_worker(pool);
11382a0fbffbSJoe Thornber }
11392a0fbffbSJoe Thornber 
11402a0fbffbSJoe Thornber static void passdown_endio(struct bio *bio)
11412a0fbffbSJoe Thornber {
11422a0fbffbSJoe Thornber 	/*
11432a0fbffbSJoe Thornber 	 * It doesn't matter if the passdown discard failed, we still want
11442a0fbffbSJoe Thornber 	 * to unmap (we ignore err).
11452a0fbffbSJoe Thornber 	 */
11462a0fbffbSJoe Thornber 	queue_passdown_pt2(bio->bi_private);
1147948f581aSDennis Yang 	bio_put(bio);
11482a0fbffbSJoe Thornber }
11492a0fbffbSJoe Thornber 
11502a0fbffbSJoe Thornber static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
11512a0fbffbSJoe Thornber {
11522a0fbffbSJoe Thornber 	int r;
11532a0fbffbSJoe Thornber 	struct thin_c *tc = m->tc;
11542a0fbffbSJoe Thornber 	struct pool *pool = tc->pool;
11552a0fbffbSJoe Thornber 	struct bio *discard_parent;
11562a0fbffbSJoe Thornber 	dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin);
11572a0fbffbSJoe Thornber 
11582a0fbffbSJoe Thornber 	/*
11592a0fbffbSJoe Thornber 	 * Only this thread allocates blocks, so we can be sure that the
11602a0fbffbSJoe Thornber 	 * newly unmapped blocks will not be allocated before the end of
11612a0fbffbSJoe Thornber 	 * the function.
11622a0fbffbSJoe Thornber 	 */
11632a0fbffbSJoe Thornber 	r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
11642a0fbffbSJoe Thornber 	if (r) {
11652a0fbffbSJoe Thornber 		metadata_operation_failed(pool, "dm_thin_remove_range", r);
11662a0fbffbSJoe Thornber 		bio_io_error(m->bio);
11672a0fbffbSJoe Thornber 		cell_defer_no_holder(tc, m->cell);
11686f1c819cSKent Overstreet 		mempool_free(m, &pool->mapping_pool);
11692a0fbffbSJoe Thornber 		return;
11702a0fbffbSJoe Thornber 	}
11712a0fbffbSJoe Thornber 
117200a0ea33SVallish Vaidyeshwara 	/*
117300a0ea33SVallish Vaidyeshwara 	 * Increment the unmapped blocks.  This prevents a race between the
117400a0ea33SVallish Vaidyeshwara 	 * passdown io and reallocation of freed blocks.
117500a0ea33SVallish Vaidyeshwara 	 */
117600a0ea33SVallish Vaidyeshwara 	r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
117700a0ea33SVallish Vaidyeshwara 	if (r) {
117800a0ea33SVallish Vaidyeshwara 		metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
117900a0ea33SVallish Vaidyeshwara 		bio_io_error(m->bio);
118000a0ea33SVallish Vaidyeshwara 		cell_defer_no_holder(tc, m->cell);
11816f1c819cSKent Overstreet 		mempool_free(m, &pool->mapping_pool);
118200a0ea33SVallish Vaidyeshwara 		return;
118300a0ea33SVallish Vaidyeshwara 	}
118400a0ea33SVallish Vaidyeshwara 
118507888c66SChristoph Hellwig 	discard_parent = bio_alloc(NULL, 1, 0, GFP_NOIO);
11862a0fbffbSJoe Thornber 	discard_parent->bi_end_io = passdown_endio;
11872a0fbffbSJoe Thornber 	discard_parent->bi_private = m;
11882a0fbffbSJoe Thornber 	if (m->maybe_shared)
11892a0fbffbSJoe Thornber 		passdown_double_checking_shared_status(m, discard_parent);
11902a0fbffbSJoe Thornber 	else {
11912a0fbffbSJoe Thornber 		struct discard_op op;
11922a0fbffbSJoe Thornber 
11932a0fbffbSJoe Thornber 		begin_discard(&op, tc, discard_parent);
11942a0fbffbSJoe Thornber 		r = issue_discard(&op, m->data_block, data_end);
11952a0fbffbSJoe Thornber 		end_discard(&op, r);
11962a0fbffbSJoe Thornber 	}
11972a0fbffbSJoe Thornber }
11982a0fbffbSJoe Thornber 
11992a0fbffbSJoe Thornber static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
120034fbcf62SJoe Thornber {
120134fbcf62SJoe Thornber 	int r;
120234fbcf62SJoe Thornber 	struct thin_c *tc = m->tc;
120334fbcf62SJoe Thornber 	struct pool *pool = tc->pool;
120434fbcf62SJoe Thornber 
12052a0fbffbSJoe Thornber 	/*
12062a0fbffbSJoe Thornber 	 * The passdown has completed, so now we can decrement all those
12072a0fbffbSJoe Thornber 	 * unmapped blocks.
12082a0fbffbSJoe Thornber 	 */
12092a0fbffbSJoe Thornber 	r = dm_pool_dec_data_range(pool->pmd, m->data_block,
1210202bae52SJoe Thornber 				   m->data_block + (m->virt_end - m->virt_begin));
12112a0fbffbSJoe Thornber 	if (r) {
12122a0fbffbSJoe Thornber 		metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
12132a0fbffbSJoe Thornber 		bio_io_error(m->bio);
12142a0fbffbSJoe Thornber 	} else
12152a0fbffbSJoe Thornber 		bio_endio(m->bio);
1216202bae52SJoe Thornber 
121734fbcf62SJoe Thornber 	cell_defer_no_holder(tc, m->cell);
12186f1c819cSKent Overstreet 	mempool_free(m, &pool->mapping_pool);
1219e49e5829SJoe Thornber }
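/*
 * Added summary of the two-phase passdown (paraphrasing the code above,
 * not an authoritative design note): pt1 removes the virtual mapping,
 * takes an extra reference on the data blocks so they cannot be
 * reallocated while the discard is in flight, and chains the passdown
 * discards off a private parent bio.  passdown_endio() then queues pt2,
 * which drops those references and completes the original discard bio.
 */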
1220e49e5829SJoe Thornber 
1221104655fdSJoe Thornber static void process_prepared(struct pool *pool, struct list_head *head,
1222e49e5829SJoe Thornber 			     process_mapping_fn *fn)
1223991d9fa0SJoe Thornber {
1224991d9fa0SJoe Thornber 	struct list_head maps;
1225a24c2569SMike Snitzer 	struct dm_thin_new_mapping *m, *tmp;
1226991d9fa0SJoe Thornber 
1227991d9fa0SJoe Thornber 	INIT_LIST_HEAD(&maps);
12288e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
1229104655fdSJoe Thornber 	list_splice_init(head, &maps);
12308e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
1231991d9fa0SJoe Thornber 
1232991d9fa0SJoe Thornber 	list_for_each_entry_safe(m, tmp, &maps, list)
1233e49e5829SJoe Thornber 		(*fn)(m);
1234991d9fa0SJoe Thornber }
1235991d9fa0SJoe Thornber 
1236991d9fa0SJoe Thornber /*
1237991d9fa0SJoe Thornber  * Deferred bio jobs.
1238991d9fa0SJoe Thornber  */
1239104655fdSJoe Thornber static int io_overlaps_block(struct pool *pool, struct bio *bio)
1240104655fdSJoe Thornber {
12414f024f37SKent Overstreet 	return bio->bi_iter.bi_size ==
12424f024f37SKent Overstreet 		(pool->sectors_per_block << SECTOR_SHIFT);
1243104655fdSJoe Thornber }
1244104655fdSJoe Thornber 
1245991d9fa0SJoe Thornber static int io_overwrites_block(struct pool *pool, struct bio *bio)
1246991d9fa0SJoe Thornber {
1247104655fdSJoe Thornber 	return (bio_data_dir(bio) == WRITE) &&
1248104655fdSJoe Thornber 		io_overlaps_block(pool, bio);
1249991d9fa0SJoe Thornber }
1250991d9fa0SJoe Thornber 
1251991d9fa0SJoe Thornber static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
1252991d9fa0SJoe Thornber 			       bio_end_io_t *fn)
1253991d9fa0SJoe Thornber {
1254991d9fa0SJoe Thornber 	*save = bio->bi_end_io;
1255991d9fa0SJoe Thornber 	bio->bi_end_io = fn;
1256991d9fa0SJoe Thornber }
1257991d9fa0SJoe Thornber 
1258991d9fa0SJoe Thornber static int ensure_next_mapping(struct pool *pool)
1259991d9fa0SJoe Thornber {
1260991d9fa0SJoe Thornber 	if (pool->next_mapping)
1261991d9fa0SJoe Thornber 		return 0;
1262991d9fa0SJoe Thornber 
12636f1c819cSKent Overstreet 	pool->next_mapping = mempool_alloc(&pool->mapping_pool, GFP_ATOMIC);
1264991d9fa0SJoe Thornber 
1265991d9fa0SJoe Thornber 	return pool->next_mapping ? 0 : -ENOMEM;
1266991d9fa0SJoe Thornber }
1267991d9fa0SJoe Thornber 
1268a24c2569SMike Snitzer static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
1269991d9fa0SJoe Thornber {
127016961b04SMike Snitzer 	struct dm_thin_new_mapping *m = pool->next_mapping;
1271991d9fa0SJoe Thornber 
1272991d9fa0SJoe Thornber 	BUG_ON(!pool->next_mapping);
1273991d9fa0SJoe Thornber 
127416961b04SMike Snitzer 	memset(m, 0, sizeof(struct dm_thin_new_mapping));
127516961b04SMike Snitzer 	INIT_LIST_HEAD(&m->list);
127616961b04SMike Snitzer 	m->bio = NULL;
127716961b04SMike Snitzer 
1278991d9fa0SJoe Thornber 	pool->next_mapping = NULL;
1279991d9fa0SJoe Thornber 
128016961b04SMike Snitzer 	return m;
1281991d9fa0SJoe Thornber }
1282991d9fa0SJoe Thornber 
1283e5aea7b4SJoe Thornber static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
1284e5aea7b4SJoe Thornber 		    sector_t begin, sector_t end)
1285e5aea7b4SJoe Thornber {
1286e5aea7b4SJoe Thornber 	struct dm_io_region to;
1287e5aea7b4SJoe Thornber 
1288e5aea7b4SJoe Thornber 	to.bdev = tc->pool_dev->bdev;
1289e5aea7b4SJoe Thornber 	to.sector = begin;
1290e5aea7b4SJoe Thornber 	to.count = end - begin;
1291e5aea7b4SJoe Thornber 
12927209049dSMike Snitzer 	dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
1293e5aea7b4SJoe Thornber }
1294e5aea7b4SJoe Thornber 
1295452d7a62SMike Snitzer static void remap_and_issue_overwrite(struct thin_c *tc, struct bio *bio,
129634fbcf62SJoe Thornber 				      dm_block_t data_begin,
1297452d7a62SMike Snitzer 				      struct dm_thin_new_mapping *m)
1298452d7a62SMike Snitzer {
1299452d7a62SMike Snitzer 	struct pool *pool = tc->pool;
1300452d7a62SMike Snitzer 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1301452d7a62SMike Snitzer 
1302452d7a62SMike Snitzer 	h->overwrite_mapping = m;
1303452d7a62SMike Snitzer 	m->bio = bio;
1304452d7a62SMike Snitzer 	save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
1305452d7a62SMike Snitzer 	inc_all_io_entry(pool, bio);
130634fbcf62SJoe Thornber 	remap_and_issue(tc, bio, data_begin);
1307452d7a62SMike Snitzer }
1308452d7a62SMike Snitzer 
1309e5aea7b4SJoe Thornber /*
1310e5aea7b4SJoe Thornber  * A partial copy also needs to zero the uncopied region.
1311e5aea7b4SJoe Thornber  */
1312991d9fa0SJoe Thornber static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
13132dd9c257SJoe Thornber 			  struct dm_dev *origin, dm_block_t data_origin,
13142dd9c257SJoe Thornber 			  dm_block_t data_dest,
1315e5aea7b4SJoe Thornber 			  struct dm_bio_prison_cell *cell, struct bio *bio,
1316e5aea7b4SJoe Thornber 			  sector_t len)
1317991d9fa0SJoe Thornber {
1318991d9fa0SJoe Thornber 	struct pool *pool = tc->pool;
1319a24c2569SMike Snitzer 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
1320991d9fa0SJoe Thornber 
1321991d9fa0SJoe Thornber 	m->tc = tc;
132234fbcf62SJoe Thornber 	m->virt_begin = virt_block;
132334fbcf62SJoe Thornber 	m->virt_end = virt_block + 1u;
1324991d9fa0SJoe Thornber 	m->data_block = data_dest;
1325991d9fa0SJoe Thornber 	m->cell = cell;
1326991d9fa0SJoe Thornber 
1327e5aea7b4SJoe Thornber 	/*
1328e5aea7b4SJoe Thornber 	 * quiesce action + copy action + an extra reference held for the
1329e5aea7b4SJoe Thornber 	 * duration of this function (we may need to inc later for a
1330e5aea7b4SJoe Thornber 	 * partial zero).
1331e5aea7b4SJoe Thornber 	 */
1332e5aea7b4SJoe Thornber 	atomic_set(&m->prepare_actions, 3);
1333e5aea7b4SJoe Thornber 
133444feb387SMike Snitzer 	if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
1335e5aea7b4SJoe Thornber 		complete_mapping_preparation(m); /* already quiesced */
1336991d9fa0SJoe Thornber 
1337991d9fa0SJoe Thornber 	/*
1338991d9fa0SJoe Thornber 	 * IO to pool_dev remaps to the pool target's data_dev.
1339991d9fa0SJoe Thornber 	 *
1340991d9fa0SJoe Thornber 	 * If the whole block of data is being overwritten, we can issue the
1341991d9fa0SJoe Thornber 	 * bio immediately. Otherwise we use kcopyd to clone the data first.
1342991d9fa0SJoe Thornber 	 */
1343452d7a62SMike Snitzer 	if (io_overwrites_block(pool, bio))
1344452d7a62SMike Snitzer 		remap_and_issue_overwrite(tc, bio, data_dest, m);
1345452d7a62SMike Snitzer 	else {
1346991d9fa0SJoe Thornber 		struct dm_io_region from, to;
1347991d9fa0SJoe Thornber 
13482dd9c257SJoe Thornber 		from.bdev = origin->bdev;
1349991d9fa0SJoe Thornber 		from.sector = data_origin * pool->sectors_per_block;
1350e5aea7b4SJoe Thornber 		from.count = len;
1351991d9fa0SJoe Thornber 
1352991d9fa0SJoe Thornber 		to.bdev = tc->pool_dev->bdev;
1353991d9fa0SJoe Thornber 		to.sector = data_dest * pool->sectors_per_block;
1354e5aea7b4SJoe Thornber 		to.count = len;
1355991d9fa0SJoe Thornber 
13567209049dSMike Snitzer 		dm_kcopyd_copy(pool->copier, &from, 1, &to,
1357991d9fa0SJoe Thornber 			       0, copy_complete, m);
1358e5aea7b4SJoe Thornber 
1359e5aea7b4SJoe Thornber 		/*
1360e5aea7b4SJoe Thornber 		 * Do we need to zero a tail region?
1361e5aea7b4SJoe Thornber 		 */
1362e5aea7b4SJoe Thornber 		if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
1363e5aea7b4SJoe Thornber 			atomic_inc(&m->prepare_actions);
1364e5aea7b4SJoe Thornber 			ll_zero(tc, m,
1365e5aea7b4SJoe Thornber 				data_dest * pool->sectors_per_block + len,
1366e5aea7b4SJoe Thornber 				(data_dest + 1) * pool->sectors_per_block);
1367991d9fa0SJoe Thornber 		}
1368991d9fa0SJoe Thornber 	}
1369e5aea7b4SJoe Thornber 
1370e5aea7b4SJoe Thornber 	complete_mapping_preparation(m); /* drop our ref */
1371991d9fa0SJoe Thornber }
1372991d9fa0SJoe Thornber 
13732dd9c257SJoe Thornber static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
13742dd9c257SJoe Thornber 				   dm_block_t data_origin, dm_block_t data_dest,
1375a24c2569SMike Snitzer 				   struct dm_bio_prison_cell *cell, struct bio *bio)
13762dd9c257SJoe Thornber {
13772dd9c257SJoe Thornber 	schedule_copy(tc, virt_block, tc->pool_dev,
1378e5aea7b4SJoe Thornber 		      data_origin, data_dest, cell, bio,
1379e5aea7b4SJoe Thornber 		      tc->pool->sectors_per_block);
13802dd9c257SJoe Thornber }
13812dd9c257SJoe Thornber 
1382991d9fa0SJoe Thornber static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
1383a24c2569SMike Snitzer 			  dm_block_t data_block, struct dm_bio_prison_cell *cell,
1384991d9fa0SJoe Thornber 			  struct bio *bio)
1385991d9fa0SJoe Thornber {
1386991d9fa0SJoe Thornber 	struct pool *pool = tc->pool;
1387a24c2569SMike Snitzer 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
1388991d9fa0SJoe Thornber 
138950f3c3efSJoe Thornber 	atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
1390991d9fa0SJoe Thornber 	m->tc = tc;
139134fbcf62SJoe Thornber 	m->virt_begin = virt_block;
139234fbcf62SJoe Thornber 	m->virt_end = virt_block + 1u;
1393991d9fa0SJoe Thornber 	m->data_block = data_block;
1394991d9fa0SJoe Thornber 	m->cell = cell;
1395991d9fa0SJoe Thornber 
1396991d9fa0SJoe Thornber 	/*
1397991d9fa0SJoe Thornber 	 * If the whole block of data is being overwritten or we are not
1398991d9fa0SJoe Thornber 	 * zeroing pre-existing data, we can issue the bio immediately.
1399991d9fa0SJoe Thornber 	 * Otherwise we use kcopyd to zero the data first.
1400991d9fa0SJoe Thornber 	 */
1401f8ae7525SMike Snitzer 	if (pool->pf.zero_new_blocks) {
1402f8ae7525SMike Snitzer 		if (io_overwrites_block(pool, bio))
1403452d7a62SMike Snitzer 			remap_and_issue_overwrite(tc, bio, data_block, m);
1404452d7a62SMike Snitzer 		else
1405f8ae7525SMike Snitzer 			ll_zero(tc, m, data_block * pool->sectors_per_block,
1406e5aea7b4SJoe Thornber 				(data_block + 1) * pool->sectors_per_block);
1407f8ae7525SMike Snitzer 	} else
1408f8ae7525SMike Snitzer 		process_prepared_mapping(m);
1409e5aea7b4SJoe Thornber }
1410991d9fa0SJoe Thornber 
1411e5aea7b4SJoe Thornber static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
1412e5aea7b4SJoe Thornber 				   dm_block_t data_dest,
1413e5aea7b4SJoe Thornber 				   struct dm_bio_prison_cell *cell, struct bio *bio)
1414e5aea7b4SJoe Thornber {
1415e5aea7b4SJoe Thornber 	struct pool *pool = tc->pool;
1416e5aea7b4SJoe Thornber 	sector_t virt_block_begin = virt_block * pool->sectors_per_block;
1417e5aea7b4SJoe Thornber 	sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
1418e5aea7b4SJoe Thornber 
1419e5aea7b4SJoe Thornber 	if (virt_block_end <= tc->origin_size)
1420e5aea7b4SJoe Thornber 		schedule_copy(tc, virt_block, tc->origin_dev,
1421e5aea7b4SJoe Thornber 			      virt_block, data_dest, cell, bio,
1422e5aea7b4SJoe Thornber 			      pool->sectors_per_block);
1423e5aea7b4SJoe Thornber 
1424e5aea7b4SJoe Thornber 	else if (virt_block_begin < tc->origin_size)
1425e5aea7b4SJoe Thornber 		schedule_copy(tc, virt_block, tc->origin_dev,
1426e5aea7b4SJoe Thornber 			      virt_block, data_dest, cell, bio,
1427e5aea7b4SJoe Thornber 			      tc->origin_size - virt_block_begin);
1428e5aea7b4SJoe Thornber 
1429e5aea7b4SJoe Thornber 	else
1430e5aea7b4SJoe Thornber 		schedule_zero(tc, virt_block, data_dest, cell, bio);
1431991d9fa0SJoe Thornber }
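/*
 * Illustrative example for the three cases above (made-up numbers): with
 * pool->sectors_per_block = 128 and tc->origin_size = 200, virtual block
 * 0 ([0, 128)) lies wholly below origin_size and gets a full-block copy;
 * block 1 ([128, 256)) straddles the boundary, so only the first 72
 * sectors are copied and schedule_copy() zeroes the tail; block 2 and
 * above lie entirely beyond the external origin and are simply zeroed.
 */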
1432991d9fa0SJoe Thornber 
14332c43fd26SJoe Thornber static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
14342c43fd26SJoe Thornber 
1435a685557fSMike Snitzer static void requeue_bios(struct pool *pool);
1436a685557fSMike Snitzer 
14373ab91828SJoe Thornber static bool is_read_only_pool_mode(enum pool_mode mode)
14383ab91828SJoe Thornber {
14393ab91828SJoe Thornber 	return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
14403ab91828SJoe Thornber }
14413ab91828SJoe Thornber 
14423ab91828SJoe Thornber static bool is_read_only(struct pool *pool)
14433ab91828SJoe Thornber {
14443ab91828SJoe Thornber 	return is_read_only_pool_mode(get_pool_mode(pool));
14453ab91828SJoe Thornber }
14463ab91828SJoe Thornber 
14473ab91828SJoe Thornber static void check_for_metadata_space(struct pool *pool)
14483ab91828SJoe Thornber {
14493ab91828SJoe Thornber 	int r;
14503ab91828SJoe Thornber 	const char *ooms_reason = NULL;
14513ab91828SJoe Thornber 	dm_block_t nr_free;
14523ab91828SJoe Thornber 
14533ab91828SJoe Thornber 	r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
14543ab91828SJoe Thornber 	if (r)
14553ab91828SJoe Thornber 		ooms_reason = "Could not get free metadata blocks";
14563ab91828SJoe Thornber 	else if (!nr_free)
14573ab91828SJoe Thornber 		ooms_reason = "No free metadata blocks";
14583ab91828SJoe Thornber 
14593ab91828SJoe Thornber 	if (ooms_reason && !is_read_only(pool)) {
14603ab91828SJoe Thornber 		DMERR("%s", ooms_reason);
14613ab91828SJoe Thornber 		set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
14623ab91828SJoe Thornber 	}
14633ab91828SJoe Thornber }
14643ab91828SJoe Thornber 
14653ab91828SJoe Thornber static void check_for_data_space(struct pool *pool)
14662c43fd26SJoe Thornber {
14672c43fd26SJoe Thornber 	int r;
14682c43fd26SJoe Thornber 	dm_block_t nr_free;
14692c43fd26SJoe Thornber 
14702c43fd26SJoe Thornber 	if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
14712c43fd26SJoe Thornber 		return;
14722c43fd26SJoe Thornber 
14732c43fd26SJoe Thornber 	r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
14742c43fd26SJoe Thornber 	if (r)
14752c43fd26SJoe Thornber 		return;
14762c43fd26SJoe Thornber 
1477a685557fSMike Snitzer 	if (nr_free) {
14782c43fd26SJoe Thornber 		set_pool_mode(pool, PM_WRITE);
1479a685557fSMike Snitzer 		requeue_bios(pool);
1480a685557fSMike Snitzer 	}
14812c43fd26SJoe Thornber }
14822c43fd26SJoe Thornber 
1483e49e5829SJoe Thornber /*
1484e49e5829SJoe Thornber  * A non-zero return indicates read_only or fail_io mode.
1485e49e5829SJoe Thornber  * Many callers don't care about the return value.
1486e49e5829SJoe Thornber  */
1487020cc3b5SJoe Thornber static int commit(struct pool *pool)
1488e49e5829SJoe Thornber {
1489e49e5829SJoe Thornber 	int r;
1490e49e5829SJoe Thornber 
14913ab91828SJoe Thornber 	if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
1492e49e5829SJoe Thornber 		return -EINVAL;
1493e49e5829SJoe Thornber 
1494020cc3b5SJoe Thornber 	r = dm_pool_commit_metadata(pool->pmd);
1495b5330655SJoe Thornber 	if (r)
1496b5330655SJoe Thornber 		metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
14973ab91828SJoe Thornber 	else {
14983ab91828SJoe Thornber 		check_for_metadata_space(pool);
14993ab91828SJoe Thornber 		check_for_data_space(pool);
15003ab91828SJoe Thornber 	}
1501e49e5829SJoe Thornber 
1502e49e5829SJoe Thornber 	return r;
1503e49e5829SJoe Thornber }
1504e49e5829SJoe Thornber 
150788a6621bSJoe Thornber static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
150688a6621bSJoe Thornber {
150788a6621bSJoe Thornber 	if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
150888a6621bSJoe Thornber 		DMWARN("%s: reached low water mark for data device: sending event.",
150988a6621bSJoe Thornber 		       dm_device_name(pool->pool_md));
15108e0c9dacSMikulas Patocka 		spin_lock_irq(&pool->lock);
151188a6621bSJoe Thornber 		pool->low_water_triggered = true;
15128e0c9dacSMikulas Patocka 		spin_unlock_irq(&pool->lock);
151388a6621bSJoe Thornber 		dm_table_event(pool->ti->table);
151488a6621bSJoe Thornber 	}
151588a6621bSJoe Thornber }
151688a6621bSJoe Thornber 
1517991d9fa0SJoe Thornber static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
1518991d9fa0SJoe Thornber {
1519991d9fa0SJoe Thornber 	int r;
1520991d9fa0SJoe Thornber 	dm_block_t free_blocks;
1521991d9fa0SJoe Thornber 	struct pool *pool = tc->pool;
1522991d9fa0SJoe Thornber 
15233e1a0699SJoe Thornber 	if (WARN_ON(get_pool_mode(pool) != PM_WRITE))
15248d30abffSJoe Thornber 		return -EINVAL;
15258d30abffSJoe Thornber 
1526991d9fa0SJoe Thornber 	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1527b5330655SJoe Thornber 	if (r) {
1528b5330655SJoe Thornber 		metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1529991d9fa0SJoe Thornber 		return r;
1530b5330655SJoe Thornber 	}
1531991d9fa0SJoe Thornber 
153288a6621bSJoe Thornber 	check_low_water_mark(pool, free_blocks);
1533991d9fa0SJoe Thornber 
1534991d9fa0SJoe Thornber 	if (!free_blocks) {
1535991d9fa0SJoe Thornber 		/*
1536991d9fa0SJoe Thornber 		 * Try to commit to see if that will free up some
1537991d9fa0SJoe Thornber 		 * more space.
1538991d9fa0SJoe Thornber 		 */
1539020cc3b5SJoe Thornber 		r = commit(pool);
1540020cc3b5SJoe Thornber 		if (r)
1541020cc3b5SJoe Thornber 			return r;
1542991d9fa0SJoe Thornber 
1543991d9fa0SJoe Thornber 		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
1544b5330655SJoe Thornber 		if (r) {
1545b5330655SJoe Thornber 			metadata_operation_failed(pool, "dm_pool_get_free_block_count", r);
1546991d9fa0SJoe Thornber 			return r;
1547b5330655SJoe Thornber 		}
1548991d9fa0SJoe Thornber 
1549991d9fa0SJoe Thornber 		if (!free_blocks) {
15503e1a0699SJoe Thornber 			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1551991d9fa0SJoe Thornber 			return -ENOSPC;
1552991d9fa0SJoe Thornber 		}
1553991d9fa0SJoe Thornber 	}
1554991d9fa0SJoe Thornber 
1555991d9fa0SJoe Thornber 	r = dm_pool_alloc_data_block(pool->pmd, result);
15564a02b34eSMike Snitzer 	if (r) {
1557a685557fSMike Snitzer 		if (r == -ENOSPC)
1558a685557fSMike Snitzer 			set_pool_mode(pool, PM_OUT_OF_DATA_SPACE);
1559a685557fSMike Snitzer 		else
1560b5330655SJoe Thornber 			metadata_operation_failed(pool, "dm_pool_alloc_data_block", r);
1561991d9fa0SJoe Thornber 		return r;
15624a02b34eSMike Snitzer 	}
1563991d9fa0SJoe Thornber 
15643ab91828SJoe Thornber 	r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
15653ab91828SJoe Thornber 	if (r) {
15663ab91828SJoe Thornber 		metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
15673ab91828SJoe Thornber 		return r;
15683ab91828SJoe Thornber 	}
15693ab91828SJoe Thornber 
15703ab91828SJoe Thornber 	if (!free_blocks) {
15713ab91828SJoe Thornber 		/* Let's commit before we use up the metadata reserve. */
15723ab91828SJoe Thornber 		r = commit(pool);
15733ab91828SJoe Thornber 		if (r)
15743ab91828SJoe Thornber 			return r;
15753ab91828SJoe Thornber 	}
15763ab91828SJoe Thornber 
1577991d9fa0SJoe Thornber 	return 0;
1578991d9fa0SJoe Thornber }
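/*
 * Added note on the allocation policy above (restating the code, not a
 * guarantee): if no free data blocks are visible, a commit is tried first
 * because it can release blocks freed earlier in the open transaction;
 * only if the count is still zero does the pool switch to
 * PM_OUT_OF_DATA_SPACE.  The trailing metadata check commits early when
 * the metadata reserve is about to be used, keeping room to record the
 * new mapping.
 */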
1579991d9fa0SJoe Thornber 
1580991d9fa0SJoe Thornber /*
1581991d9fa0SJoe Thornber  * If we have run out of space, queue bios until the device is
1582991d9fa0SJoe Thornber  * resumed, presumably after having been reloaded with more space.
1583991d9fa0SJoe Thornber  */
1584991d9fa0SJoe Thornber static void retry_on_resume(struct bio *bio)
1585991d9fa0SJoe Thornber {
158659c3d2c6SMikulas Patocka 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1587eb2aa48dSJoe Thornber 	struct thin_c *tc = h->tc;
1588991d9fa0SJoe Thornber 
15898e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->lock);
1590c140e1c4SMike Snitzer 	bio_list_add(&tc->retry_on_resume_list, bio);
15918e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->lock);
1592991d9fa0SJoe Thornber }
1593991d9fa0SJoe Thornber 
15944e4cbee9SChristoph Hellwig static blk_status_t should_error_unserviceable_bio(struct pool *pool)
15953e1a0699SJoe Thornber {
15963e1a0699SJoe Thornber 	enum pool_mode m = get_pool_mode(pool);
15973e1a0699SJoe Thornber 
15983e1a0699SJoe Thornber 	switch (m) {
15993e1a0699SJoe Thornber 	case PM_WRITE:
16003e1a0699SJoe Thornber 		/* Shouldn't get here */
16013e1a0699SJoe Thornber 		DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode");
16024e4cbee9SChristoph Hellwig 		return BLK_STS_IOERR;
16033e1a0699SJoe Thornber 
16043e1a0699SJoe Thornber 	case PM_OUT_OF_DATA_SPACE:
16054e4cbee9SChristoph Hellwig 		return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
16063e1a0699SJoe Thornber 
16073ab91828SJoe Thornber 	case PM_OUT_OF_METADATA_SPACE:
16083e1a0699SJoe Thornber 	case PM_READ_ONLY:
16093e1a0699SJoe Thornber 	case PM_FAIL:
16104e4cbee9SChristoph Hellwig 		return BLK_STS_IOERR;
16113e1a0699SJoe Thornber 	default:
16123e1a0699SJoe Thornber 		/* Shouldn't get here */
16133e1a0699SJoe Thornber 		DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode");
16144e4cbee9SChristoph Hellwig 		return BLK_STS_IOERR;
16153e1a0699SJoe Thornber 	}
16163e1a0699SJoe Thornber }
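/*
 * Added note (derived from the switch above): only PM_OUT_OF_DATA_SPACE
 * is configurable.  With error_if_no_space set the bio is failed with
 * BLK_STS_NOSPC straight away; otherwise returning 0 lets
 * handle_unserviceable_bio() and retry_bios_on_resume() queue the bio
 * until the pool is resumed, presumably with more space.
 */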
16173e1a0699SJoe Thornber 
16188c0f0e8cSMike Snitzer static void handle_unserviceable_bio(struct pool *pool, struct bio *bio)
16198c0f0e8cSMike Snitzer {
16204e4cbee9SChristoph Hellwig 	blk_status_t error = should_error_unserviceable_bio(pool);
1621af91805aSMike Snitzer 
16224246a0b6SChristoph Hellwig 	if (error) {
16234e4cbee9SChristoph Hellwig 		bio->bi_status = error;
16244246a0b6SChristoph Hellwig 		bio_endio(bio);
16254246a0b6SChristoph Hellwig 	} else
16266d16202bSMike Snitzer 		retry_on_resume(bio);
16278c0f0e8cSMike Snitzer }
16288c0f0e8cSMike Snitzer 
1629399caddfSMike Snitzer static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *cell)
1630991d9fa0SJoe Thornber {
1631991d9fa0SJoe Thornber 	struct bio *bio;
1632991d9fa0SJoe Thornber 	struct bio_list bios;
16334e4cbee9SChristoph Hellwig 	blk_status_t error;
1634991d9fa0SJoe Thornber 
1635af91805aSMike Snitzer 	error = should_error_unserviceable_bio(pool);
1636af91805aSMike Snitzer 	if (error) {
1637af91805aSMike Snitzer 		cell_error_with_code(pool, cell, error);
16383e1a0699SJoe Thornber 		return;
16393e1a0699SJoe Thornber 	}
16403e1a0699SJoe Thornber 
1641991d9fa0SJoe Thornber 	bio_list_init(&bios);
16426beca5ebSJoe Thornber 	cell_release(pool, cell, &bios);
1643991d9fa0SJoe Thornber 
16443e1a0699SJoe Thornber 	while ((bio = bio_list_pop(&bios)))
16453e1a0699SJoe Thornber 		retry_on_resume(bio);
1646991d9fa0SJoe Thornber }
1647991d9fa0SJoe Thornber 
164834fbcf62SJoe Thornber static void process_discard_cell_no_passdown(struct thin_c *tc,
164934fbcf62SJoe Thornber 					     struct dm_bio_prison_cell *virt_cell)
1650104655fdSJoe Thornber {
1651104655fdSJoe Thornber 	struct pool *pool = tc->pool;
165234fbcf62SJoe Thornber 	struct dm_thin_new_mapping *m = get_next_mapping(pool);
1653104655fdSJoe Thornber 
1654104655fdSJoe Thornber 	/*
165534fbcf62SJoe Thornber 	 * We don't need to lock the data blocks, since there's no
165634fbcf62SJoe Thornber 	 * passdown.  We only lock data blocks for allocation and breaking sharing.
1657104655fdSJoe Thornber 	 */
165834fbcf62SJoe Thornber 	m->tc = tc;
165934fbcf62SJoe Thornber 	m->virt_begin = virt_cell->key.block_begin;
166034fbcf62SJoe Thornber 	m->virt_end = virt_cell->key.block_end;
166134fbcf62SJoe Thornber 	m->cell = virt_cell;
166234fbcf62SJoe Thornber 	m->bio = virt_cell->holder;
166334fbcf62SJoe Thornber 
166434fbcf62SJoe Thornber 	if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
166534fbcf62SJoe Thornber 		pool->process_prepared_discard(m);
1666104655fdSJoe Thornber }
1667104655fdSJoe Thornber 
166834fbcf62SJoe Thornber static void break_up_discard_bio(struct thin_c *tc, dm_block_t begin, dm_block_t end,
166934fbcf62SJoe Thornber 				 struct bio *bio)
167034fbcf62SJoe Thornber {
167134fbcf62SJoe Thornber 	struct pool *pool = tc->pool;
167234fbcf62SJoe Thornber 
167334fbcf62SJoe Thornber 	int r;
167434fbcf62SJoe Thornber 	bool maybe_shared;
167534fbcf62SJoe Thornber 	struct dm_cell_key data_key;
167634fbcf62SJoe Thornber 	struct dm_bio_prison_cell *data_cell;
167734fbcf62SJoe Thornber 	struct dm_thin_new_mapping *m;
1678e2dd8acaSJoe Thornber 	dm_block_t virt_begin, virt_end, data_begin, data_end;
1679e2dd8acaSJoe Thornber 	dm_block_t len, next_boundary;
168034fbcf62SJoe Thornber 
168134fbcf62SJoe Thornber 	while (begin != end) {
168234fbcf62SJoe Thornber 		r = dm_thin_find_mapped_range(tc->td, begin, end, &virt_begin, &virt_end,
168334fbcf62SJoe Thornber 					      &data_begin, &maybe_shared);
1684e2dd8acaSJoe Thornber 		if (r) {
168534fbcf62SJoe Thornber 			/*
168634fbcf62SJoe Thornber 			 * Silently fail, letting any mappings we've
168734fbcf62SJoe Thornber 			 * created complete.
168834fbcf62SJoe Thornber 			 */
168934fbcf62SJoe Thornber 			break;
1690e2dd8acaSJoe Thornber 		}
169134fbcf62SJoe Thornber 
1692e2dd8acaSJoe Thornber 		data_end = data_begin + (virt_end - virt_begin);
1693e2dd8acaSJoe Thornber 
1694e2dd8acaSJoe Thornber 		/*
1695e2dd8acaSJoe Thornber 		 * Make sure the data region obeys the bio prison restrictions.
1696e2dd8acaSJoe Thornber 		 */
1697e2dd8acaSJoe Thornber 		while (data_begin < data_end) {
1698e2dd8acaSJoe Thornber 			r = ensure_next_mapping(pool);
1699e2dd8acaSJoe Thornber 			if (r)
1700e2dd8acaSJoe Thornber 				return; /* we did our best */
1701e2dd8acaSJoe Thornber 
1702e2dd8acaSJoe Thornber 			next_boundary = ((data_begin >> BIO_PRISON_MAX_RANGE_SHIFT) + 1)
1703e2dd8acaSJoe Thornber 				<< BIO_PRISON_MAX_RANGE_SHIFT;
1704e2dd8acaSJoe Thornber 			len = min_t(sector_t, data_end - data_begin, next_boundary - data_begin);
1705e2dd8acaSJoe Thornber 
17063f8d3f54SMike Snitzer 			/* This key is certainly within range given the above splitting */
17073f8d3f54SMike Snitzer 			(void) build_key(tc->td, PHYSICAL, data_begin, data_begin + len, &data_key);
170834fbcf62SJoe Thornber 			if (bio_detain(tc->pool, &data_key, NULL, &data_cell)) {
170934fbcf62SJoe Thornber 				/* contention, we'll give up with this range */
1710e2dd8acaSJoe Thornber 				data_begin += len;
171134fbcf62SJoe Thornber 				continue;
171234fbcf62SJoe Thornber 			}
171334fbcf62SJoe Thornber 
1714104655fdSJoe Thornber 			/*
1715104655fdSJoe Thornber 			 * IO may still be going to the destination block.  We must
1716104655fdSJoe Thornber 			 * quiesce before we can do the removal.
1717104655fdSJoe Thornber 			 */
1718104655fdSJoe Thornber 			m = get_next_mapping(pool);
1719104655fdSJoe Thornber 			m->tc = tc;
172034fbcf62SJoe Thornber 			m->maybe_shared = maybe_shared;
172134fbcf62SJoe Thornber 			m->virt_begin = virt_begin;
1722e2dd8acaSJoe Thornber 			m->virt_end = virt_begin + len;
172334fbcf62SJoe Thornber 			m->data_block = data_begin;
172434fbcf62SJoe Thornber 			m->cell = data_cell;
1725104655fdSJoe Thornber 			m->bio = bio;
1726104655fdSJoe Thornber 
172734fbcf62SJoe Thornber 			/*
172834fbcf62SJoe Thornber 			 * The parent bio must not complete before sub discard bios are
1729202bae52SJoe Thornber 			 * chained to it (see end_discard's bio_chain)!
173034fbcf62SJoe Thornber 			 *
173134fbcf62SJoe Thornber 			 * This per-mapping bi_remaining increment is paired with
173234fbcf62SJoe Thornber 			 * the implicit decrement that occurs via bio_endio() in
1733202bae52SJoe Thornber 			 * end_discard().
173434fbcf62SJoe Thornber 			 */
173513e4f8a6SMike Snitzer 			bio_inc_remaining(bio);
17367a7e97caSJoe Thornber 			if (!dm_deferred_set_add_work(pool->all_io_ds, &m->list))
17377a7e97caSJoe Thornber 				pool->process_prepared_discard(m);
17387a7e97caSJoe Thornber 
1739e2dd8acaSJoe Thornber 			virt_begin += len;
1740e2dd8acaSJoe Thornber 			data_begin += len;
1741e2dd8acaSJoe Thornber 		}
1742e2dd8acaSJoe Thornber 
174334fbcf62SJoe Thornber 		begin = virt_end;
174434fbcf62SJoe Thornber 	}
174534fbcf62SJoe Thornber }
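/*
 * Added note on the splitting arithmetic above: next_boundary rounds
 * data_begin up to the next multiple of (1 << BIO_PRISON_MAX_RANGE_SHIFT),
 * so no data cell spans a bio prison range boundary.  With a hypothetical
 * shift of 10 (1024-block ranges, value chosen only for illustration),
 * data_begin = 1500 and data_end = 3000 would be locked and discarded as
 * [1500, 2048) and then [2048, 3000) in successive iterations.
 */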
174634fbcf62SJoe Thornber 
174734fbcf62SJoe Thornber static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_prison_cell *virt_cell)
174834fbcf62SJoe Thornber {
174934fbcf62SJoe Thornber 	struct bio *bio = virt_cell->holder;
175034fbcf62SJoe Thornber 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1751e8088073SJoe Thornber 
1752104655fdSJoe Thornber 	/*
175334fbcf62SJoe Thornber 	 * The virt_cell will only get freed once the origin bio completes.
175434fbcf62SJoe Thornber 	 * This means it will remain locked while all the individual
175534fbcf62SJoe Thornber 	 * passdown bios are in flight.
1756104655fdSJoe Thornber 	 */
175734fbcf62SJoe Thornber 	h->cell = virt_cell;
175834fbcf62SJoe Thornber 	break_up_discard_bio(tc, virt_cell->key.block_begin, virt_cell->key.block_end, bio);
1759104655fdSJoe Thornber 
1760104655fdSJoe Thornber 	/*
176134fbcf62SJoe Thornber 	 * We complete the bio now, knowing that the bi_remaining field
176234fbcf62SJoe Thornber 	 * will prevent completion until the sub range discards have
176334fbcf62SJoe Thornber 	 * completed.
1764104655fdSJoe Thornber 	 */
17654246a0b6SChristoph Hellwig 	bio_endio(bio);
1766104655fdSJoe Thornber }
1767104655fdSJoe Thornber 
1768a374bb21SJoe Thornber static void process_discard_bio(struct thin_c *tc, struct bio *bio)
1769a374bb21SJoe Thornber {
177034fbcf62SJoe Thornber 	dm_block_t begin, end;
177134fbcf62SJoe Thornber 	struct dm_cell_key virt_key;
177234fbcf62SJoe Thornber 	struct dm_bio_prison_cell *virt_cell;
1773a374bb21SJoe Thornber 
177434fbcf62SJoe Thornber 	get_bio_block_range(tc, bio, &begin, &end);
177534fbcf62SJoe Thornber 	if (begin == end) {
177634fbcf62SJoe Thornber 		/*
177734fbcf62SJoe Thornber 		 * The discard covers less than a block.
177834fbcf62SJoe Thornber 		 */
17794246a0b6SChristoph Hellwig 		bio_endio(bio);
178034fbcf62SJoe Thornber 		return;
178134fbcf62SJoe Thornber 	}
178234fbcf62SJoe Thornber 
17833f8d3f54SMike Snitzer 	if (unlikely(!build_key(tc->td, VIRTUAL, begin, end, &virt_key))) {
17843f8d3f54SMike Snitzer 		DMERR_LIMIT("Discard doesn't respect bio prison limits");
17853f8d3f54SMike Snitzer 		bio_endio(bio);
17863f8d3f54SMike Snitzer 		return;
17873f8d3f54SMike Snitzer 	}
17883f8d3f54SMike Snitzer 
17893f8d3f54SMike Snitzer 	if (bio_detain(tc->pool, &virt_key, bio, &virt_cell)) {
179034fbcf62SJoe Thornber 		/*
179134fbcf62SJoe Thornber 		 * Potential starvation issue: We're relying on the
179234fbcf62SJoe Thornber 		 * fs/application being well behaved, and not trying to
179334fbcf62SJoe Thornber 		 * send IO to a region at the same time as discarding it.
179434fbcf62SJoe Thornber 		 * If they do this persistently then it's possible this
179534fbcf62SJoe Thornber 		 * cell will never be granted.
179634fbcf62SJoe Thornber 		 */
1797a374bb21SJoe Thornber 		return;
17983f8d3f54SMike Snitzer 	}
1799a374bb21SJoe Thornber 
180034fbcf62SJoe Thornber 	tc->pool->process_discard_cell(tc, virt_cell);
1801a374bb21SJoe Thornber }
1802a374bb21SJoe Thornber 
1803991d9fa0SJoe Thornber static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
180444feb387SMike Snitzer 			  struct dm_cell_key *key,
1805991d9fa0SJoe Thornber 			  struct dm_thin_lookup_result *lookup_result,
1806a24c2569SMike Snitzer 			  struct dm_bio_prison_cell *cell)
1807991d9fa0SJoe Thornber {
1808991d9fa0SJoe Thornber 	int r;
1809991d9fa0SJoe Thornber 	dm_block_t data_block;
1810d6fc2042SMike Snitzer 	struct pool *pool = tc->pool;
1811991d9fa0SJoe Thornber 
1812991d9fa0SJoe Thornber 	r = alloc_data_block(tc, &data_block);
1813991d9fa0SJoe Thornber 	switch (r) {
1814991d9fa0SJoe Thornber 	case 0:
18152dd9c257SJoe Thornber 		schedule_internal_copy(tc, block, lookup_result->block,
1816991d9fa0SJoe Thornber 				       data_block, cell, bio);
1817991d9fa0SJoe Thornber 		break;
1818991d9fa0SJoe Thornber 
1819991d9fa0SJoe Thornber 	case -ENOSPC:
1820399caddfSMike Snitzer 		retry_bios_on_resume(pool, cell);
1821991d9fa0SJoe Thornber 		break;
1822991d9fa0SJoe Thornber 
1823991d9fa0SJoe Thornber 	default:
1824c397741cSMike Snitzer 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
1825c397741cSMike Snitzer 			    __func__, r);
1826d6fc2042SMike Snitzer 		cell_error(pool, cell);
1827991d9fa0SJoe Thornber 		break;
1828991d9fa0SJoe Thornber 	}
1829991d9fa0SJoe Thornber }
1830991d9fa0SJoe Thornber 
183123ca2bb6SJoe Thornber static void __remap_and_issue_shared_cell(void *context,
183223ca2bb6SJoe Thornber 					  struct dm_bio_prison_cell *cell)
183323ca2bb6SJoe Thornber {
183423ca2bb6SJoe Thornber 	struct remap_info *info = context;
183523ca2bb6SJoe Thornber 	struct bio *bio;
183623ca2bb6SJoe Thornber 
183723ca2bb6SJoe Thornber 	while ((bio = bio_list_pop(&cell->bios))) {
1838f73f44ebSChristoph Hellwig 		if (bio_data_dir(bio) == WRITE || op_is_flush(bio->bi_opf) ||
1839f73f44ebSChristoph Hellwig 		    bio_op(bio) == REQ_OP_DISCARD)
184023ca2bb6SJoe Thornber 			bio_list_add(&info->defer_bios, bio);
184123ca2bb6SJoe Thornber 		else {
1842bd6d1e0aSLuis de Bethencourt 			struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
184323ca2bb6SJoe Thornber 
184423ca2bb6SJoe Thornber 			h->shared_read_entry = dm_deferred_entry_inc(info->tc->pool->shared_read_ds);
184523ca2bb6SJoe Thornber 			inc_all_io_entry(info->tc->pool, bio);
184623ca2bb6SJoe Thornber 			bio_list_add(&info->issue_bios, bio);
184723ca2bb6SJoe Thornber 		}
184823ca2bb6SJoe Thornber 	}
184923ca2bb6SJoe Thornber }
185023ca2bb6SJoe Thornber 
185123ca2bb6SJoe Thornber static void remap_and_issue_shared_cell(struct thin_c *tc,
185223ca2bb6SJoe Thornber 					struct dm_bio_prison_cell *cell,
185323ca2bb6SJoe Thornber 					dm_block_t block)
185423ca2bb6SJoe Thornber {
185523ca2bb6SJoe Thornber 	struct bio *bio;
185623ca2bb6SJoe Thornber 	struct remap_info info;
185723ca2bb6SJoe Thornber 
185823ca2bb6SJoe Thornber 	info.tc = tc;
185923ca2bb6SJoe Thornber 	bio_list_init(&info.defer_bios);
186023ca2bb6SJoe Thornber 	bio_list_init(&info.issue_bios);
186123ca2bb6SJoe Thornber 
186223ca2bb6SJoe Thornber 	cell_visit_release(tc->pool, __remap_and_issue_shared_cell,
186323ca2bb6SJoe Thornber 			   &info, cell);
186423ca2bb6SJoe Thornber 
186523ca2bb6SJoe Thornber 	while ((bio = bio_list_pop(&info.defer_bios)))
186623ca2bb6SJoe Thornber 		thin_defer_bio(tc, bio);
186723ca2bb6SJoe Thornber 
186823ca2bb6SJoe Thornber 	while ((bio = bio_list_pop(&info.issue_bios)))
186923ca2bb6SJoe Thornber 		remap_and_issue(tc, bio, block);
187023ca2bb6SJoe Thornber }
187123ca2bb6SJoe Thornber 
1872991d9fa0SJoe Thornber static void process_shared_bio(struct thin_c *tc, struct bio *bio,
1873991d9fa0SJoe Thornber 			       dm_block_t block,
187423ca2bb6SJoe Thornber 			       struct dm_thin_lookup_result *lookup_result,
187523ca2bb6SJoe Thornber 			       struct dm_bio_prison_cell *virt_cell)
1876991d9fa0SJoe Thornber {
187723ca2bb6SJoe Thornber 	struct dm_bio_prison_cell *data_cell;
1878991d9fa0SJoe Thornber 	struct pool *pool = tc->pool;
187944feb387SMike Snitzer 	struct dm_cell_key key;
1880991d9fa0SJoe Thornber 
1881991d9fa0SJoe Thornber 	/*
1882991d9fa0SJoe Thornber 	 * If cell is already occupied, then sharing is already in the process
1883991d9fa0SJoe Thornber 	 * of being broken so we have nothing further to do here.
1884991d9fa0SJoe Thornber 	 */
1885991d9fa0SJoe Thornber 	build_data_key(tc->td, lookup_result->block, &key);
188623ca2bb6SJoe Thornber 	if (bio_detain(pool, &key, bio, &data_cell)) {
188723ca2bb6SJoe Thornber 		cell_defer_no_holder(tc, virt_cell);
1888991d9fa0SJoe Thornber 		return;
188923ca2bb6SJoe Thornber 	}
1890991d9fa0SJoe Thornber 
189123ca2bb6SJoe Thornber 	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size) {
189223ca2bb6SJoe Thornber 		break_sharing(tc, bio, block, &key, lookup_result, data_cell);
189323ca2bb6SJoe Thornber 		cell_defer_no_holder(tc, virt_cell);
189423ca2bb6SJoe Thornber 	} else {
189559c3d2c6SMikulas Patocka 		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
1896991d9fa0SJoe Thornber 
189744feb387SMike Snitzer 		h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds);
1898e8088073SJoe Thornber 		inc_all_io_entry(pool, bio);
1899991d9fa0SJoe Thornber 		remap_and_issue(tc, bio, lookup_result->block);
190023ca2bb6SJoe Thornber 
190123ca2bb6SJoe Thornber 		remap_and_issue_shared_cell(tc, data_cell, lookup_result->block);
190223ca2bb6SJoe Thornber 		remap_and_issue_shared_cell(tc, virt_cell, lookup_result->block);
1903991d9fa0SJoe Thornber 	}
1904991d9fa0SJoe Thornber }
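/*
 * Added summary (paraphrasing the code above): writes to a shared block
 * break sharing by allocating a fresh data block and copying, while reads
 * just take a shared_read_ds entry and are remapped to the existing
 * block; any bios parked in the data and virtual cells are then deferred
 * or reissued accordingly.
 */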
1905991d9fa0SJoe Thornber 
1906991d9fa0SJoe Thornber static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block,
1907a24c2569SMike Snitzer 			    struct dm_bio_prison_cell *cell)
1908991d9fa0SJoe Thornber {
1909991d9fa0SJoe Thornber 	int r;
1910991d9fa0SJoe Thornber 	dm_block_t data_block;
19116beca5ebSJoe Thornber 	struct pool *pool = tc->pool;
1912991d9fa0SJoe Thornber 
1913991d9fa0SJoe Thornber 	/*
1914991d9fa0SJoe Thornber 	 * Remap empty bios (flushes) immediately, without provisioning.
1915991d9fa0SJoe Thornber 	 */
19164f024f37SKent Overstreet 	if (!bio->bi_iter.bi_size) {
19176beca5ebSJoe Thornber 		inc_all_io_entry(pool, bio);
1918f286ba0eSJoe Thornber 		cell_defer_no_holder(tc, cell);
1919e8088073SJoe Thornber 
1920991d9fa0SJoe Thornber 		remap_and_issue(tc, bio, 0);
1921991d9fa0SJoe Thornber 		return;
1922991d9fa0SJoe Thornber 	}
1923991d9fa0SJoe Thornber 
1924991d9fa0SJoe Thornber 	/*
1925991d9fa0SJoe Thornber 	 * Fill read bios with zeroes and complete them immediately.
1926991d9fa0SJoe Thornber 	 */
1927991d9fa0SJoe Thornber 	if (bio_data_dir(bio) == READ) {
1928991d9fa0SJoe Thornber 		zero_fill_bio(bio);
1929f286ba0eSJoe Thornber 		cell_defer_no_holder(tc, cell);
19304246a0b6SChristoph Hellwig 		bio_endio(bio);
1931991d9fa0SJoe Thornber 		return;
1932991d9fa0SJoe Thornber 	}
1933991d9fa0SJoe Thornber 
1934991d9fa0SJoe Thornber 	r = alloc_data_block(tc, &data_block);
1935991d9fa0SJoe Thornber 	switch (r) {
1936991d9fa0SJoe Thornber 	case 0:
19372dd9c257SJoe Thornber 		if (tc->origin_dev)
19382dd9c257SJoe Thornber 			schedule_external_copy(tc, block, data_block, cell, bio);
19392dd9c257SJoe Thornber 		else
1940991d9fa0SJoe Thornber 			schedule_zero(tc, block, data_block, cell, bio);
1941991d9fa0SJoe Thornber 		break;
1942991d9fa0SJoe Thornber 
1943991d9fa0SJoe Thornber 	case -ENOSPC:
1944399caddfSMike Snitzer 		retry_bios_on_resume(pool, cell);
1945991d9fa0SJoe Thornber 		break;
1946991d9fa0SJoe Thornber 
1947991d9fa0SJoe Thornber 	default:
1948c397741cSMike Snitzer 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
1949c397741cSMike Snitzer 			    __func__, r);
19506beca5ebSJoe Thornber 		cell_error(pool, cell);
1951991d9fa0SJoe Thornber 		break;
1952991d9fa0SJoe Thornber 	}
1953991d9fa0SJoe Thornber }
1954991d9fa0SJoe Thornber 
1955a374bb21SJoe Thornber static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
1956991d9fa0SJoe Thornber {
1957991d9fa0SJoe Thornber 	int r;
19586beca5ebSJoe Thornber 	struct pool *pool = tc->pool;
1959a374bb21SJoe Thornber 	struct bio *bio = cell->holder;
1960991d9fa0SJoe Thornber 	dm_block_t block = get_bio_block(tc, bio);
1961991d9fa0SJoe Thornber 	struct dm_thin_lookup_result lookup_result;
1962991d9fa0SJoe Thornber 
1963a374bb21SJoe Thornber 	if (tc->requeue_mode) {
1964a374bb21SJoe Thornber 		cell_requeue(pool, cell);
1965991d9fa0SJoe Thornber 		return;
1966a374bb21SJoe Thornber 	}
1967991d9fa0SJoe Thornber 
1968991d9fa0SJoe Thornber 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
1969991d9fa0SJoe Thornber 	switch (r) {
1970991d9fa0SJoe Thornber 	case 0:
197123ca2bb6SJoe Thornber 		if (lookup_result.shared)
197223ca2bb6SJoe Thornber 			process_shared_bio(tc, bio, block, &lookup_result, cell);
197323ca2bb6SJoe Thornber 		else {
19746beca5ebSJoe Thornber 			inc_all_io_entry(pool, bio);
1975991d9fa0SJoe Thornber 			remap_and_issue(tc, bio, lookup_result.block);
1976a374bb21SJoe Thornber 			inc_remap_and_issue_cell(tc, cell, lookup_result.block);
1977e8088073SJoe Thornber 		}
1978991d9fa0SJoe Thornber 		break;
1979991d9fa0SJoe Thornber 
1980991d9fa0SJoe Thornber 	case -ENODATA:
19812dd9c257SJoe Thornber 		if (bio_data_dir(bio) == READ && tc->origin_dev) {
19826beca5ebSJoe Thornber 			inc_all_io_entry(pool, bio);
1983f286ba0eSJoe Thornber 			cell_defer_no_holder(tc, cell);
1984e8088073SJoe Thornber 
1985e5aea7b4SJoe Thornber 			if (bio_end_sector(bio) <= tc->origin_size)
19862dd9c257SJoe Thornber 				remap_to_origin_and_issue(tc, bio);
1987e5aea7b4SJoe Thornber 
1988e5aea7b4SJoe Thornber 			else if (bio->bi_iter.bi_sector < tc->origin_size) {
1989e5aea7b4SJoe Thornber 				zero_fill_bio(bio);
1990e5aea7b4SJoe Thornber 				bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
1991e5aea7b4SJoe Thornber 				remap_to_origin_and_issue(tc, bio);
1992e5aea7b4SJoe Thornber 
1993e5aea7b4SJoe Thornber 			} else {
1994e5aea7b4SJoe Thornber 				zero_fill_bio(bio);
19954246a0b6SChristoph Hellwig 				bio_endio(bio);
1996e5aea7b4SJoe Thornber 			}
19972dd9c257SJoe Thornber 		} else
1998991d9fa0SJoe Thornber 			provision_block(tc, bio, block, cell);
1999991d9fa0SJoe Thornber 		break;
2000991d9fa0SJoe Thornber 
2001991d9fa0SJoe Thornber 	default:
2002c397741cSMike Snitzer 		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
2003c397741cSMike Snitzer 			    __func__, r);
2004f286ba0eSJoe Thornber 		cell_defer_no_holder(tc, cell);
2005991d9fa0SJoe Thornber 		bio_io_error(bio);
2006991d9fa0SJoe Thornber 		break;
2007991d9fa0SJoe Thornber 	}
2008991d9fa0SJoe Thornber }
2009991d9fa0SJoe Thornber 
2010a374bb21SJoe Thornber static void process_bio(struct thin_c *tc, struct bio *bio)
2011a374bb21SJoe Thornber {
2012a374bb21SJoe Thornber 	struct pool *pool = tc->pool;
2013a374bb21SJoe Thornber 	dm_block_t block = get_bio_block(tc, bio);
2014a374bb21SJoe Thornber 	struct dm_bio_prison_cell *cell;
2015a374bb21SJoe Thornber 	struct dm_cell_key key;
2016a374bb21SJoe Thornber 
2017a374bb21SJoe Thornber 	/*
2018a374bb21SJoe Thornber 	 * If cell is already occupied, then the block is already
2019a374bb21SJoe Thornber 	 * being provisioned so we have nothing further to do here.
2020a374bb21SJoe Thornber 	 */
2021a374bb21SJoe Thornber 	build_virtual_key(tc->td, block, &key);
2022a374bb21SJoe Thornber 	if (bio_detain(pool, &key, bio, &cell))
2023a374bb21SJoe Thornber 		return;
2024a374bb21SJoe Thornber 
2025a374bb21SJoe Thornber 	process_cell(tc, cell);
2026a374bb21SJoe Thornber }
2027a374bb21SJoe Thornber 
2028a374bb21SJoe Thornber static void __process_bio_read_only(struct thin_c *tc, struct bio *bio,
2029a374bb21SJoe Thornber 				    struct dm_bio_prison_cell *cell)
2030e49e5829SJoe Thornber {
2031e49e5829SJoe Thornber 	int r;
2032e49e5829SJoe Thornber 	int rw = bio_data_dir(bio);
2033e49e5829SJoe Thornber 	dm_block_t block = get_bio_block(tc, bio);
2034e49e5829SJoe Thornber 	struct dm_thin_lookup_result lookup_result;
2035e49e5829SJoe Thornber 
2036e49e5829SJoe Thornber 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
2037e49e5829SJoe Thornber 	switch (r) {
2038e49e5829SJoe Thornber 	case 0:
2039a374bb21SJoe Thornber 		if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size) {
20408c0f0e8cSMike Snitzer 			handle_unserviceable_bio(tc->pool, bio);
2041a374bb21SJoe Thornber 			if (cell)
2042a374bb21SJoe Thornber 				cell_defer_no_holder(tc, cell);
2043a374bb21SJoe Thornber 		} else {
2044e8088073SJoe Thornber 			inc_all_io_entry(tc->pool, bio);
2045e49e5829SJoe Thornber 			remap_and_issue(tc, bio, lookup_result.block);
2046a374bb21SJoe Thornber 			if (cell)
2047a374bb21SJoe Thornber 				inc_remap_and_issue_cell(tc, cell, lookup_result.block);
2048e8088073SJoe Thornber 		}
2049e49e5829SJoe Thornber 		break;
2050e49e5829SJoe Thornber 
2051e49e5829SJoe Thornber 	case -ENODATA:
2052a374bb21SJoe Thornber 		if (cell)
2053a374bb21SJoe Thornber 			cell_defer_no_holder(tc, cell);
2054e49e5829SJoe Thornber 		if (rw != READ) {
20558c0f0e8cSMike Snitzer 			handle_unserviceable_bio(tc->pool, bio);
2056e49e5829SJoe Thornber 			break;
2057e49e5829SJoe Thornber 		}
2058e49e5829SJoe Thornber 
2059e49e5829SJoe Thornber 		if (tc->origin_dev) {
2060e8088073SJoe Thornber 			inc_all_io_entry(tc->pool, bio);
2061e49e5829SJoe Thornber 			remap_to_origin_and_issue(tc, bio);
2062e49e5829SJoe Thornber 			break;
2063e49e5829SJoe Thornber 		}
2064e49e5829SJoe Thornber 
2065e49e5829SJoe Thornber 		zero_fill_bio(bio);
20664246a0b6SChristoph Hellwig 		bio_endio(bio);
2067e49e5829SJoe Thornber 		break;
2068e49e5829SJoe Thornber 
2069e49e5829SJoe Thornber 	default:
2070c397741cSMike Snitzer 		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
2071c397741cSMike Snitzer 			    __func__, r);
2072a374bb21SJoe Thornber 		if (cell)
2073a374bb21SJoe Thornber 			cell_defer_no_holder(tc, cell);
2074e49e5829SJoe Thornber 		bio_io_error(bio);
2075e49e5829SJoe Thornber 		break;
2076e49e5829SJoe Thornber 	}
2077e49e5829SJoe Thornber }
2078e49e5829SJoe Thornber 
2079a374bb21SJoe Thornber static void process_bio_read_only(struct thin_c *tc, struct bio *bio)
2080a374bb21SJoe Thornber {
2081a374bb21SJoe Thornber 	__process_bio_read_only(tc, bio, NULL);
2082a374bb21SJoe Thornber }
2083a374bb21SJoe Thornber 
2084a374bb21SJoe Thornber static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell *cell)
2085a374bb21SJoe Thornber {
2086a374bb21SJoe Thornber 	__process_bio_read_only(tc, cell->holder, cell);
2087a374bb21SJoe Thornber }
2088a374bb21SJoe Thornber 
20893e1a0699SJoe Thornber static void process_bio_success(struct thin_c *tc, struct bio *bio)
20903e1a0699SJoe Thornber {
20914246a0b6SChristoph Hellwig 	bio_endio(bio);
20923e1a0699SJoe Thornber }
20933e1a0699SJoe Thornber 
2094e49e5829SJoe Thornber static void process_bio_fail(struct thin_c *tc, struct bio *bio)
2095e49e5829SJoe Thornber {
2096e49e5829SJoe Thornber 	bio_io_error(bio);
2097e49e5829SJoe Thornber }
2098e49e5829SJoe Thornber 
2099a374bb21SJoe Thornber static void process_cell_success(struct thin_c *tc, struct dm_bio_prison_cell *cell)
2100a374bb21SJoe Thornber {
2101a374bb21SJoe Thornber 	cell_success(tc->pool, cell);
2102a374bb21SJoe Thornber }
2103a374bb21SJoe Thornber 
2104a374bb21SJoe Thornber static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell)
2105a374bb21SJoe Thornber {
2106a374bb21SJoe Thornber 	cell_error(tc->pool, cell);
2107a374bb21SJoe Thornber }
2108a374bb21SJoe Thornber 
2109ac8c3f3dSJoe Thornber /*
2110ac8c3f3dSJoe Thornber  * FIXME: should we also commit due to size of transaction, measured in
2111ac8c3f3dSJoe Thornber  * metadata blocks?
2112ac8c3f3dSJoe Thornber  */
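/*
 * Returns non-zero once more than COMMIT_PERIOD jiffies have elapsed
 * since the last commit.  time_in_range() keeps the comparison correct
 * across jiffies wrap-around.
 */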
2113905e51b3SJoe Thornber static int need_commit_due_to_time(struct pool *pool)
2114905e51b3SJoe Thornber {
21150f30af98SManuel Schölling 	return !time_in_range(jiffies, pool->last_commit_jiffies,
21160f30af98SManuel Schölling 			      pool->last_commit_jiffies + COMMIT_PERIOD);
2117905e51b3SJoe Thornber }
2118905e51b3SJoe Thornber 
211967324ea1SMike Snitzer #define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node)
212067324ea1SMike Snitzer #define thin_bio(pbd) dm_bio_from_per_bio_data((pbd), sizeof(struct dm_thin_endio_hook))
212167324ea1SMike Snitzer 
212267324ea1SMike Snitzer static void __thin_bio_rb_add(struct thin_c *tc, struct bio *bio)
212367324ea1SMike Snitzer {
212467324ea1SMike Snitzer 	struct rb_node **rbp, *parent;
212567324ea1SMike Snitzer 	struct dm_thin_endio_hook *pbd;
212667324ea1SMike Snitzer 	sector_t bi_sector = bio->bi_iter.bi_sector;
212767324ea1SMike Snitzer 
212867324ea1SMike Snitzer 	rbp = &tc->sort_bio_list.rb_node;
212967324ea1SMike Snitzer 	parent = NULL;
213067324ea1SMike Snitzer 	while (*rbp) {
213167324ea1SMike Snitzer 		parent = *rbp;
213267324ea1SMike Snitzer 		pbd = thin_pbd(parent);
213367324ea1SMike Snitzer 
213467324ea1SMike Snitzer 		if (bi_sector < thin_bio(pbd)->bi_iter.bi_sector)
213567324ea1SMike Snitzer 			rbp = &(*rbp)->rb_left;
213667324ea1SMike Snitzer 		else
213767324ea1SMike Snitzer 			rbp = &(*rbp)->rb_right;
213867324ea1SMike Snitzer 	}
213967324ea1SMike Snitzer 
214067324ea1SMike Snitzer 	pbd = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
214167324ea1SMike Snitzer 	rb_link_node(&pbd->rb_node, parent, rbp);
214267324ea1SMike Snitzer 	rb_insert_color(&pbd->rb_node, &tc->sort_bio_list);
214367324ea1SMike Snitzer }
214467324ea1SMike Snitzer 
214567324ea1SMike Snitzer static void __extract_sorted_bios(struct thin_c *tc)
214667324ea1SMike Snitzer {
214767324ea1SMike Snitzer 	struct rb_node *node;
214867324ea1SMike Snitzer 	struct dm_thin_endio_hook *pbd;
214967324ea1SMike Snitzer 	struct bio *bio;
215067324ea1SMike Snitzer 
215167324ea1SMike Snitzer 	for (node = rb_first(&tc->sort_bio_list); node; node = rb_next(node)) {
215267324ea1SMike Snitzer 		pbd = thin_pbd(node);
215367324ea1SMike Snitzer 		bio = thin_bio(pbd);
215467324ea1SMike Snitzer 
215567324ea1SMike Snitzer 		bio_list_add(&tc->deferred_bio_list, bio);
215667324ea1SMike Snitzer 		rb_erase(&pbd->rb_node, &tc->sort_bio_list);
215767324ea1SMike Snitzer 	}
215867324ea1SMike Snitzer 
215967324ea1SMike Snitzer 	WARN_ON(!RB_EMPTY_ROOT(&tc->sort_bio_list));
216067324ea1SMike Snitzer }
216167324ea1SMike Snitzer 
216267324ea1SMike Snitzer static void __sort_thin_deferred_bios(struct thin_c *tc)
216367324ea1SMike Snitzer {
216467324ea1SMike Snitzer 	struct bio *bio;
216567324ea1SMike Snitzer 	struct bio_list bios;
216667324ea1SMike Snitzer 
216767324ea1SMike Snitzer 	bio_list_init(&bios);
216867324ea1SMike Snitzer 	bio_list_merge(&bios, &tc->deferred_bio_list);
216967324ea1SMike Snitzer 	bio_list_init(&tc->deferred_bio_list);
217067324ea1SMike Snitzer 
217167324ea1SMike Snitzer 	/* Sort deferred_bio_list using rb-tree */
217267324ea1SMike Snitzer 	while ((bio = bio_list_pop(&bios)))
217367324ea1SMike Snitzer 		__thin_bio_rb_add(tc, bio);
217467324ea1SMike Snitzer 
217567324ea1SMike Snitzer 	/*
217667324ea1SMike Snitzer 	 * Transfer the sorted bios in sort_bio_list back to
217767324ea1SMike Snitzer 	 * deferred_bio_list to allow lockless submission of
217867324ea1SMike Snitzer 	 * all bios.
217967324ea1SMike Snitzer 	 */
218067324ea1SMike Snitzer 	__extract_sorted_bios(tc);
218167324ea1SMike Snitzer }
218267324ea1SMike Snitzer 
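/*
 * Deferred bios are sorted by sector (via the rb-tree above) and then
 * issued under a blk plug, so writes to the data device tend to go out
 * in ascending-sector batches rather than in arrival order.
 */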
2183c140e1c4SMike Snitzer static void process_thin_deferred_bios(struct thin_c *tc)
2184991d9fa0SJoe Thornber {
2185c140e1c4SMike Snitzer 	struct pool *pool = tc->pool;
2186991d9fa0SJoe Thornber 	struct bio *bio;
2187991d9fa0SJoe Thornber 	struct bio_list bios;
218867324ea1SMike Snitzer 	struct blk_plug plug;
218986a3238cSHeinz Mauelshagen 	unsigned int count = 0;
2190991d9fa0SJoe Thornber 
2191738211f7SJoe Thornber 	if (tc->requeue_mode) {
21924e4cbee9SChristoph Hellwig 		error_thin_bio_list(tc, &tc->deferred_bio_list,
21934e4cbee9SChristoph Hellwig 				BLK_STS_DM_REQUEUE);
2194c140e1c4SMike Snitzer 		return;
2195738211f7SJoe Thornber 	}
2196738211f7SJoe Thornber 
2197c140e1c4SMike Snitzer 	bio_list_init(&bios);
2198c140e1c4SMike Snitzer 
21998e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->lock);
220067324ea1SMike Snitzer 
220167324ea1SMike Snitzer 	if (bio_list_empty(&tc->deferred_bio_list)) {
22028e0c9dacSMikulas Patocka 		spin_unlock_irq(&tc->lock);
220367324ea1SMike Snitzer 		return;
220467324ea1SMike Snitzer 	}
220567324ea1SMike Snitzer 
220667324ea1SMike Snitzer 	__sort_thin_deferred_bios(tc);
220767324ea1SMike Snitzer 
2208c140e1c4SMike Snitzer 	bio_list_merge(&bios, &tc->deferred_bio_list);
2209c140e1c4SMike Snitzer 	bio_list_init(&tc->deferred_bio_list);
221067324ea1SMike Snitzer 
22118e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->lock);
2212c140e1c4SMike Snitzer 
221367324ea1SMike Snitzer 	blk_start_plug(&plug);
2214c140e1c4SMike Snitzer 	while ((bio = bio_list_pop(&bios))) {
2215991d9fa0SJoe Thornber 		/*
2216991d9fa0SJoe Thornber 		 * If we've got no free new_mapping structs, and processing
2217991d9fa0SJoe Thornber 		 * this bio might require one, we pause until there are some
2218991d9fa0SJoe Thornber 		 * prepared mappings to process.
2219991d9fa0SJoe Thornber 		 */
2220991d9fa0SJoe Thornber 		if (ensure_next_mapping(pool)) {
22218e0c9dacSMikulas Patocka 			spin_lock_irq(&tc->lock);
2222c140e1c4SMike Snitzer 			bio_list_add(&tc->deferred_bio_list, bio);
2223c140e1c4SMike Snitzer 			bio_list_merge(&tc->deferred_bio_list, &bios);
22248e0c9dacSMikulas Patocka 			spin_unlock_irq(&tc->lock);
2225991d9fa0SJoe Thornber 			break;
2226991d9fa0SJoe Thornber 		}
2227104655fdSJoe Thornber 
2228e6047149SMike Christie 		if (bio_op(bio) == REQ_OP_DISCARD)
2229e49e5829SJoe Thornber 			pool->process_discard(tc, bio);
2230104655fdSJoe Thornber 		else
2231e49e5829SJoe Thornber 			pool->process_bio(tc, bio);
22328a01a6afSJoe Thornber 
22338a01a6afSJoe Thornber 		if ((count++ & 127) == 0) {
22347d327fe0SJoe Thornber 			throttle_work_update(&pool->throttle);
22358a01a6afSJoe Thornber 			dm_pool_issue_prefetches(pool->pmd);
22368a01a6afSJoe Thornber 		}
2237e4f80303SMike Snitzer 		cond_resched();
2238991d9fa0SJoe Thornber 	}
223967324ea1SMike Snitzer 	blk_finish_plug(&plug);
2240c140e1c4SMike Snitzer }
2241c140e1c4SMike Snitzer 
2242ac4c3f34SJoe Thornber static int cmp_cells(const void *lhs, const void *rhs)
2243ac4c3f34SJoe Thornber {
2244ac4c3f34SJoe Thornber 	struct dm_bio_prison_cell *lhs_cell = *((struct dm_bio_prison_cell **) lhs);
2245ac4c3f34SJoe Thornber 	struct dm_bio_prison_cell *rhs_cell = *((struct dm_bio_prison_cell **) rhs);
2246ac4c3f34SJoe Thornber 
2247ac4c3f34SJoe Thornber 	BUG_ON(!lhs_cell->holder);
2248ac4c3f34SJoe Thornber 	BUG_ON(!rhs_cell->holder);
2249ac4c3f34SJoe Thornber 
2250ac4c3f34SJoe Thornber 	if (lhs_cell->holder->bi_iter.bi_sector < rhs_cell->holder->bi_iter.bi_sector)
2251ac4c3f34SJoe Thornber 		return -1;
2252ac4c3f34SJoe Thornber 
2253ac4c3f34SJoe Thornber 	if (lhs_cell->holder->bi_iter.bi_sector > rhs_cell->holder->bi_iter.bi_sector)
2254ac4c3f34SJoe Thornber 		return 1;
2255ac4c3f34SJoe Thornber 
2256ac4c3f34SJoe Thornber 	return 0;
2257ac4c3f34SJoe Thornber }
2258ac4c3f34SJoe Thornber 
225986a3238cSHeinz Mauelshagen static unsigned int sort_cells(struct pool *pool, struct list_head *cells)
2260ac4c3f34SJoe Thornber {
226186a3238cSHeinz Mauelshagen 	unsigned int count = 0;
2262ac4c3f34SJoe Thornber 	struct dm_bio_prison_cell *cell, *tmp;
2263ac4c3f34SJoe Thornber 
2264ac4c3f34SJoe Thornber 	list_for_each_entry_safe(cell, tmp, cells, user_list) {
2265ac4c3f34SJoe Thornber 		if (count >= CELL_SORT_ARRAY_SIZE)
2266ac4c3f34SJoe Thornber 			break;
2267ac4c3f34SJoe Thornber 
2268ac4c3f34SJoe Thornber 		pool->cell_sort_array[count++] = cell;
2269ac4c3f34SJoe Thornber 		list_del(&cell->user_list);
2270ac4c3f34SJoe Thornber 	}
2271ac4c3f34SJoe Thornber 
2272ac4c3f34SJoe Thornber 	sort(pool->cell_sort_array, count, sizeof(cell), cmp_cells, NULL);
2273ac4c3f34SJoe Thornber 
2274ac4c3f34SJoe Thornber 	return count;
2275ac4c3f34SJoe Thornber }
2276ac4c3f34SJoe Thornber 
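/*
 * Deferred cells are handled in batches of up to CELL_SORT_ARRAY_SIZE,
 * each batch sorted by the holder bio's sector.  Note cell_sort_array
 * holds pointers, which is why sort() above is passed sizeof(cell)
 * rather than sizeof(*cell).
 */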
2277a374bb21SJoe Thornber static void process_thin_deferred_cells(struct thin_c *tc)
2278a374bb21SJoe Thornber {
2279a374bb21SJoe Thornber 	struct pool *pool = tc->pool;
2280a374bb21SJoe Thornber 	struct list_head cells;
2281ac4c3f34SJoe Thornber 	struct dm_bio_prison_cell *cell;
228286a3238cSHeinz Mauelshagen 	unsigned int i, j, count;
2283a374bb21SJoe Thornber 
2284a374bb21SJoe Thornber 	INIT_LIST_HEAD(&cells);
2285a374bb21SJoe Thornber 
22868e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->lock);
2287a374bb21SJoe Thornber 	list_splice_init(&tc->deferred_cells, &cells);
22888e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->lock);
2289a374bb21SJoe Thornber 
2290a374bb21SJoe Thornber 	if (list_empty(&cells))
2291a374bb21SJoe Thornber 		return;
2292a374bb21SJoe Thornber 
2293ac4c3f34SJoe Thornber 	do {
2294ac4c3f34SJoe Thornber 		count = sort_cells(tc->pool, &cells);
2295ac4c3f34SJoe Thornber 
2296ac4c3f34SJoe Thornber 		for (i = 0; i < count; i++) {
2297ac4c3f34SJoe Thornber 			cell = pool->cell_sort_array[i];
2298a374bb21SJoe Thornber 			BUG_ON(!cell->holder);
2299a374bb21SJoe Thornber 
2300a374bb21SJoe Thornber 			/*
2301a374bb21SJoe Thornber 			 * If we've got no free new_mapping structs, and processing
2302a374bb21SJoe Thornber 			 * this bio might require one, we pause until there are some
2303a374bb21SJoe Thornber 			 * prepared mappings to process.
2304a374bb21SJoe Thornber 			 */
2305a374bb21SJoe Thornber 			if (ensure_next_mapping(pool)) {
2306ac4c3f34SJoe Thornber 				for (j = i; j < count; j++)
2307ac4c3f34SJoe Thornber 					list_add(&pool->cell_sort_array[j]->user_list, &cells);
2308ac4c3f34SJoe Thornber 
23098e0c9dacSMikulas Patocka 				spin_lock_irq(&tc->lock);
2310a374bb21SJoe Thornber 				list_splice(&cells, &tc->deferred_cells);
23118e0c9dacSMikulas Patocka 				spin_unlock_irq(&tc->lock);
2312ac4c3f34SJoe Thornber 				return;
2313a374bb21SJoe Thornber 			}
2314a374bb21SJoe Thornber 
2315e6047149SMike Christie 			if (bio_op(cell->holder) == REQ_OP_DISCARD)
2316a374bb21SJoe Thornber 				pool->process_discard_cell(tc, cell);
2317a374bb21SJoe Thornber 			else
2318a374bb21SJoe Thornber 				pool->process_cell(tc, cell);
2319a374bb21SJoe Thornber 		}
2320e4f80303SMike Snitzer 		cond_resched();
2321ac4c3f34SJoe Thornber 	} while (!list_empty(&cells));
2322a374bb21SJoe Thornber }
2323a374bb21SJoe Thornber 
2324b10ebd34SJoe Thornber static void thin_get(struct thin_c *tc);
2325b10ebd34SJoe Thornber static void thin_put(struct thin_c *tc);
2326b10ebd34SJoe Thornber 
2327b10ebd34SJoe Thornber /*
2328b10ebd34SJoe Thornber  * We can't hold rcu_read_lock() around code that can block.  So we
2329b10ebd34SJoe Thornber  * find a thin with the rcu lock held; bump a refcount; then drop
2330b10ebd34SJoe Thornber  * the lock.
2331b10ebd34SJoe Thornber  */
2332b10ebd34SJoe Thornber static struct thin_c *get_first_thin(struct pool *pool)
2333b10ebd34SJoe Thornber {
2334b10ebd34SJoe Thornber 	struct thin_c *tc = NULL;
2335b10ebd34SJoe Thornber 
2336b10ebd34SJoe Thornber 	rcu_read_lock();
2337*6b305e98SKrister Johansen 	tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list);
2338*6b305e98SKrister Johansen 	if (tc)
2339b10ebd34SJoe Thornber 		thin_get(tc);
2340b10ebd34SJoe Thornber 	rcu_read_unlock();
2341b10ebd34SJoe Thornber 
2342b10ebd34SJoe Thornber 	return tc;
2343b10ebd34SJoe Thornber }
2344b10ebd34SJoe Thornber 
2345b10ebd34SJoe Thornber static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
2346b10ebd34SJoe Thornber {
2347b10ebd34SJoe Thornber 	struct thin_c *old_tc = tc;
2348b10ebd34SJoe Thornber 
2349b10ebd34SJoe Thornber 	rcu_read_lock();
2350b10ebd34SJoe Thornber 	list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
2351b10ebd34SJoe Thornber 		thin_get(tc);
2352b10ebd34SJoe Thornber 		thin_put(old_tc);
2353b10ebd34SJoe Thornber 		rcu_read_unlock();
2354b10ebd34SJoe Thornber 		return tc;
2355b10ebd34SJoe Thornber 	}
2356b10ebd34SJoe Thornber 	thin_put(old_tc);
2357b10ebd34SJoe Thornber 	rcu_read_unlock();
2358b10ebd34SJoe Thornber 
2359b10ebd34SJoe Thornber 	return NULL;
2360b10ebd34SJoe Thornber }
2361b10ebd34SJoe Thornber 
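/*
 * Walk every active thin, draining its deferred cells and bios, then
 * commit the metadata once before any deferred flush bios are issued
 * or completed.  get_first_thin()/get_next_thin() hold a reference on
 * the thin being processed, so it cannot vanish while we may block.
 */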
2362c140e1c4SMike Snitzer static void process_deferred_bios(struct pool *pool)
2363c140e1c4SMike Snitzer {
2364c140e1c4SMike Snitzer 	struct bio *bio;
23654ae280b4SNikos Tsironis 	struct bio_list bios, bio_completions;
2366c140e1c4SMike Snitzer 	struct thin_c *tc;
2367c140e1c4SMike Snitzer 
2368b10ebd34SJoe Thornber 	tc = get_first_thin(pool);
2369b10ebd34SJoe Thornber 	while (tc) {
2370a374bb21SJoe Thornber 		process_thin_deferred_cells(tc);
2371c140e1c4SMike Snitzer 		process_thin_deferred_bios(tc);
2372b10ebd34SJoe Thornber 		tc = get_next_thin(pool, tc);
2373b10ebd34SJoe Thornber 	}
2374991d9fa0SJoe Thornber 
2375991d9fa0SJoe Thornber 	/*
23764ae280b4SNikos Tsironis 	 * If there are any deferred flush bios, we must commit the metadata
23774ae280b4SNikos Tsironis 	 * before issuing them or signaling their completion.
2378991d9fa0SJoe Thornber 	 */
2379991d9fa0SJoe Thornber 	bio_list_init(&bios);
23804ae280b4SNikos Tsironis 	bio_list_init(&bio_completions);
23814ae280b4SNikos Tsironis 
23828e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
2383991d9fa0SJoe Thornber 	bio_list_merge(&bios, &pool->deferred_flush_bios);
2384991d9fa0SJoe Thornber 	bio_list_init(&pool->deferred_flush_bios);
23854ae280b4SNikos Tsironis 
23864ae280b4SNikos Tsironis 	bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
23874ae280b4SNikos Tsironis 	bio_list_init(&pool->deferred_flush_completions);
23888e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
2389991d9fa0SJoe Thornber 
23904ae280b4SNikos Tsironis 	if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
23914d1662a3SMike Snitzer 	    !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
2392991d9fa0SJoe Thornber 		return;
2393991d9fa0SJoe Thornber 
2394020cc3b5SJoe Thornber 	if (commit(pool)) {
23954ae280b4SNikos Tsironis 		bio_list_merge(&bios, &bio_completions);
23964ae280b4SNikos Tsironis 
2397991d9fa0SJoe Thornber 		while ((bio = bio_list_pop(&bios)))
2398991d9fa0SJoe Thornber 			bio_io_error(bio);
2399991d9fa0SJoe Thornber 		return;
2400991d9fa0SJoe Thornber 	}
2401905e51b3SJoe Thornber 	pool->last_commit_jiffies = jiffies;
2402991d9fa0SJoe Thornber 
24034ae280b4SNikos Tsironis 	while ((bio = bio_list_pop(&bio_completions)))
24044ae280b4SNikos Tsironis 		bio_endio(bio);
24054ae280b4SNikos Tsironis 
2406694cfe7fSNikos Tsironis 	while ((bio = bio_list_pop(&bios))) {
2407694cfe7fSNikos Tsironis 		/*
2408694cfe7fSNikos Tsironis 		 * The data device was flushed as part of metadata commit,
2409694cfe7fSNikos Tsironis 		 * so complete redundant flushes immediately.
2410694cfe7fSNikos Tsironis 		 */
2411694cfe7fSNikos Tsironis 		if (bio->bi_opf & REQ_PREFLUSH)
2412694cfe7fSNikos Tsironis 			bio_endio(bio);
2413694cfe7fSNikos Tsironis 		else
2414b7f8dff0SMike Snitzer 			dm_submit_bio_remap(bio, NULL);
2415991d9fa0SJoe Thornber 	}
2416694cfe7fSNikos Tsironis }
2417991d9fa0SJoe Thornber 
2418991d9fa0SJoe Thornber static void do_worker(struct work_struct *ws)
2419991d9fa0SJoe Thornber {
2420991d9fa0SJoe Thornber 	struct pool *pool = container_of(ws, struct pool, worker);
2421991d9fa0SJoe Thornber 
24227d327fe0SJoe Thornber 	throttle_work_start(&pool->throttle);
24238a01a6afSJoe Thornber 	dm_pool_issue_prefetches(pool->pmd);
24247d327fe0SJoe Thornber 	throttle_work_update(&pool->throttle);
2425e49e5829SJoe Thornber 	process_prepared(pool, &pool->prepared_mappings, &pool->process_prepared_mapping);
24267d327fe0SJoe Thornber 	throttle_work_update(&pool->throttle);
2427e49e5829SJoe Thornber 	process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
24287d327fe0SJoe Thornber 	throttle_work_update(&pool->throttle);
24292a0fbffbSJoe Thornber 	process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
24302a0fbffbSJoe Thornber 	throttle_work_update(&pool->throttle);
2431991d9fa0SJoe Thornber 	process_deferred_bios(pool);
24327d327fe0SJoe Thornber 	throttle_work_complete(&pool->throttle);
2433991d9fa0SJoe Thornber }
2434991d9fa0SJoe Thornber 
2435905e51b3SJoe Thornber /*
2436905e51b3SJoe Thornber  * We want to commit periodically so that not too much
2437905e51b3SJoe Thornber  * unwritten data builds up.
2438905e51b3SJoe Thornber  */
2439905e51b3SJoe Thornber static void do_waker(struct work_struct *ws)
2440905e51b3SJoe Thornber {
2441905e51b3SJoe Thornber 	struct pool *pool = container_of(to_delayed_work(ws), struct pool, waker);
24420ef0b471SHeinz Mauelshagen 
2443905e51b3SJoe Thornber 	wake_worker(pool);
2444905e51b3SJoe Thornber 	queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
2445905e51b3SJoe Thornber }
2446905e51b3SJoe Thornber 
244785ad643bSJoe Thornber /*
244885ad643bSJoe Thornber  * We're holding onto IO to allow userland time to react.  After the
244985ad643bSJoe Thornber  * timeout either the pool will have been resized (and thus back in
2450bcc696faSMike Snitzer  * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
245185ad643bSJoe Thornber  */
245285ad643bSJoe Thornber static void do_no_space_timeout(struct work_struct *ws)
245385ad643bSJoe Thornber {
245485ad643bSJoe Thornber 	struct pool *pool = container_of(to_delayed_work(ws), struct pool,
245585ad643bSJoe Thornber 					 no_space_timeout);
245685ad643bSJoe Thornber 
2457bcc696faSMike Snitzer 	if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
2458bcc696faSMike Snitzer 		pool->pf.error_if_no_space = true;
2459f6c36758SMike Snitzer 		notify_of_pool_mode_change(pool);
24604e4cbee9SChristoph Hellwig 		error_retry_list_with_code(pool, BLK_STS_NOSPC);
2461bcc696faSMike Snitzer 	}
246285ad643bSJoe Thornber }
246385ad643bSJoe Thornber 
2464991d9fa0SJoe Thornber /*----------------------------------------------------------------*/
2465991d9fa0SJoe Thornber 
2466e7a3e871SJoe Thornber struct pool_work {
2467738211f7SJoe Thornber 	struct work_struct worker;
2468e7a3e871SJoe Thornber 	struct completion complete;
2469738211f7SJoe Thornber };
2470738211f7SJoe Thornber 
2471e7a3e871SJoe Thornber static struct pool_work *to_pool_work(struct work_struct *ws)
2472738211f7SJoe Thornber {
2473e7a3e871SJoe Thornber 	return container_of(ws, struct pool_work, worker);
2474e7a3e871SJoe Thornber }
2475e7a3e871SJoe Thornber 
2476e7a3e871SJoe Thornber static void pool_work_complete(struct pool_work *pw)
2477e7a3e871SJoe Thornber {
2478e7a3e871SJoe Thornber 	complete(&pw->complete);
2479e7a3e871SJoe Thornber }
2480e7a3e871SJoe Thornber 
2481e7a3e871SJoe Thornber static void pool_work_wait(struct pool_work *pw, struct pool *pool,
2482e7a3e871SJoe Thornber 			   void (*fn)(struct work_struct *))
2483e7a3e871SJoe Thornber {
2484e7a3e871SJoe Thornber 	INIT_WORK_ONSTACK(&pw->worker, fn);
2485e7a3e871SJoe Thornber 	init_completion(&pw->complete);
2486e7a3e871SJoe Thornber 	queue_work(pool->wq, &pw->worker);
2487e7a3e871SJoe Thornber 	wait_for_completion(&pw->complete);
24886f433923SYuan Can 	destroy_work_on_stack(&pw->worker);
2489e7a3e871SJoe Thornber }
2490e7a3e871SJoe Thornber 
2491e7a3e871SJoe Thornber /*----------------------------------------------------------------*/
2492e7a3e871SJoe Thornber 
2493e7a3e871SJoe Thornber struct noflush_work {
2494e7a3e871SJoe Thornber 	struct pool_work pw;
2495e7a3e871SJoe Thornber 	struct thin_c *tc;
2496e7a3e871SJoe Thornber };
2497e7a3e871SJoe Thornber 
2498e7a3e871SJoe Thornber static struct noflush_work *to_noflush(struct work_struct *ws)
2499e7a3e871SJoe Thornber {
2500e7a3e871SJoe Thornber 	return container_of(to_pool_work(ws), struct noflush_work, pw);
2501738211f7SJoe Thornber }
2502738211f7SJoe Thornber 
2503738211f7SJoe Thornber static void do_noflush_start(struct work_struct *ws)
2504738211f7SJoe Thornber {
2505e7a3e871SJoe Thornber 	struct noflush_work *w = to_noflush(ws);
25060ef0b471SHeinz Mauelshagen 
2507738211f7SJoe Thornber 	w->tc->requeue_mode = true;
2508738211f7SJoe Thornber 	requeue_io(w->tc);
2509e7a3e871SJoe Thornber 	pool_work_complete(&w->pw);
2510738211f7SJoe Thornber }
2511738211f7SJoe Thornber 
2512738211f7SJoe Thornber static void do_noflush_stop(struct work_struct *ws)
2513738211f7SJoe Thornber {
2514e7a3e871SJoe Thornber 	struct noflush_work *w = to_noflush(ws);
25150ef0b471SHeinz Mauelshagen 
2516738211f7SJoe Thornber 	w->tc->requeue_mode = false;
2517e7a3e871SJoe Thornber 	pool_work_complete(&w->pw);
2518738211f7SJoe Thornber }
2519738211f7SJoe Thornber 
2520738211f7SJoe Thornber static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *))
2521738211f7SJoe Thornber {
2522738211f7SJoe Thornber 	struct noflush_work w;
2523738211f7SJoe Thornber 
2524738211f7SJoe Thornber 	w.tc = tc;
2525e7a3e871SJoe Thornber 	pool_work_wait(&w.pw, tc->pool, fn);
2526738211f7SJoe Thornber }
2527738211f7SJoe Thornber 
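/*
 * noflush_work() runs @fn on the pool's ordered workqueue and waits for
 * it, so toggling requeue_mode is serialised with do_worker().  A
 * caller might, for instance, bracket a quiesce like this (illustrative
 * only):
 *
 *	noflush_work(tc, do_noflush_start);
 *	... outstanding bios get requeued ...
 *	noflush_work(tc, do_noflush_stop);
 */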
2528738211f7SJoe Thornber /*----------------------------------------------------------------*/
2529738211f7SJoe Thornber 
253034fbcf62SJoe Thornber static void set_discard_callbacks(struct pool *pool)
253134fbcf62SJoe Thornber {
253234fbcf62SJoe Thornber 	struct pool_c *pt = pool->ti->private;
253334fbcf62SJoe Thornber 
2534fa375646SMike Snitzer 	if (pt->adjusted_pf.discard_passdown) {
253534fbcf62SJoe Thornber 		pool->process_discard_cell = process_discard_cell_passdown;
25362a0fbffbSJoe Thornber 		pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
25372a0fbffbSJoe Thornber 		pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
253834fbcf62SJoe Thornber 	} else {
253934fbcf62SJoe Thornber 		pool->process_discard_cell = process_discard_cell_no_passdown;
254034fbcf62SJoe Thornber 		pool->process_prepared_discard = process_prepared_discard_no_passdown;
254134fbcf62SJoe Thornber 	}
254234fbcf62SJoe Thornber }
254334fbcf62SJoe Thornber 
25448b64e881SMike Snitzer static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
2545e49e5829SJoe Thornber {
2546cdc2b415SMike Snitzer 	struct pool_c *pt = pool->ti->private;
254707f2b6e0SMike Snitzer 	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
254807f2b6e0SMike Snitzer 	enum pool_mode old_mode = get_pool_mode(pool);
25496aa7de05SMark Rutland 	unsigned long no_space_timeout = READ_ONCE(no_space_timeout_secs) * HZ;
255007f2b6e0SMike Snitzer 
255107f2b6e0SMike Snitzer 	/*
255207f2b6e0SMike Snitzer 	 * Never allow the pool to transition to PM_WRITE mode if user
255307f2b6e0SMike Snitzer 	 * intervention is required to verify metadata and data consistency.
255407f2b6e0SMike Snitzer 	 */
255507f2b6e0SMike Snitzer 	if (new_mode == PM_WRITE && needs_check) {
255607f2b6e0SMike Snitzer 		DMERR("%s: unable to switch pool to write mode until repaired.",
255707f2b6e0SMike Snitzer 		      dm_device_name(pool->pool_md));
255807f2b6e0SMike Snitzer 		if (old_mode != new_mode)
255907f2b6e0SMike Snitzer 			new_mode = old_mode;
256007f2b6e0SMike Snitzer 		else
256107f2b6e0SMike Snitzer 			new_mode = PM_READ_ONLY;
256207f2b6e0SMike Snitzer 	}
256307f2b6e0SMike Snitzer 	/*
256407f2b6e0SMike Snitzer 	 * If we were in PM_FAIL mode, rollback of metadata failed.  We're
256507f2b6e0SMike Snitzer 	 * not going to recover without a thin_repair.	So we never let the
256607f2b6e0SMike Snitzer 	 * pool move out of the old mode.
256707f2b6e0SMike Snitzer 	 */
256807f2b6e0SMike Snitzer 	if (old_mode == PM_FAIL)
256907f2b6e0SMike Snitzer 		new_mode = old_mode;
2570e49e5829SJoe Thornber 
25718b64e881SMike Snitzer 	switch (new_mode) {
2572e49e5829SJoe Thornber 	case PM_FAIL:
25735383ef3aSJoe Thornber 		dm_pool_metadata_read_only(pool->pmd);
2574e49e5829SJoe Thornber 		pool->process_bio = process_bio_fail;
2575e49e5829SJoe Thornber 		pool->process_discard = process_bio_fail;
2576a374bb21SJoe Thornber 		pool->process_cell = process_cell_fail;
2577a374bb21SJoe Thornber 		pool->process_discard_cell = process_cell_fail;
2578e49e5829SJoe Thornber 		pool->process_prepared_mapping = process_prepared_mapping_fail;
2579e49e5829SJoe Thornber 		pool->process_prepared_discard = process_prepared_discard_fail;
25803e1a0699SJoe Thornber 
25813e1a0699SJoe Thornber 		error_retry_list(pool);
2582e49e5829SJoe Thornber 		break;
2583e49e5829SJoe Thornber 
25843ab91828SJoe Thornber 	case PM_OUT_OF_METADATA_SPACE:
2585e49e5829SJoe Thornber 	case PM_READ_ONLY:
2586e49e5829SJoe Thornber 		dm_pool_metadata_read_only(pool->pmd);
2587e49e5829SJoe Thornber 		pool->process_bio = process_bio_read_only;
25883e1a0699SJoe Thornber 		pool->process_discard = process_bio_success;
2589a374bb21SJoe Thornber 		pool->process_cell = process_cell_read_only;
2590a374bb21SJoe Thornber 		pool->process_discard_cell = process_cell_success;
2591e49e5829SJoe Thornber 		pool->process_prepared_mapping = process_prepared_mapping_fail;
259234fbcf62SJoe Thornber 		pool->process_prepared_discard = process_prepared_discard_success;
25933e1a0699SJoe Thornber 
25943e1a0699SJoe Thornber 		error_retry_list(pool);
25953e1a0699SJoe Thornber 		break;
25963e1a0699SJoe Thornber 
25973e1a0699SJoe Thornber 	case PM_OUT_OF_DATA_SPACE:
25983e1a0699SJoe Thornber 		/*
25993e1a0699SJoe Thornber 		 * Ideally we'd never hit this state; the low water mark
26003e1a0699SJoe Thornber 		 * would trigger userland to extend the pool before we
26013e1a0699SJoe Thornber 		 * completely run out of data space.  However, many small
26023e1a0699SJoe Thornber 		 * IOs to unprovisioned space can consume data space at an
26033e1a0699SJoe Thornber 		 * alarming rate.  Adjust your low water mark if you're
26043e1a0699SJoe Thornber 		 * frequently seeing this mode.
26053e1a0699SJoe Thornber 		 */
2606c3667cc6SMike Snitzer 		pool->out_of_data_space = true;
26073e1a0699SJoe Thornber 		pool->process_bio = process_bio_read_only;
2608a374bb21SJoe Thornber 		pool->process_discard = process_discard_bio;
2609a374bb21SJoe Thornber 		pool->process_cell = process_cell_read_only;
26103e1a0699SJoe Thornber 		pool->process_prepared_mapping = process_prepared_mapping;
261134fbcf62SJoe Thornber 		set_discard_callbacks(pool);
261285ad643bSJoe Thornber 
261380c57893SMike Snitzer 		if (!pool->pf.error_if_no_space && no_space_timeout)
261480c57893SMike Snitzer 			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
2615e49e5829SJoe Thornber 		break;
2616e49e5829SJoe Thornber 
2617e49e5829SJoe Thornber 	case PM_WRITE:
261875294442SHou Tao 		if (old_mode == PM_OUT_OF_DATA_SPACE)
261975294442SHou Tao 			cancel_delayed_work_sync(&pool->no_space_timeout);
2620c3667cc6SMike Snitzer 		pool->out_of_data_space = false;
2621172c2386SMike Snitzer 		pool->pf.error_if_no_space = pt->requested_pf.error_if_no_space;
26229b7aaa64SJoe Thornber 		dm_pool_metadata_read_write(pool->pmd);
2623e49e5829SJoe Thornber 		pool->process_bio = process_bio;
2624a374bb21SJoe Thornber 		pool->process_discard = process_discard_bio;
2625a374bb21SJoe Thornber 		pool->process_cell = process_cell;
2626e49e5829SJoe Thornber 		pool->process_prepared_mapping = process_prepared_mapping;
262734fbcf62SJoe Thornber 		set_discard_callbacks(pool);
2628e49e5829SJoe Thornber 		break;
2629e49e5829SJoe Thornber 	}
26308b64e881SMike Snitzer 
26318b64e881SMike Snitzer 	pool->pf.mode = new_mode;
2632cdc2b415SMike Snitzer 	/*
2633cdc2b415SMike Snitzer 	 * The pool mode may have changed, sync it so bind_control_target()
2634cdc2b415SMike Snitzer 	 * doesn't cause an unexpected mode transition on resume.
2635cdc2b415SMike Snitzer 	 */
2636cdc2b415SMike Snitzer 	pt->adjusted_pf.mode = new_mode;
2637f6c36758SMike Snitzer 
2638f6c36758SMike Snitzer 	if (old_mode != new_mode)
2639f6c36758SMike Snitzer 		notify_of_pool_mode_change(pool);
2640e49e5829SJoe Thornber }
2641e49e5829SJoe Thornber 
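/*
 * Metadata failure handling: abort_transaction() rolls the metadata
 * back to the last committed transaction and sets the needs_check
 * flag, after which set_pool_mode() refuses to re-enter PM_WRITE until
 * the metadata has been repaired.
 */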
264207f2b6e0SMike Snitzer static void abort_transaction(struct pool *pool)
264307f2b6e0SMike Snitzer {
264407f2b6e0SMike Snitzer 	const char *dev_name = dm_device_name(pool->pool_md);
264507f2b6e0SMike Snitzer 
264607f2b6e0SMike Snitzer 	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
264707f2b6e0SMike Snitzer 	if (dm_pool_abort_metadata(pool->pmd)) {
264807f2b6e0SMike Snitzer 		DMERR("%s: failed to abort metadata transaction", dev_name);
264907f2b6e0SMike Snitzer 		set_pool_mode(pool, PM_FAIL);
265007f2b6e0SMike Snitzer 	}
265107f2b6e0SMike Snitzer 
265207f2b6e0SMike Snitzer 	if (dm_pool_metadata_set_needs_check(pool->pmd)) {
265307f2b6e0SMike Snitzer 		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
265407f2b6e0SMike Snitzer 		set_pool_mode(pool, PM_FAIL);
265507f2b6e0SMike Snitzer 	}
265607f2b6e0SMike Snitzer }
265707f2b6e0SMike Snitzer 
2658b5330655SJoe Thornber static void metadata_operation_failed(struct pool *pool, const char *op, int r)
2659b5330655SJoe Thornber {
2660b5330655SJoe Thornber 	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
2661b5330655SJoe Thornber 		    dm_device_name(pool->pool_md), op, r);
2662b5330655SJoe Thornber 
266307f2b6e0SMike Snitzer 	abort_transaction(pool);
2664b5330655SJoe Thornber 	set_pool_mode(pool, PM_READ_ONLY);
2665b5330655SJoe Thornber }
2666b5330655SJoe Thornber 
2667e49e5829SJoe Thornber /*----------------------------------------------------------------*/
2668e49e5829SJoe Thornber 
2669991d9fa0SJoe Thornber /*
2670991d9fa0SJoe Thornber  * Mapping functions.
2671991d9fa0SJoe Thornber  */
2672991d9fa0SJoe Thornber 
2673991d9fa0SJoe Thornber /*
2674991d9fa0SJoe Thornber  * Called only while mapping a thin bio to hand it over to the workqueue.
2675991d9fa0SJoe Thornber  */
2676991d9fa0SJoe Thornber static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
2677991d9fa0SJoe Thornber {
2678991d9fa0SJoe Thornber 	struct pool *pool = tc->pool;
2679991d9fa0SJoe Thornber 
26808e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->lock);
2681c140e1c4SMike Snitzer 	bio_list_add(&tc->deferred_bio_list, bio);
26828e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->lock);
2683991d9fa0SJoe Thornber 
2684991d9fa0SJoe Thornber 	wake_worker(pool);
2685991d9fa0SJoe Thornber }
2686991d9fa0SJoe Thornber 
26877d327fe0SJoe Thornber static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio)
26887d327fe0SJoe Thornber {
26897d327fe0SJoe Thornber 	struct pool *pool = tc->pool;
26907d327fe0SJoe Thornber 
26917d327fe0SJoe Thornber 	throttle_lock(&pool->throttle);
26927d327fe0SJoe Thornber 	thin_defer_bio(tc, bio);
26937d327fe0SJoe Thornber 	throttle_unlock(&pool->throttle);
26947d327fe0SJoe Thornber }
26957d327fe0SJoe Thornber 
2696a374bb21SJoe Thornber static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
2697a374bb21SJoe Thornber {
2698a374bb21SJoe Thornber 	struct pool *pool = tc->pool;
2699a374bb21SJoe Thornber 
2700a374bb21SJoe Thornber 	throttle_lock(&pool->throttle);
27018e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->lock);
2702a374bb21SJoe Thornber 	list_add_tail(&cell->user_list, &tc->deferred_cells);
27038e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->lock);
2704a374bb21SJoe Thornber 	throttle_unlock(&pool->throttle);
2705a374bb21SJoe Thornber 
2706a374bb21SJoe Thornber 	wake_worker(pool);
2707a374bb21SJoe Thornber }
2708a374bb21SJoe Thornber 
270959c3d2c6SMikulas Patocka static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
2710eb2aa48dSJoe Thornber {
271159c3d2c6SMikulas Patocka 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
2712eb2aa48dSJoe Thornber 
2713eb2aa48dSJoe Thornber 	h->tc = tc;
2714eb2aa48dSJoe Thornber 	h->shared_read_entry = NULL;
2715e8088073SJoe Thornber 	h->all_io_entry = NULL;
2716eb2aa48dSJoe Thornber 	h->overwrite_mapping = NULL;
271734fbcf62SJoe Thornber 	h->cell = NULL;
2718eb2aa48dSJoe Thornber }
2719eb2aa48dSJoe Thornber 
2720991d9fa0SJoe Thornber /*
2721991d9fa0SJoe Thornber  * Non-blocking function called from the thin target's map function.
2722991d9fa0SJoe Thornber  */
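/*
 * Only the already-provisioned, unshared case is remapped directly
 * (DM_MAPIO_REMAPPED); flushes, discards, shared blocks and
 * unprovisioned blocks are all handed to the worker thread
 * (DM_MAPIO_SUBMITTED).
 */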
27237de3ee57SMikulas Patocka static int thin_bio_map(struct dm_target *ti, struct bio *bio)
2724991d9fa0SJoe Thornber {
2725991d9fa0SJoe Thornber 	int r;
2726991d9fa0SJoe Thornber 	struct thin_c *tc = ti->private;
2727991d9fa0SJoe Thornber 	dm_block_t block = get_bio_block(tc, bio);
2728991d9fa0SJoe Thornber 	struct dm_thin_device *td = tc->td;
2729991d9fa0SJoe Thornber 	struct dm_thin_lookup_result result;
2730a374bb21SJoe Thornber 	struct dm_bio_prison_cell *virt_cell, *data_cell;
2731e8088073SJoe Thornber 	struct dm_cell_key key;
2732991d9fa0SJoe Thornber 
273359c3d2c6SMikulas Patocka 	thin_hook_bio(tc, bio);
2734e49e5829SJoe Thornber 
2735738211f7SJoe Thornber 	if (tc->requeue_mode) {
27364e4cbee9SChristoph Hellwig 		bio->bi_status = BLK_STS_DM_REQUEUE;
27374246a0b6SChristoph Hellwig 		bio_endio(bio);
2738738211f7SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2739738211f7SJoe Thornber 	}
2740738211f7SJoe Thornber 
2741e49e5829SJoe Thornber 	if (get_pool_mode(tc->pool) == PM_FAIL) {
2742e49e5829SJoe Thornber 		bio_io_error(bio);
2743e49e5829SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2744e49e5829SJoe Thornber 	}
2745e49e5829SJoe Thornber 
2746f73f44ebSChristoph Hellwig 	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
27477d327fe0SJoe Thornber 		thin_defer_bio_with_throttle(tc, bio);
2748991d9fa0SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2749991d9fa0SJoe Thornber 	}
2750991d9fa0SJoe Thornber 
2751c822ed96SJoe Thornber 	/*
2752c822ed96SJoe Thornber 	 * We must hold the virtual cell before doing the lookup, otherwise
2753c822ed96SJoe Thornber 	 * there's a race with discard.
2754c822ed96SJoe Thornber 	 */
2755c822ed96SJoe Thornber 	build_virtual_key(tc->td, block, &key);
2756a374bb21SJoe Thornber 	if (bio_detain(tc->pool, &key, bio, &virt_cell))
2757c822ed96SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2758c822ed96SJoe Thornber 
2759991d9fa0SJoe Thornber 	r = dm_thin_find_block(td, block, 0, &result);
2760991d9fa0SJoe Thornber 
2761991d9fa0SJoe Thornber 	/*
2762991d9fa0SJoe Thornber 	 * Note that we defer readahead too.
2763991d9fa0SJoe Thornber 	 */
2764991d9fa0SJoe Thornber 	switch (r) {
2765991d9fa0SJoe Thornber 	case 0:
2766991d9fa0SJoe Thornber 		if (unlikely(result.shared)) {
2767991d9fa0SJoe Thornber 			/*
2768991d9fa0SJoe Thornber 			 * We have a race condition here between the
2769991d9fa0SJoe Thornber 			 * result.shared value returned by the lookup and
2770991d9fa0SJoe Thornber 			 * snapshot creation, which may cause new
2771991d9fa0SJoe Thornber 			 * sharing.
2772991d9fa0SJoe Thornber 			 *
2773991d9fa0SJoe Thornber 			 * To avoid this always quiesce the origin before
2774991d9fa0SJoe Thornber 			 * taking the snap.  You want to do this anyway to
2775991d9fa0SJoe Thornber 			 * ensure a consistent application view
2776991d9fa0SJoe Thornber 			 * (i.e. lockfs).
2777991d9fa0SJoe Thornber 			 *
2778991d9fa0SJoe Thornber 			 * More distant ancestors are irrelevant. The
2779991d9fa0SJoe Thornber 			 * shared flag will be set in their case.
2780991d9fa0SJoe Thornber 			 */
2781a374bb21SJoe Thornber 			thin_defer_cell(tc, virt_cell);
2782e8088073SJoe Thornber 			return DM_MAPIO_SUBMITTED;
2783991d9fa0SJoe Thornber 		}
2784e8088073SJoe Thornber 
2785e8088073SJoe Thornber 		build_data_key(tc->td, result.block, &key);
2786a374bb21SJoe Thornber 		if (bio_detain(tc->pool, &key, bio, &data_cell)) {
2787a374bb21SJoe Thornber 			cell_defer_no_holder(tc, virt_cell);
2788e8088073SJoe Thornber 			return DM_MAPIO_SUBMITTED;
2789e8088073SJoe Thornber 		}
2790e8088073SJoe Thornber 
2791e8088073SJoe Thornber 		inc_all_io_entry(tc->pool, bio);
2792a374bb21SJoe Thornber 		cell_defer_no_holder(tc, data_cell);
2793a374bb21SJoe Thornber 		cell_defer_no_holder(tc, virt_cell);
2794e8088073SJoe Thornber 
2795e8088073SJoe Thornber 		remap(tc, bio, result.block);
2796e8088073SJoe Thornber 		return DM_MAPIO_REMAPPED;
2797991d9fa0SJoe Thornber 
2798991d9fa0SJoe Thornber 	case -ENODATA:
2799e49e5829SJoe Thornber 	case -EWOULDBLOCK:
2800a374bb21SJoe Thornber 		thin_defer_cell(tc, virt_cell);
28012aab3850SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2802e49e5829SJoe Thornber 
2803e49e5829SJoe Thornber 	default:
2804e49e5829SJoe Thornber 		/*
2805e49e5829SJoe Thornber 		 * Must always call bio_io_error on failure.
2806e49e5829SJoe Thornber 		 * dm_thin_find_block can fail with -EINVAL if the
2807e49e5829SJoe Thornber 		 * pool is switched to fail-io mode.
2808e49e5829SJoe Thornber 		 */
2809e49e5829SJoe Thornber 		bio_io_error(bio);
2810a374bb21SJoe Thornber 		cell_defer_no_holder(tc, virt_cell);
28112aab3850SJoe Thornber 		return DM_MAPIO_SUBMITTED;
2812991d9fa0SJoe Thornber 	}
2813991d9fa0SJoe Thornber }
2814991d9fa0SJoe Thornber 
2815c140e1c4SMike Snitzer static void requeue_bios(struct pool *pool)
2816991d9fa0SJoe Thornber {
2817c140e1c4SMike Snitzer 	struct thin_c *tc;
2818c140e1c4SMike Snitzer 
2819c140e1c4SMike Snitzer 	rcu_read_lock();
2820c140e1c4SMike Snitzer 	list_for_each_entry_rcu(tc, &pool->active_thins, list) {
28218e0c9dacSMikulas Patocka 		spin_lock_irq(&tc->lock);
2822c140e1c4SMike Snitzer 		bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
2823c140e1c4SMike Snitzer 		bio_list_init(&tc->retry_on_resume_list);
28248e0c9dacSMikulas Patocka 		spin_unlock_irq(&tc->lock);
2825c140e1c4SMike Snitzer 	}
2826c140e1c4SMike Snitzer 	rcu_read_unlock();
2827991d9fa0SJoe Thornber }
2828991d9fa0SJoe Thornber 
2829a4a82ce3SHeinz Mauelshagen /*
2830a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
2831991d9fa0SJoe Thornber  * Binding of control targets to a pool object
2832a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
2833a4a82ce3SHeinz Mauelshagen  */
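/*
 * is_factor() reports whether @n divides @block_size exactly.  For
 * example, is_factor(2048, 512) is true (2048 / 512 = 4, remainder 0)
 * whereas is_factor(2048, 768) is false.
 */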
283458051b94SJoe Thornber static bool is_factor(sector_t block_size, uint32_t n)
283558051b94SJoe Thornber {
283658051b94SJoe Thornber 	return !sector_div(block_size, n);
283758051b94SJoe Thornber }
283858051b94SJoe Thornber 
28399bc142ddSMike Snitzer /*
28409bc142ddSMike Snitzer  * If discard_passdown was enabled verify that the data device
28410424caa1SMike Snitzer  * supports discards.  Disable discard_passdown if not.
28429bc142ddSMike Snitzer  */
2843fa375646SMike Snitzer static void disable_discard_passdown_if_not_supported(struct pool_c *pt)
28449bc142ddSMike Snitzer {
28450424caa1SMike Snitzer 	struct pool *pool = pt->pool;
28460424caa1SMike Snitzer 	struct block_device *data_bdev = pt->data_dev->bdev;
28470424caa1SMike Snitzer 	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
28480424caa1SMike Snitzer 	const char *reason = NULL;
28499bc142ddSMike Snitzer 
28500424caa1SMike Snitzer 	if (!pt->adjusted_pf.discard_passdown)
28519bc142ddSMike Snitzer 		return;
28529bc142ddSMike Snitzer 
285370200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(pt->data_dev->bdev))
28540424caa1SMike Snitzer 		reason = "discard unsupported";
28559bc142ddSMike Snitzer 
28560424caa1SMike Snitzer 	else if (data_limits->max_discard_sectors < pool->sectors_per_block)
28570424caa1SMike Snitzer 		reason = "max discard sectors smaller than a block";
28580424caa1SMike Snitzer 
28590424caa1SMike Snitzer 	if (reason) {
2860385411ffSChristoph Hellwig 		DMWARN("Data device (%pg) %s: Disabling discard passdown.", data_bdev, reason);
28610424caa1SMike Snitzer 		pt->adjusted_pf.discard_passdown = false;
28620424caa1SMike Snitzer 	}
28639bc142ddSMike Snitzer }
28649bc142ddSMike Snitzer 
2865991d9fa0SJoe Thornber static int bind_control_target(struct pool *pool, struct dm_target *ti)
2866991d9fa0SJoe Thornber {
2867991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
2868991d9fa0SJoe Thornber 
2869e49e5829SJoe Thornber 	/*
28709b7aaa64SJoe Thornber 	 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
2871e49e5829SJoe Thornber 	 */
287207f2b6e0SMike Snitzer 	enum pool_mode old_mode = get_pool_mode(pool);
28730424caa1SMike Snitzer 	enum pool_mode new_mode = pt->adjusted_pf.mode;
2874e49e5829SJoe Thornber 
28759b7aaa64SJoe Thornber 	/*
28768b64e881SMike Snitzer 	 * Don't change the pool's mode until set_pool_mode() below.
28778b64e881SMike Snitzer 	 * Otherwise the pool's process_* function pointers may
28788b64e881SMike Snitzer 	 * not match the desired pool mode.
28798b64e881SMike Snitzer 	 */
28808b64e881SMike Snitzer 	pt->adjusted_pf.mode = old_mode;
28818b64e881SMike Snitzer 
28828b64e881SMike Snitzer 	pool->ti = ti;
28838b64e881SMike Snitzer 	pool->pf = pt->adjusted_pf;
28848b64e881SMike Snitzer 	pool->low_water_blocks = pt->low_water_blocks;
28858b64e881SMike Snitzer 
28869bc142ddSMike Snitzer 	set_pool_mode(pool, new_mode);
2887f402693dSMike Snitzer 
2888991d9fa0SJoe Thornber 	return 0;
2889991d9fa0SJoe Thornber }
2890991d9fa0SJoe Thornber 
2891991d9fa0SJoe Thornber static void unbind_control_target(struct pool *pool, struct dm_target *ti)
2892991d9fa0SJoe Thornber {
2893991d9fa0SJoe Thornber 	if (pool->ti == ti)
2894991d9fa0SJoe Thornber 		pool->ti = NULL;
2895991d9fa0SJoe Thornber }
2896991d9fa0SJoe Thornber 
2897a4a82ce3SHeinz Mauelshagen /*
2898a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
2899991d9fa0SJoe Thornber  * Pool creation
2900a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
2901a4a82ce3SHeinz Mauelshagen  */
290267e2e2b2SJoe Thornber /* Initialize pool features. */
290367e2e2b2SJoe Thornber static void pool_features_init(struct pool_features *pf)
290467e2e2b2SJoe Thornber {
2905e49e5829SJoe Thornber 	pf->mode = PM_WRITE;
29069bc142ddSMike Snitzer 	pf->zero_new_blocks = true;
29079bc142ddSMike Snitzer 	pf->discard_enabled = true;
29089bc142ddSMike Snitzer 	pf->discard_passdown = true;
2909787a996cSMike Snitzer 	pf->error_if_no_space = false;
291067e2e2b2SJoe Thornber }
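/*
 * Each of these defaults can be overridden by the optional feature
 * arguments handled in parse_pool_features(): skip_block_zeroing,
 * ignore_discard, no_discard_passdown, read_only and error_if_no_space.
 */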
291167e2e2b2SJoe Thornber 
2912991d9fa0SJoe Thornber static void __pool_destroy(struct pool *pool)
2913991d9fa0SJoe Thornber {
2914991d9fa0SJoe Thornber 	__pool_table_remove(pool);
2915991d9fa0SJoe Thornber 
2916a822c83eSJoe Thornber 	vfree(pool->cell_sort_array);
2917991d9fa0SJoe Thornber 	if (dm_pool_metadata_close(pool->pmd) < 0)
2918991d9fa0SJoe Thornber 		DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
2919991d9fa0SJoe Thornber 
292044feb387SMike Snitzer 	dm_bio_prison_destroy(pool->prison);
2921991d9fa0SJoe Thornber 	dm_kcopyd_client_destroy(pool->copier);
2922991d9fa0SJoe Thornber 
292388430ebcSLuo Meng 	cancel_delayed_work_sync(&pool->waker);
292488430ebcSLuo Meng 	cancel_delayed_work_sync(&pool->no_space_timeout);
2925991d9fa0SJoe Thornber 	if (pool->wq)
2926991d9fa0SJoe Thornber 		destroy_workqueue(pool->wq);
2927991d9fa0SJoe Thornber 
2928991d9fa0SJoe Thornber 	if (pool->next_mapping)
29296f1c819cSKent Overstreet 		mempool_free(pool->next_mapping, &pool->mapping_pool);
29306f1c819cSKent Overstreet 	mempool_exit(&pool->mapping_pool);
293144feb387SMike Snitzer 	dm_deferred_set_destroy(pool->shared_read_ds);
293244feb387SMike Snitzer 	dm_deferred_set_destroy(pool->all_io_ds);
2933991d9fa0SJoe Thornber 	kfree(pool);
2934991d9fa0SJoe Thornber }
2935991d9fa0SJoe Thornber 
2936a24c2569SMike Snitzer static struct kmem_cache *_new_mapping_cache;
2937a24c2569SMike Snitzer 
2938991d9fa0SJoe Thornber static struct pool *pool_create(struct mapped_device *pool_md,
2939991d9fa0SJoe Thornber 				struct block_device *metadata_dev,
2940873937e7SMikulas Patocka 				struct block_device *data_dev,
2941e49e5829SJoe Thornber 				unsigned long block_size,
2942e49e5829SJoe Thornber 				int read_only, char **error)
2943991d9fa0SJoe Thornber {
2944991d9fa0SJoe Thornber 	int r;
2945991d9fa0SJoe Thornber 	void *err_p;
2946991d9fa0SJoe Thornber 	struct pool *pool;
2947991d9fa0SJoe Thornber 	struct dm_pool_metadata *pmd;
2948e49e5829SJoe Thornber 	bool format_device = read_only ? false : true;
2949991d9fa0SJoe Thornber 
2950e49e5829SJoe Thornber 	pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device);
2951991d9fa0SJoe Thornber 	if (IS_ERR(pmd)) {
2952991d9fa0SJoe Thornber 		*error = "Error creating metadata object";
2953991d9fa0SJoe Thornber 		return (struct pool *)pmd;
2954991d9fa0SJoe Thornber 	}
2955991d9fa0SJoe Thornber 
2956d3775354SKent Overstreet 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
2957991d9fa0SJoe Thornber 	if (!pool) {
2958991d9fa0SJoe Thornber 		*error = "Error allocating memory for pool";
2959991d9fa0SJoe Thornber 		err_p = ERR_PTR(-ENOMEM);
2960991d9fa0SJoe Thornber 		goto bad_pool;
2961991d9fa0SJoe Thornber 	}
2962991d9fa0SJoe Thornber 
2963991d9fa0SJoe Thornber 	pool->pmd = pmd;
2964991d9fa0SJoe Thornber 	pool->sectors_per_block = block_size;
2965f9a8e0cdSMikulas Patocka 	if (block_size & (block_size - 1))
2966f9a8e0cdSMikulas Patocka 		pool->sectors_per_block_shift = -1;
2967f9a8e0cdSMikulas Patocka 	else
2968f9a8e0cdSMikulas Patocka 		pool->sectors_per_block_shift = __ffs(block_size);
2969991d9fa0SJoe Thornber 	pool->low_water_blocks = 0;
297067e2e2b2SJoe Thornber 	pool_features_init(&pool->pf);
2971a195db2dSJoe Thornber 	pool->prison = dm_bio_prison_create();
2972991d9fa0SJoe Thornber 	if (!pool->prison) {
2973991d9fa0SJoe Thornber 		*error = "Error creating pool's bio prison";
2974991d9fa0SJoe Thornber 		err_p = ERR_PTR(-ENOMEM);
2975991d9fa0SJoe Thornber 		goto bad_prison;
2976991d9fa0SJoe Thornber 	}
2977991d9fa0SJoe Thornber 
2978df5d2e90SMikulas Patocka 	pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2979991d9fa0SJoe Thornber 	if (IS_ERR(pool->copier)) {
2980991d9fa0SJoe Thornber 		r = PTR_ERR(pool->copier);
2981991d9fa0SJoe Thornber 		*error = "Error creating pool's kcopyd client";
2982991d9fa0SJoe Thornber 		err_p = ERR_PTR(r);
2983991d9fa0SJoe Thornber 		goto bad_kcopyd_client;
2984991d9fa0SJoe Thornber 	}
2985991d9fa0SJoe Thornber 
2986991d9fa0SJoe Thornber 	/*
2987991d9fa0SJoe Thornber 	 * Create singlethreaded workqueue that will service all devices
2988991d9fa0SJoe Thornber 	 * that use this metadata.
2989991d9fa0SJoe Thornber 	 */
2990991d9fa0SJoe Thornber 	pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2991991d9fa0SJoe Thornber 	if (!pool->wq) {
2992991d9fa0SJoe Thornber 		*error = "Error creating pool's workqueue";
2993991d9fa0SJoe Thornber 		err_p = ERR_PTR(-ENOMEM);
2994991d9fa0SJoe Thornber 		goto bad_wq;
2995991d9fa0SJoe Thornber 	}
2996991d9fa0SJoe Thornber 
29977d327fe0SJoe Thornber 	throttle_init(&pool->throttle);
2998991d9fa0SJoe Thornber 	INIT_WORK(&pool->worker, do_worker);
2999905e51b3SJoe Thornber 	INIT_DELAYED_WORK(&pool->waker, do_waker);
300085ad643bSJoe Thornber 	INIT_DELAYED_WORK(&pool->no_space_timeout, do_no_space_timeout);
3001991d9fa0SJoe Thornber 	spin_lock_init(&pool->lock);
3002991d9fa0SJoe Thornber 	bio_list_init(&pool->deferred_flush_bios);
30034ae280b4SNikos Tsironis 	bio_list_init(&pool->deferred_flush_completions);
3004991d9fa0SJoe Thornber 	INIT_LIST_HEAD(&pool->prepared_mappings);
3005104655fdSJoe Thornber 	INIT_LIST_HEAD(&pool->prepared_discards);
30062a0fbffbSJoe Thornber 	INIT_LIST_HEAD(&pool->prepared_discards_pt2);
3007c140e1c4SMike Snitzer 	INIT_LIST_HEAD(&pool->active_thins);
300888a6621bSJoe Thornber 	pool->low_water_triggered = false;
300980e96c54SMike Snitzer 	pool->suspended = true;
3010c3667cc6SMike Snitzer 	pool->out_of_data_space = false;
301144feb387SMike Snitzer 
301244feb387SMike Snitzer 	pool->shared_read_ds = dm_deferred_set_create();
301344feb387SMike Snitzer 	if (!pool->shared_read_ds) {
301444feb387SMike Snitzer 		*error = "Error creating pool's shared read deferred set";
301544feb387SMike Snitzer 		err_p = ERR_PTR(-ENOMEM);
301644feb387SMike Snitzer 		goto bad_shared_read_ds;
301744feb387SMike Snitzer 	}
301844feb387SMike Snitzer 
301944feb387SMike Snitzer 	pool->all_io_ds = dm_deferred_set_create();
302044feb387SMike Snitzer 	if (!pool->all_io_ds) {
302144feb387SMike Snitzer 		*error = "Error creating pool's all io deferred set";
302244feb387SMike Snitzer 		err_p = ERR_PTR(-ENOMEM);
302344feb387SMike Snitzer 		goto bad_all_io_ds;
302444feb387SMike Snitzer 	}
3025991d9fa0SJoe Thornber 
3026991d9fa0SJoe Thornber 	pool->next_mapping = NULL;
30276f1c819cSKent Overstreet 	r = mempool_init_slab_pool(&pool->mapping_pool, MAPPING_POOL_SIZE,
3028a24c2569SMike Snitzer 				   _new_mapping_cache);
30296f1c819cSKent Overstreet 	if (r) {
3030991d9fa0SJoe Thornber 		*error = "Error creating pool's mapping mempool";
30316f1c819cSKent Overstreet 		err_p = ERR_PTR(r);
3032991d9fa0SJoe Thornber 		goto bad_mapping_pool;
3033991d9fa0SJoe Thornber 	}
3034991d9fa0SJoe Thornber 
303542bc47b3SKees Cook 	pool->cell_sort_array =
303642bc47b3SKees Cook 		vmalloc(array_size(CELL_SORT_ARRAY_SIZE,
303742bc47b3SKees Cook 				   sizeof(*pool->cell_sort_array)));
3038a822c83eSJoe Thornber 	if (!pool->cell_sort_array) {
3039a822c83eSJoe Thornber 		*error = "Error allocating cell sort array";
3040a822c83eSJoe Thornber 		err_p = ERR_PTR(-ENOMEM);
3041a822c83eSJoe Thornber 		goto bad_sort_array;
3042a822c83eSJoe Thornber 	}
3043a822c83eSJoe Thornber 
3044991d9fa0SJoe Thornber 	pool->ref_count = 1;
3045905e51b3SJoe Thornber 	pool->last_commit_jiffies = jiffies;
3046991d9fa0SJoe Thornber 	pool->pool_md = pool_md;
3047991d9fa0SJoe Thornber 	pool->md_dev = metadata_dev;
3048873937e7SMikulas Patocka 	pool->data_dev = data_dev;
3049991d9fa0SJoe Thornber 	__pool_table_insert(pool);
3050991d9fa0SJoe Thornber 
3051991d9fa0SJoe Thornber 	return pool;
3052991d9fa0SJoe Thornber 
3053a822c83eSJoe Thornber bad_sort_array:
30546f1c819cSKent Overstreet 	mempool_exit(&pool->mapping_pool);
3055991d9fa0SJoe Thornber bad_mapping_pool:
305644feb387SMike Snitzer 	dm_deferred_set_destroy(pool->all_io_ds);
305744feb387SMike Snitzer bad_all_io_ds:
305844feb387SMike Snitzer 	dm_deferred_set_destroy(pool->shared_read_ds);
305944feb387SMike Snitzer bad_shared_read_ds:
3060991d9fa0SJoe Thornber 	destroy_workqueue(pool->wq);
3061991d9fa0SJoe Thornber bad_wq:
3062991d9fa0SJoe Thornber 	dm_kcopyd_client_destroy(pool->copier);
3063991d9fa0SJoe Thornber bad_kcopyd_client:
306444feb387SMike Snitzer 	dm_bio_prison_destroy(pool->prison);
3065991d9fa0SJoe Thornber bad_prison:
3066991d9fa0SJoe Thornber 	kfree(pool);
3067991d9fa0SJoe Thornber bad_pool:
3068991d9fa0SJoe Thornber 	if (dm_pool_metadata_close(pmd))
3069991d9fa0SJoe Thornber 		DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
3070991d9fa0SJoe Thornber 
3071991d9fa0SJoe Thornber 	return err_p;
3072991d9fa0SJoe Thornber }
3073991d9fa0SJoe Thornber 
3074991d9fa0SJoe Thornber static void __pool_inc(struct pool *pool)
3075991d9fa0SJoe Thornber {
3076991d9fa0SJoe Thornber 	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
3077991d9fa0SJoe Thornber 	pool->ref_count++;
3078991d9fa0SJoe Thornber }
3079991d9fa0SJoe Thornber 
3080991d9fa0SJoe Thornber static void __pool_dec(struct pool *pool)
3081991d9fa0SJoe Thornber {
3082991d9fa0SJoe Thornber 	BUG_ON(!mutex_is_locked(&dm_thin_pool_table.mutex));
3083991d9fa0SJoe Thornber 	BUG_ON(!pool->ref_count);
3084991d9fa0SJoe Thornber 	if (!--pool->ref_count)
3085991d9fa0SJoe Thornber 		__pool_destroy(pool);
3086991d9fa0SJoe Thornber }
3087991d9fa0SJoe Thornber 
3088991d9fa0SJoe Thornber static struct pool *__pool_find(struct mapped_device *pool_md,
3089991d9fa0SJoe Thornber 				struct block_device *metadata_dev,
3090873937e7SMikulas Patocka 				struct block_device *data_dev,
3091e49e5829SJoe Thornber 				unsigned long block_size, int read_only,
3092e49e5829SJoe Thornber 				char **error, int *created)
3093991d9fa0SJoe Thornber {
3094991d9fa0SJoe Thornber 	struct pool *pool = __pool_table_lookup_metadata_dev(metadata_dev);
3095991d9fa0SJoe Thornber 
3096991d9fa0SJoe Thornber 	if (pool) {
3097f09996c9SMike Snitzer 		if (pool->pool_md != pool_md) {
3098f09996c9SMike Snitzer 			*error = "metadata device already in use by a pool";
3099991d9fa0SJoe Thornber 			return ERR_PTR(-EBUSY);
3100f09996c9SMike Snitzer 		}
3101873937e7SMikulas Patocka 		if (pool->data_dev != data_dev) {
3102873937e7SMikulas Patocka 			*error = "data device already in use by a pool";
3103873937e7SMikulas Patocka 			return ERR_PTR(-EBUSY);
3104873937e7SMikulas Patocka 		}
3105991d9fa0SJoe Thornber 		__pool_inc(pool);
3106991d9fa0SJoe Thornber 
3107991d9fa0SJoe Thornber 	} else {
3108991d9fa0SJoe Thornber 		pool = __pool_table_lookup(pool_md);
3109991d9fa0SJoe Thornber 		if (pool) {
3110873937e7SMikulas Patocka 			if (pool->md_dev != metadata_dev || pool->data_dev != data_dev) {
3111f09996c9SMike Snitzer 				*error = "different pool cannot replace a pool";
3112991d9fa0SJoe Thornber 				return ERR_PTR(-EINVAL);
3113f09996c9SMike Snitzer 			}
3114991d9fa0SJoe Thornber 			__pool_inc(pool);
3115991d9fa0SJoe Thornber 
311667e2e2b2SJoe Thornber 		} else {
3117873937e7SMikulas Patocka 			pool = pool_create(pool_md, metadata_dev, data_dev, block_size, read_only, error);
311867e2e2b2SJoe Thornber 			*created = 1;
311967e2e2b2SJoe Thornber 		}
3120991d9fa0SJoe Thornber 	}
3121991d9fa0SJoe Thornber 
3122991d9fa0SJoe Thornber 	return pool;
3123991d9fa0SJoe Thornber }
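/*
 * An existing pool is reused only when the metadata device, data device
 * and pool mapped_device all match; any mismatch fails with -EBUSY or
 * -EINVAL rather than silently rebinding the pool.
 */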
3124991d9fa0SJoe Thornber 
3125a4a82ce3SHeinz Mauelshagen /*
3126a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
3127991d9fa0SJoe Thornber  * Pool target methods
3128a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
3129a4a82ce3SHeinz Mauelshagen  */
3130991d9fa0SJoe Thornber static void pool_dtr(struct dm_target *ti)
3131991d9fa0SJoe Thornber {
3132991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3133991d9fa0SJoe Thornber 
3134991d9fa0SJoe Thornber 	mutex_lock(&dm_thin_pool_table.mutex);
3135991d9fa0SJoe Thornber 
3136991d9fa0SJoe Thornber 	unbind_control_target(pt->pool, ti);
3137991d9fa0SJoe Thornber 	__pool_dec(pt->pool);
3138991d9fa0SJoe Thornber 	dm_put_device(ti, pt->metadata_dev);
3139991d9fa0SJoe Thornber 	dm_put_device(ti, pt->data_dev);
3140991d9fa0SJoe Thornber 	kfree(pt);
3141991d9fa0SJoe Thornber 
3142991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
3143991d9fa0SJoe Thornber }
3144991d9fa0SJoe Thornber 
3145991d9fa0SJoe Thornber static int parse_pool_features(struct dm_arg_set *as, struct pool_features *pf,
3146991d9fa0SJoe Thornber 			       struct dm_target *ti)
3147991d9fa0SJoe Thornber {
3148991d9fa0SJoe Thornber 	int r;
314986a3238cSHeinz Mauelshagen 	unsigned int argc;
3150991d9fa0SJoe Thornber 	const char *arg_name;
3151991d9fa0SJoe Thornber 
31525916a22bSEric Biggers 	static const struct dm_arg _args[] = {
315374aa45c3SMike Snitzer 		{0, 4, "Invalid number of pool feature arguments"},
3154991d9fa0SJoe Thornber 	};
3155991d9fa0SJoe Thornber 
3156991d9fa0SJoe Thornber 	/*
3157991d9fa0SJoe Thornber 	 * No feature arguments supplied.
3158991d9fa0SJoe Thornber 	 */
3159991d9fa0SJoe Thornber 	if (!as->argc)
3160991d9fa0SJoe Thornber 		return 0;
3161991d9fa0SJoe Thornber 
3162991d9fa0SJoe Thornber 	r = dm_read_arg_group(_args, as, &argc, &ti->error);
3163991d9fa0SJoe Thornber 	if (r)
3164991d9fa0SJoe Thornber 		return -EINVAL;
3165991d9fa0SJoe Thornber 
3166991d9fa0SJoe Thornber 	while (argc && !r) {
3167991d9fa0SJoe Thornber 		arg_name = dm_shift_arg(as);
3168991d9fa0SJoe Thornber 		argc--;
3169991d9fa0SJoe Thornber 
3170e49e5829SJoe Thornber 		if (!strcasecmp(arg_name, "skip_block_zeroing"))
31719bc142ddSMike Snitzer 			pf->zero_new_blocks = false;
3172991d9fa0SJoe Thornber 
3173e49e5829SJoe Thornber 		else if (!strcasecmp(arg_name, "ignore_discard"))
31749bc142ddSMike Snitzer 			pf->discard_enabled = false;
3175e49e5829SJoe Thornber 
3176e49e5829SJoe Thornber 		else if (!strcasecmp(arg_name, "no_discard_passdown"))
31779bc142ddSMike Snitzer 			pf->discard_passdown = false;
3178e49e5829SJoe Thornber 
3179e49e5829SJoe Thornber 		else if (!strcasecmp(arg_name, "read_only"))
3180e49e5829SJoe Thornber 			pf->mode = PM_READ_ONLY;
3181e49e5829SJoe Thornber 
3182787a996cSMike Snitzer 		else if (!strcasecmp(arg_name, "error_if_no_space"))
3183787a996cSMike Snitzer 			pf->error_if_no_space = true;
3184787a996cSMike Snitzer 
3185e49e5829SJoe Thornber 		else {
3186991d9fa0SJoe Thornber 			ti->error = "Unrecognised pool feature requested";
3187991d9fa0SJoe Thornber 			r = -EINVAL;
3188e49e5829SJoe Thornber 			break;
3189e49e5829SJoe Thornber 		}
3190991d9fa0SJoe Thornber 	}
3191991d9fa0SJoe Thornber 
3192991d9fa0SJoe Thornber 	return r;
3193991d9fa0SJoe Thornber }
3194991d9fa0SJoe Thornber 
3195ac8c3f3dSJoe Thornber static void metadata_low_callback(void *context)
3196ac8c3f3dSJoe Thornber {
3197ac8c3f3dSJoe Thornber 	struct pool *pool = context;
3198ac8c3f3dSJoe Thornber 
3199ac8c3f3dSJoe Thornber 	DMWARN("%s: reached low water mark for metadata device: sending event.",
3200ac8c3f3dSJoe Thornber 	       dm_device_name(pool->pool_md));
3201ac8c3f3dSJoe Thornber 
3202ac8c3f3dSJoe Thornber 	dm_table_event(pool->ti->table);
3203ac8c3f3dSJoe Thornber }
3204ac8c3f3dSJoe Thornber 
3205694cfe7fSNikos Tsironis /*
3206694cfe7fSNikos Tsironis  * We need to flush the data device **before** committing the metadata.
3207694cfe7fSNikos Tsironis  *
3208694cfe7fSNikos Tsironis  * This ensures that the data blocks of any newly inserted mappings are
3209694cfe7fSNikos Tsironis  * properly written to non-volatile storage and won't be lost in case of a
3210694cfe7fSNikos Tsironis  * crash.
3211694cfe7fSNikos Tsironis  *
3212694cfe7fSNikos Tsironis  * Failure to do so can result in data corruption in the case of internal or
3213694cfe7fSNikos Tsironis  * external snapshots and in the case of newly provisioned blocks, when block
3214694cfe7fSNikos Tsironis  * zeroing is enabled.
3215694cfe7fSNikos Tsironis  */
3216694cfe7fSNikos Tsironis static int metadata_pre_commit_callback(void *context)
3217694cfe7fSNikos Tsironis {
3218f06c03d1SMikulas Patocka 	struct pool *pool = context;
3219694cfe7fSNikos Tsironis 
322028d7d128SChristoph Hellwig 	return blkdev_issue_flush(pool->data_dev);
3221694cfe7fSNikos Tsironis }
3222694cfe7fSNikos Tsironis 
32237d48935eSMike Snitzer static sector_t get_dev_size(struct block_device *bdev)
3224b17446dfSJoe Thornber {
32256dcbb52cSChristoph Hellwig 	return bdev_nr_sectors(bdev);
32267d48935eSMike Snitzer }
32277d48935eSMike Snitzer 
32287d48935eSMike Snitzer static void warn_if_metadata_device_too_big(struct block_device *bdev)
32297d48935eSMike Snitzer {
32307d48935eSMike Snitzer 	sector_t metadata_dev_size = get_dev_size(bdev);
3231b17446dfSJoe Thornber 
32327d48935eSMike Snitzer 	if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING)
3233385411ffSChristoph Hellwig 		DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
3234385411ffSChristoph Hellwig 		       bdev, THIN_METADATA_MAX_SECTORS);
3235b17446dfSJoe Thornber }
3236b17446dfSJoe Thornber 
32377d48935eSMike Snitzer static sector_t get_metadata_dev_size(struct block_device *bdev)
32387d48935eSMike Snitzer {
32397d48935eSMike Snitzer 	sector_t metadata_dev_size = get_dev_size(bdev);
32407d48935eSMike Snitzer 
32417d48935eSMike Snitzer 	if (metadata_dev_size > THIN_METADATA_MAX_SECTORS)
32427d48935eSMike Snitzer 		metadata_dev_size = THIN_METADATA_MAX_SECTORS;
32437d48935eSMike Snitzer 
3244b17446dfSJoe Thornber 	return metadata_dev_size;
3245b17446dfSJoe Thornber }
3246b17446dfSJoe Thornber 
324724347e95SJoe Thornber static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev)
324824347e95SJoe Thornber {
324924347e95SJoe Thornber 	sector_t metadata_dev_size = get_metadata_dev_size(bdev);
325024347e95SJoe Thornber 
32517d48935eSMike Snitzer 	sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE);
325224347e95SJoe Thornber 
325324347e95SJoe Thornber 	return metadata_dev_size;
325424347e95SJoe Thornber }
325524347e95SJoe Thornber 
3256991d9fa0SJoe Thornber /*
3257ac8c3f3dSJoe Thornber  * When a metadata threshold is crossed a dm event is triggered, and
3258ac8c3f3dSJoe Thornber  * userland should respond by growing the metadata device.  We could let
3259ac8c3f3dSJoe Thornber  * userland set the threshold, like we do with the data threshold, but I'm
3260ac8c3f3dSJoe Thornber  * not sure they know enough to do this well.
3261ac8c3f3dSJoe Thornber  */
3262ac8c3f3dSJoe Thornber static dm_block_t calc_metadata_threshold(struct pool_c *pt)
3263ac8c3f3dSJoe Thornber {
3264ac8c3f3dSJoe Thornber 	/*
3265ac8c3f3dSJoe Thornber 	 * 4M is ample for all ops with the possible exception of thin
3266ac8c3f3dSJoe Thornber 	 * device deletion which is harmless if it fails (just retry the
3267ac8c3f3dSJoe Thornber 	 * delete after you've grown the device).
3268ac8c3f3dSJoe Thornber 	 */
3269ac8c3f3dSJoe Thornber 	dm_block_t quarter = get_metadata_dev_size_in_blocks(pt->metadata_dev->bdev) / 4;
32700ef0b471SHeinz Mauelshagen 
3271ac8c3f3dSJoe Thornber 	return min((dm_block_t)1024ULL /* 4M */, quarter);
3272ac8c3f3dSJoe Thornber }
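/*
 * Editor's worked example (a sketch, not part of the original source): with a
 * 4KiB metadata block size the 1024-block cap above corresponds to 4MiB.  For
 * an assumed 1GiB metadata device that is 1GiB / 4KiB = 262144 blocks, so
 * quarter = 65536 and the threshold becomes min(1024, 65536) = 1024 blocks,
 * i.e. the dm event is expected to fire once roughly 4MiB of metadata space
 * remains free.
 */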
3273ac8c3f3dSJoe Thornber 
3274ac8c3f3dSJoe Thornber /*
3275991d9fa0SJoe Thornber  * thin-pool <metadata dev> <data dev>
3276991d9fa0SJoe Thornber  *	     <data block size (sectors)>
3277991d9fa0SJoe Thornber  *	     <low water mark (blocks)>
3278991d9fa0SJoe Thornber  *	     [<#feature args> [<arg>]*]
3279991d9fa0SJoe Thornber  *
3280991d9fa0SJoe Thornber  * Optional feature arguments are:
3281991d9fa0SJoe Thornber  *	     skip_block_zeroing: skips the zeroing of newly-provisioned blocks.
328267e2e2b2SJoe Thornber  *	     ignore_discard: disables discard support.
328367e2e2b2SJoe Thornber  *	     no_discard_passdown: doesn't pass discards down to the data device.
3284787a996cSMike Snitzer  *	     read_only: doesn't allow any changes to be made to the pool metadata.
3285787a996cSMike Snitzer  *	     error_if_no_space: errors IOs, instead of queueing them, if no space.
3286991d9fa0SJoe Thornber  */
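/*
 * Illustrative usage (an editor's sketch; device names and sizes are
 * assumptions, not taken from this file):
 *
 *   dmsetup create pool --table \
 *     "0 20971520 thin-pool /dev/sdb1 /dev/sdc1 128 32768 1 skip_block_zeroing"
 *
 * This would describe a 10GiB pool using /dev/sdb1 for metadata and /dev/sdc1
 * for data, with a 64KiB (128-sector) data block size, a low water mark of
 * 32768 blocks and a single feature argument.
 */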
328786a3238cSHeinz Mauelshagen static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
3288991d9fa0SJoe Thornber {
328967e2e2b2SJoe Thornber 	int r, pool_created = 0;
3290991d9fa0SJoe Thornber 	struct pool_c *pt;
3291991d9fa0SJoe Thornber 	struct pool *pool;
3292991d9fa0SJoe Thornber 	struct pool_features pf;
3293991d9fa0SJoe Thornber 	struct dm_arg_set as;
3294991d9fa0SJoe Thornber 	struct dm_dev *data_dev;
3295991d9fa0SJoe Thornber 	unsigned long block_size;
3296991d9fa0SJoe Thornber 	dm_block_t low_water_blocks;
3297991d9fa0SJoe Thornber 	struct dm_dev *metadata_dev;
329805bdb996SChristoph Hellwig 	blk_mode_t metadata_mode;
3299991d9fa0SJoe Thornber 
3300991d9fa0SJoe Thornber 	/*
3301991d9fa0SJoe Thornber 	 * FIXME Remove validation from scope of lock.
3302991d9fa0SJoe Thornber 	 */
3303991d9fa0SJoe Thornber 	mutex_lock(&dm_thin_pool_table.mutex);
3304991d9fa0SJoe Thornber 
3305991d9fa0SJoe Thornber 	if (argc < 4) {
3306991d9fa0SJoe Thornber 		ti->error = "Invalid argument count";
3307991d9fa0SJoe Thornber 		r = -EINVAL;
3308991d9fa0SJoe Thornber 		goto out_unlock;
3309991d9fa0SJoe Thornber 	}
33105d0db96dSJoe Thornber 
3311991d9fa0SJoe Thornber 	as.argc = argc;
3312991d9fa0SJoe Thornber 	as.argv = argv;
3313991d9fa0SJoe Thornber 
331470de2cbdSJason Cai (Xiang Feng) 	/* make sure metadata and data are different devices */
331570de2cbdSJason Cai (Xiang Feng) 	if (!strcmp(argv[0], argv[1])) {
331670de2cbdSJason Cai (Xiang Feng) 		ti->error = "Error setting metadata or data device";
331770de2cbdSJason Cai (Xiang Feng) 		r = -EINVAL;
331870de2cbdSJason Cai (Xiang Feng) 		goto out_unlock;
331970de2cbdSJason Cai (Xiang Feng) 	}
332070de2cbdSJason Cai (Xiang Feng) 
33215d0db96dSJoe Thornber 	/*
33225d0db96dSJoe Thornber 	 * Set default pool features.
33235d0db96dSJoe Thornber 	 */
33245d0db96dSJoe Thornber 	pool_features_init(&pf);
33255d0db96dSJoe Thornber 
33265d0db96dSJoe Thornber 	dm_consume_args(&as, 4);
33275d0db96dSJoe Thornber 	r = parse_pool_features(&as, &pf, ti);
33285d0db96dSJoe Thornber 	if (r)
33295d0db96dSJoe Thornber 		goto out_unlock;
33305d0db96dSJoe Thornber 
333105bdb996SChristoph Hellwig 	metadata_mode = BLK_OPEN_READ |
333205bdb996SChristoph Hellwig 		((pf.mode == PM_READ_ONLY) ? 0 : BLK_OPEN_WRITE);
33335d0db96dSJoe Thornber 	r = dm_get_device(ti, argv[0], metadata_mode, &metadata_dev);
3334991d9fa0SJoe Thornber 	if (r) {
3335991d9fa0SJoe Thornber 		ti->error = "Error opening metadata block device";
3336991d9fa0SJoe Thornber 		goto out_unlock;
3337991d9fa0SJoe Thornber 	}
33387d48935eSMike Snitzer 	warn_if_metadata_device_too_big(metadata_dev->bdev);
3339991d9fa0SJoe Thornber 
334005bdb996SChristoph Hellwig 	r = dm_get_device(ti, argv[1], BLK_OPEN_READ | BLK_OPEN_WRITE, &data_dev);
3341991d9fa0SJoe Thornber 	if (r) {
3342991d9fa0SJoe Thornber 		ti->error = "Error getting data device";
3343991d9fa0SJoe Thornber 		goto out_metadata;
3344991d9fa0SJoe Thornber 	}
3345991d9fa0SJoe Thornber 
3346991d9fa0SJoe Thornber 	if (kstrtoul(argv[2], 10, &block_size) || !block_size ||
3347991d9fa0SJoe Thornber 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
3348991d9fa0SJoe Thornber 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
334955f2b8bdSMike Snitzer 	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
3350991d9fa0SJoe Thornber 		ti->error = "Invalid block size";
3351991d9fa0SJoe Thornber 		r = -EINVAL;
3352991d9fa0SJoe Thornber 		goto out;
3353991d9fa0SJoe Thornber 	}
3354991d9fa0SJoe Thornber 
3355991d9fa0SJoe Thornber 	if (kstrtoull(argv[3], 10, (unsigned long long *)&low_water_blocks)) {
3356991d9fa0SJoe Thornber 		ti->error = "Invalid low water mark";
3357991d9fa0SJoe Thornber 		r = -EINVAL;
3358991d9fa0SJoe Thornber 		goto out;
3359991d9fa0SJoe Thornber 	}
3360991d9fa0SJoe Thornber 
3361991d9fa0SJoe Thornber 	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
3362991d9fa0SJoe Thornber 	if (!pt) {
3363991d9fa0SJoe Thornber 		r = -ENOMEM;
3364991d9fa0SJoe Thornber 		goto out;
3365991d9fa0SJoe Thornber 	}
3366991d9fa0SJoe Thornber 
3367873937e7SMikulas Patocka 	pool = __pool_find(dm_table_get_md(ti->table), metadata_dev->bdev, data_dev->bdev,
3368e49e5829SJoe Thornber 			   block_size, pf.mode == PM_READ_ONLY, &ti->error, &pool_created);
3369991d9fa0SJoe Thornber 	if (IS_ERR(pool)) {
3370991d9fa0SJoe Thornber 		r = PTR_ERR(pool);
3371991d9fa0SJoe Thornber 		goto out_free_pt;
3372991d9fa0SJoe Thornber 	}
3373991d9fa0SJoe Thornber 
337467e2e2b2SJoe Thornber 	/*
337567e2e2b2SJoe Thornber 	 * 'pool_created' reflects whether this is the first table load.
337667e2e2b2SJoe Thornber 	 * Top level discard support is not allowed to be changed after
337767e2e2b2SJoe Thornber 	 * initial load.  This would require a pool reload to trigger thin
337867e2e2b2SJoe Thornber 	 * device changes.
337967e2e2b2SJoe Thornber 	 */
338067e2e2b2SJoe Thornber 	if (!pool_created && pf.discard_enabled != pool->pf.discard_enabled) {
338167e2e2b2SJoe Thornber 		ti->error = "Discard support cannot be disabled once enabled";
338267e2e2b2SJoe Thornber 		r = -EINVAL;
338367e2e2b2SJoe Thornber 		goto out_flags_changed;
338467e2e2b2SJoe Thornber 	}
338567e2e2b2SJoe Thornber 
3386991d9fa0SJoe Thornber 	pt->pool = pool;
3387991d9fa0SJoe Thornber 	pt->ti = ti;
3388991d9fa0SJoe Thornber 	pt->metadata_dev = metadata_dev;
3389991d9fa0SJoe Thornber 	pt->data_dev = data_dev;
3390991d9fa0SJoe Thornber 	pt->low_water_blocks = low_water_blocks;
33910424caa1SMike Snitzer 	pt->adjusted_pf = pt->requested_pf = pf;
339255a62eefSAlasdair G Kergon 	ti->num_flush_bios = 1;
33939bbf5feeSColy Li 	ti->limit_swap_bios = true;
33949bc142ddSMike Snitzer 
339567e2e2b2SJoe Thornber 	/*
339667e2e2b2SJoe Thornber 	 * Only need to enable discards if the pool should pass
339767e2e2b2SJoe Thornber 	 * them down to the data device.  The thin device's discard
339867e2e2b2SJoe Thornber 	 * processing will cause mappings to be removed from the btree.
339967e2e2b2SJoe Thornber 	 */
340067e2e2b2SJoe Thornber 	if (pf.discard_enabled && pf.discard_passdown) {
340155a62eefSAlasdair G Kergon 		ti->num_discard_bios = 1;
340267e2e2b2SJoe Thornber 		/*
340367e2e2b2SJoe Thornber 		 * Setting 'discards_supported' circumvents the normal
340467e2e2b2SJoe Thornber 		 * stacking of discard limits (this keeps the pool and
340567e2e2b2SJoe Thornber 		 * thin devices' discard limits consistent).
340667e2e2b2SJoe Thornber 		 */
34070ac55489SAlasdair G Kergon 		ti->discards_supported = true;
3408e2dd8acaSJoe Thornber 		ti->max_discard_granularity = true;
340967e2e2b2SJoe Thornber 	}
3410991d9fa0SJoe Thornber 	ti->private = pt;
3411991d9fa0SJoe Thornber 
3412ac8c3f3dSJoe Thornber 	r = dm_pool_register_metadata_threshold(pt->pool->pmd,
3413ac8c3f3dSJoe Thornber 						calc_metadata_threshold(pt),
3414ac8c3f3dSJoe Thornber 						metadata_low_callback,
3415ac8c3f3dSJoe Thornber 						pool);
34163534e5a5SLuo Meng 	if (r) {
34173534e5a5SLuo Meng 		ti->error = "Error registering metadata threshold";
3418ba30670fSMike Snitzer 		goto out_flags_changed;
34193534e5a5SLuo Meng 	}
3420ac8c3f3dSJoe Thornber 
3421f06c03d1SMikulas Patocka 	dm_pool_register_pre_commit_callback(pool->pmd,
3422f06c03d1SMikulas Patocka 					     metadata_pre_commit_callback, pool);
3423f06c03d1SMikulas Patocka 
3424991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
3425991d9fa0SJoe Thornber 
3426991d9fa0SJoe Thornber 	return 0;
3427991d9fa0SJoe Thornber 
342867e2e2b2SJoe Thornber out_flags_changed:
342967e2e2b2SJoe Thornber 	__pool_dec(pool);
3430991d9fa0SJoe Thornber out_free_pt:
3431991d9fa0SJoe Thornber 	kfree(pt);
3432991d9fa0SJoe Thornber out:
3433991d9fa0SJoe Thornber 	dm_put_device(ti, data_dev);
3434991d9fa0SJoe Thornber out_metadata:
3435991d9fa0SJoe Thornber 	dm_put_device(ti, metadata_dev);
3436991d9fa0SJoe Thornber out_unlock:
3437991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
3438991d9fa0SJoe Thornber 
3439991d9fa0SJoe Thornber 	return r;
3440991d9fa0SJoe Thornber }
3441991d9fa0SJoe Thornber 
34427de3ee57SMikulas Patocka static int pool_map(struct dm_target *ti, struct bio *bio)
3443991d9fa0SJoe Thornber {
3444991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3445991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3446991d9fa0SJoe Thornber 
3447991d9fa0SJoe Thornber 	/*
3448991d9fa0SJoe Thornber 	 * As this is a singleton target, ti->begin is always zero.
3449991d9fa0SJoe Thornber 	 */
34508e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
345174d46992SChristoph Hellwig 	bio_set_dev(bio, pt->data_dev->bdev);
34528e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
3453991d9fa0SJoe Thornber 
3454c0a7a0acSMike Snitzer 	return DM_MAPIO_REMAPPED;
3455991d9fa0SJoe Thornber }
3456991d9fa0SJoe Thornber 
3457b17446dfSJoe Thornber static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit)
3458b17446dfSJoe Thornber {
3459b17446dfSJoe Thornber 	int r;
3460b17446dfSJoe Thornber 	struct pool_c *pt = ti->private;
3461b17446dfSJoe Thornber 	struct pool *pool = pt->pool;
3462b17446dfSJoe Thornber 	sector_t data_size = ti->len;
3463b17446dfSJoe Thornber 	dm_block_t sb_data_size;
3464b17446dfSJoe Thornber 
3465b17446dfSJoe Thornber 	*need_commit = false;
3466b17446dfSJoe Thornber 
3467b17446dfSJoe Thornber 	(void) sector_div(data_size, pool->sectors_per_block);
3468b17446dfSJoe Thornber 
3469b17446dfSJoe Thornber 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
3470b17446dfSJoe Thornber 	if (r) {
34714fa5971aSMike Snitzer 		DMERR("%s: failed to retrieve data device size",
34724fa5971aSMike Snitzer 		      dm_device_name(pool->pool_md));
3473b17446dfSJoe Thornber 		return r;
3474b17446dfSJoe Thornber 	}
3475b17446dfSJoe Thornber 
3476b17446dfSJoe Thornber 	if (data_size < sb_data_size) {
34774fa5971aSMike Snitzer 		DMERR("%s: pool target (%llu blocks) too small: expected %llu",
34784fa5971aSMike Snitzer 		      dm_device_name(pool->pool_md),
3479b17446dfSJoe Thornber 		      (unsigned long long)data_size, sb_data_size);
3480b17446dfSJoe Thornber 		return -EINVAL;
3481b17446dfSJoe Thornber 
3482b17446dfSJoe Thornber 	} else if (data_size > sb_data_size) {
348307f2b6e0SMike Snitzer 		if (dm_pool_metadata_needs_check(pool->pmd)) {
348407f2b6e0SMike Snitzer 			DMERR("%s: unable to grow the data device until repaired.",
348507f2b6e0SMike Snitzer 			      dm_device_name(pool->pool_md));
348607f2b6e0SMike Snitzer 			return 0;
348707f2b6e0SMike Snitzer 		}
348807f2b6e0SMike Snitzer 
34896f7f51d4SMike Snitzer 		if (sb_data_size)
34906f7f51d4SMike Snitzer 			DMINFO("%s: growing the data device from %llu to %llu blocks",
34916f7f51d4SMike Snitzer 			       dm_device_name(pool->pool_md),
34926f7f51d4SMike Snitzer 			       sb_data_size, (unsigned long long)data_size);
3493b17446dfSJoe Thornber 		r = dm_pool_resize_data_dev(pool->pmd, data_size);
3494b17446dfSJoe Thornber 		if (r) {
3495b5330655SJoe Thornber 			metadata_operation_failed(pool, "dm_pool_resize_data_dev", r);
3496b17446dfSJoe Thornber 			return r;
3497b17446dfSJoe Thornber 		}
3498b17446dfSJoe Thornber 
3499b17446dfSJoe Thornber 		*need_commit = true;
3500b17446dfSJoe Thornber 	}
3501b17446dfSJoe Thornber 
3502b17446dfSJoe Thornber 	return 0;
3503b17446dfSJoe Thornber }
3504b17446dfSJoe Thornber 
350524347e95SJoe Thornber static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
350624347e95SJoe Thornber {
350724347e95SJoe Thornber 	int r;
350824347e95SJoe Thornber 	struct pool_c *pt = ti->private;
350924347e95SJoe Thornber 	struct pool *pool = pt->pool;
351024347e95SJoe Thornber 	dm_block_t metadata_dev_size, sb_metadata_dev_size;
351124347e95SJoe Thornber 
351224347e95SJoe Thornber 	*need_commit = false;
351324347e95SJoe Thornber 
3514610bba8bSAlasdair G Kergon 	metadata_dev_size = get_metadata_dev_size_in_blocks(pool->md_dev);
351524347e95SJoe Thornber 
351624347e95SJoe Thornber 	r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
351724347e95SJoe Thornber 	if (r) {
35184fa5971aSMike Snitzer 		DMERR("%s: failed to retrieve metadata device size",
35194fa5971aSMike Snitzer 		      dm_device_name(pool->pool_md));
352024347e95SJoe Thornber 		return r;
352124347e95SJoe Thornber 	}
352224347e95SJoe Thornber 
352324347e95SJoe Thornber 	if (metadata_dev_size < sb_metadata_dev_size) {
35244fa5971aSMike Snitzer 		DMERR("%s: metadata device (%llu blocks) too small: expected %llu",
35254fa5971aSMike Snitzer 		      dm_device_name(pool->pool_md),
352624347e95SJoe Thornber 		      metadata_dev_size, sb_metadata_dev_size);
352724347e95SJoe Thornber 		return -EINVAL;
352824347e95SJoe Thornber 
352924347e95SJoe Thornber 	} else if (metadata_dev_size > sb_metadata_dev_size) {
353007f2b6e0SMike Snitzer 		if (dm_pool_metadata_needs_check(pool->pmd)) {
353107f2b6e0SMike Snitzer 			DMERR("%s: unable to grow the metadata device until repaired.",
353207f2b6e0SMike Snitzer 			      dm_device_name(pool->pool_md));
353307f2b6e0SMike Snitzer 			return 0;
353407f2b6e0SMike Snitzer 		}
353507f2b6e0SMike Snitzer 
35367d48935eSMike Snitzer 		warn_if_metadata_device_too_big(pool->md_dev);
35376f7f51d4SMike Snitzer 		DMINFO("%s: growing the metadata device from %llu to %llu blocks",
35386f7f51d4SMike Snitzer 		       dm_device_name(pool->pool_md),
35396f7f51d4SMike Snitzer 		       sb_metadata_dev_size, metadata_dev_size);
35403ab91828SJoe Thornber 
35413ab91828SJoe Thornber 		if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
35423ab91828SJoe Thornber 			set_pool_mode(pool, PM_WRITE);
35433ab91828SJoe Thornber 
354424347e95SJoe Thornber 		r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
354524347e95SJoe Thornber 		if (r) {
3546b5330655SJoe Thornber 			metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
354724347e95SJoe Thornber 			return r;
354824347e95SJoe Thornber 		}
354924347e95SJoe Thornber 
355024347e95SJoe Thornber 		*need_commit = true;
355124347e95SJoe Thornber 	}
355224347e95SJoe Thornber 
355324347e95SJoe Thornber 	return 0;
355424347e95SJoe Thornber }
355524347e95SJoe Thornber 
3556991d9fa0SJoe Thornber /*
3557991d9fa0SJoe Thornber  * Retrieves the number of blocks of the data device from
3558991d9fa0SJoe Thornber  * the superblock and compares it to the actual device size,
3559991d9fa0SJoe Thornber  * thus resizing the data device in case it has grown.
3560991d9fa0SJoe Thornber  *
3561991d9fa0SJoe Thornber  * This both copes with opening preallocated data devices in the ctr
3562991d9fa0SJoe Thornber  * being followed by a resume
3563991d9fa0SJoe Thornber  * -and-
3564991d9fa0SJoe Thornber  * calling the resume method individually after userspace has
3565991d9fa0SJoe Thornber  * grown the data device in reaction to a table event.
3566991d9fa0SJoe Thornber  */
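/*
 * Illustrative userland flow (an editor's sketch; the exact commands are
 * assumptions, not taken from this file).  After growing the underlying data
 * device, userspace typically reloads the pool table with the new length and
 * resumes it, which re-enters this preresume path and triggers the resize:
 *
 *   dmsetup suspend pool
 *   dmsetup reload pool --table "0 <new length in sectors> thin-pool ..."
 *   dmsetup resume pool
 */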
3567991d9fa0SJoe Thornber static int pool_preresume(struct dm_target *ti)
3568991d9fa0SJoe Thornber {
3569991d9fa0SJoe Thornber 	int r;
357024347e95SJoe Thornber 	bool need_commit1, need_commit2;
3571991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3572991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3573991d9fa0SJoe Thornber 
3574991d9fa0SJoe Thornber 	/*
3575991d9fa0SJoe Thornber 	 * Take control of the pool object.
3576991d9fa0SJoe Thornber 	 */
3577991d9fa0SJoe Thornber 	r = bind_control_target(pool, ti);
3578991d9fa0SJoe Thornber 	if (r)
357919eb1650SLuo Meng 		goto out;
3580991d9fa0SJoe Thornber 
3581b17446dfSJoe Thornber 	r = maybe_resize_data_dev(ti, &need_commit1);
3582b17446dfSJoe Thornber 	if (r)
358319eb1650SLuo Meng 		goto out;
3584991d9fa0SJoe Thornber 
358524347e95SJoe Thornber 	r = maybe_resize_metadata_dev(ti, &need_commit2);
358624347e95SJoe Thornber 	if (r)
358719eb1650SLuo Meng 		goto out;
358824347e95SJoe Thornber 
358924347e95SJoe Thornber 	if (need_commit1 || need_commit2)
3590020cc3b5SJoe Thornber 		(void) commit(pool);
359119eb1650SLuo Meng out:
359219eb1650SLuo Meng 	/*
359319eb1650SLuo Meng 	 * When a thin-pool is PM_FAIL, it cannot be rebuilt while
359419eb1650SLuo Meng 	 * bios remain on the deferred list. Therefore return 0 here
359519eb1650SLuo Meng 	 * so that pool_resume() can still run and flush IO.
359619eb1650SLuo Meng 	 */
359719eb1650SLuo Meng 	if (r && get_pool_mode(pool) == PM_FAIL)
359819eb1650SLuo Meng 		r = 0;
3599991d9fa0SJoe Thornber 
360019eb1650SLuo Meng 	return r;
3601991d9fa0SJoe Thornber }
3602991d9fa0SJoe Thornber 
3603583024d2SMike Snitzer static void pool_suspend_active_thins(struct pool *pool)
3604583024d2SMike Snitzer {
3605583024d2SMike Snitzer 	struct thin_c *tc;
3606583024d2SMike Snitzer 
3607583024d2SMike Snitzer 	/* Suspend all active thin devices */
3608583024d2SMike Snitzer 	tc = get_first_thin(pool);
3609583024d2SMike Snitzer 	while (tc) {
3610583024d2SMike Snitzer 		dm_internal_suspend_noflush(tc->thin_md);
3611583024d2SMike Snitzer 		tc = get_next_thin(pool, tc);
3612583024d2SMike Snitzer 	}
3613583024d2SMike Snitzer }
3614583024d2SMike Snitzer 
3615583024d2SMike Snitzer static void pool_resume_active_thins(struct pool *pool)
3616583024d2SMike Snitzer {
3617583024d2SMike Snitzer 	struct thin_c *tc;
3618583024d2SMike Snitzer 
3619583024d2SMike Snitzer 	/* Resume all active thin devices */
3620583024d2SMike Snitzer 	tc = get_first_thin(pool);
3621583024d2SMike Snitzer 	while (tc) {
3622583024d2SMike Snitzer 		dm_internal_resume(tc->thin_md);
3623583024d2SMike Snitzer 		tc = get_next_thin(pool, tc);
3624583024d2SMike Snitzer 	}
3625583024d2SMike Snitzer }
3626583024d2SMike Snitzer 
3627991d9fa0SJoe Thornber static void pool_resume(struct dm_target *ti)
3628991d9fa0SJoe Thornber {
3629991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3630991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3631991d9fa0SJoe Thornber 
3632583024d2SMike Snitzer 	/*
3633583024d2SMike Snitzer 	 * Must requeue active_thins' bios and then resume
3634583024d2SMike Snitzer 	 * active_thins _before_ clearing 'suspend' flag.
3635583024d2SMike Snitzer 	 */
3636583024d2SMike Snitzer 	requeue_bios(pool);
3637583024d2SMike Snitzer 	pool_resume_active_thins(pool);
3638583024d2SMike Snitzer 
36398e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
364088a6621bSJoe Thornber 	pool->low_water_triggered = false;
364180e96c54SMike Snitzer 	pool->suspended = false;
36428e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
364380e96c54SMike Snitzer 
3644905e51b3SJoe Thornber 	do_waker(&pool->waker.work);
3645991d9fa0SJoe Thornber }
3646991d9fa0SJoe Thornber 
364780e96c54SMike Snitzer static void pool_presuspend(struct dm_target *ti)
364880e96c54SMike Snitzer {
364980e96c54SMike Snitzer 	struct pool_c *pt = ti->private;
365080e96c54SMike Snitzer 	struct pool *pool = pt->pool;
365180e96c54SMike Snitzer 
36528e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
365380e96c54SMike Snitzer 	pool->suspended = true;
36548e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
3655583024d2SMike Snitzer 
3656583024d2SMike Snitzer 	pool_suspend_active_thins(pool);
365780e96c54SMike Snitzer }
365880e96c54SMike Snitzer 
365980e96c54SMike Snitzer static void pool_presuspend_undo(struct dm_target *ti)
366080e96c54SMike Snitzer {
366180e96c54SMike Snitzer 	struct pool_c *pt = ti->private;
366280e96c54SMike Snitzer 	struct pool *pool = pt->pool;
366380e96c54SMike Snitzer 
3664583024d2SMike Snitzer 	pool_resume_active_thins(pool);
3665583024d2SMike Snitzer 
36668e0c9dacSMikulas Patocka 	spin_lock_irq(&pool->lock);
366780e96c54SMike Snitzer 	pool->suspended = false;
36688e0c9dacSMikulas Patocka 	spin_unlock_irq(&pool->lock);
366980e96c54SMike Snitzer }
367080e96c54SMike Snitzer 
3671991d9fa0SJoe Thornber static void pool_postsuspend(struct dm_target *ti)
3672991d9fa0SJoe Thornber {
3673991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3674991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3675991d9fa0SJoe Thornber 
367618d03e8cSNikolay Borisov 	cancel_delayed_work_sync(&pool->waker);
367718d03e8cSNikolay Borisov 	cancel_delayed_work_sync(&pool->no_space_timeout);
3678991d9fa0SJoe Thornber 	flush_workqueue(pool->wq);
3679020cc3b5SJoe Thornber 	(void) commit(pool);
3680991d9fa0SJoe Thornber }
3681991d9fa0SJoe Thornber 
368286a3238cSHeinz Mauelshagen static int check_arg_count(unsigned int argc, unsigned int args_required)
3683991d9fa0SJoe Thornber {
3684991d9fa0SJoe Thornber 	if (argc != args_required) {
3685991d9fa0SJoe Thornber 		DMWARN("Message received with %u arguments instead of %u.",
3686991d9fa0SJoe Thornber 		       argc, args_required);
3687991d9fa0SJoe Thornber 		return -EINVAL;
3688991d9fa0SJoe Thornber 	}
3689991d9fa0SJoe Thornber 
3690991d9fa0SJoe Thornber 	return 0;
3691991d9fa0SJoe Thornber }
3692991d9fa0SJoe Thornber 
3693991d9fa0SJoe Thornber static int read_dev_id(char *arg, dm_thin_id *dev_id, int warning)
3694991d9fa0SJoe Thornber {
3695991d9fa0SJoe Thornber 	if (!kstrtoull(arg, 10, (unsigned long long *)dev_id) &&
3696991d9fa0SJoe Thornber 	    *dev_id <= MAX_DEV_ID)
3697991d9fa0SJoe Thornber 		return 0;
3698991d9fa0SJoe Thornber 
3699991d9fa0SJoe Thornber 	if (warning)
3700991d9fa0SJoe Thornber 		DMWARN("Message received with invalid device id: %s", arg);
3701991d9fa0SJoe Thornber 
3702991d9fa0SJoe Thornber 	return -EINVAL;
3703991d9fa0SJoe Thornber }
3704991d9fa0SJoe Thornber 
370586a3238cSHeinz Mauelshagen static int process_create_thin_mesg(unsigned int argc, char **argv, struct pool *pool)
3706991d9fa0SJoe Thornber {
3707991d9fa0SJoe Thornber 	dm_thin_id dev_id;
3708991d9fa0SJoe Thornber 	int r;
3709991d9fa0SJoe Thornber 
3710991d9fa0SJoe Thornber 	r = check_arg_count(argc, 2);
3711991d9fa0SJoe Thornber 	if (r)
3712991d9fa0SJoe Thornber 		return r;
3713991d9fa0SJoe Thornber 
3714991d9fa0SJoe Thornber 	r = read_dev_id(argv[1], &dev_id, 1);
3715991d9fa0SJoe Thornber 	if (r)
3716991d9fa0SJoe Thornber 		return r;
3717991d9fa0SJoe Thornber 
3718991d9fa0SJoe Thornber 	r = dm_pool_create_thin(pool->pmd, dev_id);
3719991d9fa0SJoe Thornber 	if (r) {
3720991d9fa0SJoe Thornber 		DMWARN("Creation of new thinly-provisioned device with id %s failed.",
3721991d9fa0SJoe Thornber 		       argv[1]);
3722991d9fa0SJoe Thornber 		return r;
3723991d9fa0SJoe Thornber 	}
3724991d9fa0SJoe Thornber 
3725991d9fa0SJoe Thornber 	return 0;
3726991d9fa0SJoe Thornber }
3727991d9fa0SJoe Thornber 
372886a3238cSHeinz Mauelshagen static int process_create_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3729991d9fa0SJoe Thornber {
3730991d9fa0SJoe Thornber 	dm_thin_id dev_id;
3731991d9fa0SJoe Thornber 	dm_thin_id origin_dev_id;
3732991d9fa0SJoe Thornber 	int r;
3733991d9fa0SJoe Thornber 
3734991d9fa0SJoe Thornber 	r = check_arg_count(argc, 3);
3735991d9fa0SJoe Thornber 	if (r)
3736991d9fa0SJoe Thornber 		return r;
3737991d9fa0SJoe Thornber 
3738991d9fa0SJoe Thornber 	r = read_dev_id(argv[1], &dev_id, 1);
3739991d9fa0SJoe Thornber 	if (r)
3740991d9fa0SJoe Thornber 		return r;
3741991d9fa0SJoe Thornber 
3742991d9fa0SJoe Thornber 	r = read_dev_id(argv[2], &origin_dev_id, 1);
3743991d9fa0SJoe Thornber 	if (r)
3744991d9fa0SJoe Thornber 		return r;
3745991d9fa0SJoe Thornber 
3746991d9fa0SJoe Thornber 	r = dm_pool_create_snap(pool->pmd, dev_id, origin_dev_id);
3747991d9fa0SJoe Thornber 	if (r) {
3748991d9fa0SJoe Thornber 		DMWARN("Creation of new snapshot %s of device %s failed.",
3749991d9fa0SJoe Thornber 		       argv[1], argv[2]);
3750991d9fa0SJoe Thornber 		return r;
3751991d9fa0SJoe Thornber 	}
3752991d9fa0SJoe Thornber 
3753991d9fa0SJoe Thornber 	return 0;
3754991d9fa0SJoe Thornber }
3755991d9fa0SJoe Thornber 
375686a3238cSHeinz Mauelshagen static int process_delete_mesg(unsigned int argc, char **argv, struct pool *pool)
3757991d9fa0SJoe Thornber {
3758991d9fa0SJoe Thornber 	dm_thin_id dev_id;
3759991d9fa0SJoe Thornber 	int r;
3760991d9fa0SJoe Thornber 
3761991d9fa0SJoe Thornber 	r = check_arg_count(argc, 2);
3762991d9fa0SJoe Thornber 	if (r)
3763991d9fa0SJoe Thornber 		return r;
3764991d9fa0SJoe Thornber 
3765991d9fa0SJoe Thornber 	r = read_dev_id(argv[1], &dev_id, 1);
3766991d9fa0SJoe Thornber 	if (r)
3767991d9fa0SJoe Thornber 		return r;
3768991d9fa0SJoe Thornber 
3769991d9fa0SJoe Thornber 	r = dm_pool_delete_thin_device(pool->pmd, dev_id);
3770991d9fa0SJoe Thornber 	if (r)
3771991d9fa0SJoe Thornber 		DMWARN("Deletion of thin device %s failed.", argv[1]);
3772991d9fa0SJoe Thornber 
3773991d9fa0SJoe Thornber 	return r;
3774991d9fa0SJoe Thornber }
3775991d9fa0SJoe Thornber 
377686a3238cSHeinz Mauelshagen static int process_set_transaction_id_mesg(unsigned int argc, char **argv, struct pool *pool)
3777991d9fa0SJoe Thornber {
3778991d9fa0SJoe Thornber 	dm_thin_id old_id, new_id;
3779991d9fa0SJoe Thornber 	int r;
3780991d9fa0SJoe Thornber 
3781991d9fa0SJoe Thornber 	r = check_arg_count(argc, 3);
3782991d9fa0SJoe Thornber 	if (r)
3783991d9fa0SJoe Thornber 		return r;
3784991d9fa0SJoe Thornber 
3785991d9fa0SJoe Thornber 	if (kstrtoull(argv[1], 10, (unsigned long long *)&old_id)) {
3786991d9fa0SJoe Thornber 		DMWARN("set_transaction_id message: Unrecognised id %s.", argv[1]);
3787991d9fa0SJoe Thornber 		return -EINVAL;
3788991d9fa0SJoe Thornber 	}
3789991d9fa0SJoe Thornber 
3790991d9fa0SJoe Thornber 	if (kstrtoull(argv[2], 10, (unsigned long long *)&new_id)) {
3791991d9fa0SJoe Thornber 		DMWARN("set_transaction_id message: Unrecognised new id %s.", argv[2]);
3792991d9fa0SJoe Thornber 		return -EINVAL;
3793991d9fa0SJoe Thornber 	}
3794991d9fa0SJoe Thornber 
3795991d9fa0SJoe Thornber 	r = dm_pool_set_metadata_transaction_id(pool->pmd, old_id, new_id);
3796991d9fa0SJoe Thornber 	if (r) {
3797991d9fa0SJoe Thornber 		DMWARN("Failed to change transaction id from %s to %s.",
3798991d9fa0SJoe Thornber 		       argv[1], argv[2]);
3799991d9fa0SJoe Thornber 		return r;
3800991d9fa0SJoe Thornber 	}
3801991d9fa0SJoe Thornber 
3802991d9fa0SJoe Thornber 	return 0;
3803991d9fa0SJoe Thornber }
3804991d9fa0SJoe Thornber 
380586a3238cSHeinz Mauelshagen static int process_reserve_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3806cc8394d8SJoe Thornber {
3807cc8394d8SJoe Thornber 	int r;
3808cc8394d8SJoe Thornber 
3809cc8394d8SJoe Thornber 	r = check_arg_count(argc, 1);
3810cc8394d8SJoe Thornber 	if (r)
3811cc8394d8SJoe Thornber 		return r;
3812cc8394d8SJoe Thornber 
3813020cc3b5SJoe Thornber 	(void) commit(pool);
38140d200aefSJoe Thornber 
3815cc8394d8SJoe Thornber 	r = dm_pool_reserve_metadata_snap(pool->pmd);
3816cc8394d8SJoe Thornber 	if (r)
3817cc8394d8SJoe Thornber 		DMWARN("reserve_metadata_snap message failed.");
3818cc8394d8SJoe Thornber 
3819cc8394d8SJoe Thornber 	return r;
3820cc8394d8SJoe Thornber }
3821cc8394d8SJoe Thornber 
382286a3238cSHeinz Mauelshagen static int process_release_metadata_snap_mesg(unsigned int argc, char **argv, struct pool *pool)
3823cc8394d8SJoe Thornber {
3824cc8394d8SJoe Thornber 	int r;
3825cc8394d8SJoe Thornber 
3826cc8394d8SJoe Thornber 	r = check_arg_count(argc, 1);
3827cc8394d8SJoe Thornber 	if (r)
3828cc8394d8SJoe Thornber 		return r;
3829cc8394d8SJoe Thornber 
3830cc8394d8SJoe Thornber 	r = dm_pool_release_metadata_snap(pool->pmd);
3831cc8394d8SJoe Thornber 	if (r)
3832cc8394d8SJoe Thornber 		DMWARN("release_metadata_snap message failed.");
3833cc8394d8SJoe Thornber 
3834cc8394d8SJoe Thornber 	return r;
3835cc8394d8SJoe Thornber }
3836cc8394d8SJoe Thornber 
3837991d9fa0SJoe Thornber /*
3838991d9fa0SJoe Thornber  * Messages supported:
3839991d9fa0SJoe Thornber  *   create_thin	<dev_id>
3840991d9fa0SJoe Thornber  *   create_snap	<dev_id> <origin_id>
3841991d9fa0SJoe Thornber  *   delete		<dev_id>
3842991d9fa0SJoe Thornber  *   set_transaction_id <current_trans_id> <new_trans_id>
3843cc8394d8SJoe Thornber  *   reserve_metadata_snap
3844cc8394d8SJoe Thornber  *   release_metadata_snap
3845991d9fa0SJoe Thornber  */
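/*
 * Illustrative message usage (an editor's sketch; device names and ids are
 * assumptions, not taken from this file):
 *
 *   dmsetup message /dev/mapper/pool 0 "create_thin 0"
 *   dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
 *   dmsetup message /dev/mapper/pool 0 "delete 1"
 *
 * On success each message is followed by a metadata commit (see below), so
 * the change is durable by the time the message returns.
 */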
384686a3238cSHeinz Mauelshagen static int pool_message(struct dm_target *ti, unsigned int argc, char **argv,
384786a3238cSHeinz Mauelshagen 			char *result, unsigned int maxlen)
3848991d9fa0SJoe Thornber {
3849991d9fa0SJoe Thornber 	int r = -EINVAL;
3850991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3851991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3852991d9fa0SJoe Thornber 
38533ab91828SJoe Thornber 	if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
38542a7eaea0SJoe Thornber 		DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
38552a7eaea0SJoe Thornber 		      dm_device_name(pool->pool_md));
3856fd467696SMike Snitzer 		return -EOPNOTSUPP;
38572a7eaea0SJoe Thornber 	}
38582a7eaea0SJoe Thornber 
3859991d9fa0SJoe Thornber 	if (!strcasecmp(argv[0], "create_thin"))
3860991d9fa0SJoe Thornber 		r = process_create_thin_mesg(argc, argv, pool);
3861991d9fa0SJoe Thornber 
3862991d9fa0SJoe Thornber 	else if (!strcasecmp(argv[0], "create_snap"))
3863991d9fa0SJoe Thornber 		r = process_create_snap_mesg(argc, argv, pool);
3864991d9fa0SJoe Thornber 
3865991d9fa0SJoe Thornber 	else if (!strcasecmp(argv[0], "delete"))
3866991d9fa0SJoe Thornber 		r = process_delete_mesg(argc, argv, pool);
3867991d9fa0SJoe Thornber 
3868991d9fa0SJoe Thornber 	else if (!strcasecmp(argv[0], "set_transaction_id"))
3869991d9fa0SJoe Thornber 		r = process_set_transaction_id_mesg(argc, argv, pool);
3870991d9fa0SJoe Thornber 
3871cc8394d8SJoe Thornber 	else if (!strcasecmp(argv[0], "reserve_metadata_snap"))
3872cc8394d8SJoe Thornber 		r = process_reserve_metadata_snap_mesg(argc, argv, pool);
3873cc8394d8SJoe Thornber 
3874cc8394d8SJoe Thornber 	else if (!strcasecmp(argv[0], "release_metadata_snap"))
3875cc8394d8SJoe Thornber 		r = process_release_metadata_snap_mesg(argc, argv, pool);
3876cc8394d8SJoe Thornber 
3877991d9fa0SJoe Thornber 	else
3878991d9fa0SJoe Thornber 		DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
3879991d9fa0SJoe Thornber 
3880e49e5829SJoe Thornber 	if (!r)
3881020cc3b5SJoe Thornber 		(void) commit(pool);
3882991d9fa0SJoe Thornber 
3883991d9fa0SJoe Thornber 	return r;
3884991d9fa0SJoe Thornber }
3885991d9fa0SJoe Thornber 
3886e49e5829SJoe Thornber static void emit_flags(struct pool_features *pf, char *result,
388786a3238cSHeinz Mauelshagen 		       unsigned int sz, unsigned int maxlen)
3888e49e5829SJoe Thornber {
388986a3238cSHeinz Mauelshagen 	unsigned int count = !pf->zero_new_blocks + !pf->discard_enabled +
3890787a996cSMike Snitzer 		!pf->discard_passdown + (pf->mode == PM_READ_ONLY) +
3891787a996cSMike Snitzer 		pf->error_if_no_space;
3892e49e5829SJoe Thornber 	DMEMIT("%u ", count);
3893e49e5829SJoe Thornber 
3894e49e5829SJoe Thornber 	if (!pf->zero_new_blocks)
3895e49e5829SJoe Thornber 		DMEMIT("skip_block_zeroing ");
3896e49e5829SJoe Thornber 
3897e49e5829SJoe Thornber 	if (!pf->discard_enabled)
3898e49e5829SJoe Thornber 		DMEMIT("ignore_discard ");
3899e49e5829SJoe Thornber 
3900e49e5829SJoe Thornber 	if (!pf->discard_passdown)
3901e49e5829SJoe Thornber 		DMEMIT("no_discard_passdown ");
3902e49e5829SJoe Thornber 
3903e49e5829SJoe Thornber 	if (pf->mode == PM_READ_ONLY)
3904e49e5829SJoe Thornber 		DMEMIT("read_only ");
3905787a996cSMike Snitzer 
3906787a996cSMike Snitzer 	if (pf->error_if_no_space)
3907787a996cSMike Snitzer 		DMEMIT("error_if_no_space ");
3908e49e5829SJoe Thornber }
3909e49e5829SJoe Thornber 
3910991d9fa0SJoe Thornber /*
3911991d9fa0SJoe Thornber  * Status line is:
3912991d9fa0SJoe Thornber  *    <transaction id> <used metadata blocks>/<total metadata blocks>
3913991d9fa0SJoe Thornber  *    <used data blocks>/<total data blocks> <held metadata root>
3914e4c78e21SMike Snitzer  *    <pool mode> <discard config> <no space config> <needs_check> <metadata low watermark>
3915991d9fa0SJoe Thornber  */
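/*
 * Illustrative STATUSTYPE_INFO output (an editor's sketch; the numbers are
 * made up, only the field order follows the code below):
 *
 *   1 141/4161600 10244/2097152 - rw discard_passdown queue_if_no_space - 1024
 *
 * i.e. transaction id 1, 141 of 4161600 metadata blocks used, 10244 of
 * 2097152 data blocks used, no held metadata root, a read-write pool,
 * discards passed down, IO queued when out of space, no needs_check flag,
 * and a metadata low-water threshold of 1024 blocks.
 */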
3916fd7c092eSMikulas Patocka static void pool_status(struct dm_target *ti, status_type_t type,
391786a3238cSHeinz Mauelshagen 			unsigned int status_flags, char *result, unsigned int maxlen)
3918991d9fa0SJoe Thornber {
3919e49e5829SJoe Thornber 	int r;
392086a3238cSHeinz Mauelshagen 	unsigned int sz = 0;
3921991d9fa0SJoe Thornber 	uint64_t transaction_id;
3922991d9fa0SJoe Thornber 	dm_block_t nr_free_blocks_data;
3923991d9fa0SJoe Thornber 	dm_block_t nr_free_blocks_metadata;
3924991d9fa0SJoe Thornber 	dm_block_t nr_blocks_data;
3925991d9fa0SJoe Thornber 	dm_block_t nr_blocks_metadata;
3926991d9fa0SJoe Thornber 	dm_block_t held_root;
39273ab91828SJoe Thornber 	enum pool_mode mode;
3928991d9fa0SJoe Thornber 	char buf[BDEVNAME_SIZE];
3929991d9fa0SJoe Thornber 	char buf2[BDEVNAME_SIZE];
3930991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
3931991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
3932991d9fa0SJoe Thornber 
3933991d9fa0SJoe Thornber 	switch (type) {
3934991d9fa0SJoe Thornber 	case STATUSTYPE_INFO:
3935e49e5829SJoe Thornber 		if (get_pool_mode(pool) == PM_FAIL) {
3936e49e5829SJoe Thornber 			DMEMIT("Fail");
3937e49e5829SJoe Thornber 			break;
3938e49e5829SJoe Thornber 		}
3939e49e5829SJoe Thornber 
39401f4e0ff0SAlasdair G Kergon 		/* Commit to ensure statistics aren't out-of-date */
39411f4e0ff0SAlasdair G Kergon 		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3942020cc3b5SJoe Thornber 			(void) commit(pool);
39431f4e0ff0SAlasdair G Kergon 
3944fd7c092eSMikulas Patocka 		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
3945fd7c092eSMikulas Patocka 		if (r) {
39464fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_metadata_transaction_id returned %d",
39474fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3948fd7c092eSMikulas Patocka 			goto err;
3949fd7c092eSMikulas Patocka 		}
3950991d9fa0SJoe Thornber 
3951fd7c092eSMikulas Patocka 		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
3952fd7c092eSMikulas Patocka 		if (r) {
39534fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_free_metadata_block_count returned %d",
39544fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3955fd7c092eSMikulas Patocka 			goto err;
3956fd7c092eSMikulas Patocka 		}
3957991d9fa0SJoe Thornber 
3958991d9fa0SJoe Thornber 		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
3959fd7c092eSMikulas Patocka 		if (r) {
39604fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_metadata_dev_size returned %d",
39614fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3962fd7c092eSMikulas Patocka 			goto err;
3963fd7c092eSMikulas Patocka 		}
3964991d9fa0SJoe Thornber 
3965fd7c092eSMikulas Patocka 		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
3966fd7c092eSMikulas Patocka 		if (r) {
39674fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_free_block_count returned %d",
39684fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3969fd7c092eSMikulas Patocka 			goto err;
3970fd7c092eSMikulas Patocka 		}
3971991d9fa0SJoe Thornber 
3972991d9fa0SJoe Thornber 		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
3973fd7c092eSMikulas Patocka 		if (r) {
39744fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_data_dev_size returned %d",
39754fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3976fd7c092eSMikulas Patocka 			goto err;
3977fd7c092eSMikulas Patocka 		}
3978991d9fa0SJoe Thornber 
3979cc8394d8SJoe Thornber 		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
3980fd7c092eSMikulas Patocka 		if (r) {
39814fa5971aSMike Snitzer 			DMERR("%s: dm_pool_get_metadata_snap returned %d",
39824fa5971aSMike Snitzer 			      dm_device_name(pool->pool_md), r);
3983fd7c092eSMikulas Patocka 			goto err;
3984fd7c092eSMikulas Patocka 		}
3985991d9fa0SJoe Thornber 
3986991d9fa0SJoe Thornber 		DMEMIT("%llu %llu/%llu %llu/%llu ",
3987991d9fa0SJoe Thornber 		       (unsigned long long)transaction_id,
3988991d9fa0SJoe Thornber 		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3989991d9fa0SJoe Thornber 		       (unsigned long long)nr_blocks_metadata,
3990991d9fa0SJoe Thornber 		       (unsigned long long)(nr_blocks_data - nr_free_blocks_data),
3991991d9fa0SJoe Thornber 		       (unsigned long long)nr_blocks_data);
3992991d9fa0SJoe Thornber 
3993991d9fa0SJoe Thornber 		if (held_root)
3994991d9fa0SJoe Thornber 			DMEMIT("%llu ", held_root);
3995991d9fa0SJoe Thornber 		else
3996991d9fa0SJoe Thornber 			DMEMIT("- ");
3997991d9fa0SJoe Thornber 
39983ab91828SJoe Thornber 		mode = get_pool_mode(pool);
39993ab91828SJoe Thornber 		if (mode == PM_OUT_OF_DATA_SPACE)
40003e1a0699SJoe Thornber 			DMEMIT("out_of_data_space ");
40013ab91828SJoe Thornber 		else if (is_read_only_pool_mode(mode))
4002e49e5829SJoe Thornber 			DMEMIT("ro ");
4003e49e5829SJoe Thornber 		else
4004e49e5829SJoe Thornber 			DMEMIT("rw ");
4005e49e5829SJoe Thornber 
4006018debeaSMike Snitzer 		if (!pool->pf.discard_enabled)
4007018debeaSMike Snitzer 			DMEMIT("ignore_discard ");
4008018debeaSMike Snitzer 		else if (pool->pf.discard_passdown)
4009e49e5829SJoe Thornber 			DMEMIT("discard_passdown ");
4010e49e5829SJoe Thornber 		else
4011e49e5829SJoe Thornber 			DMEMIT("no_discard_passdown ");
4012e49e5829SJoe Thornber 
4013787a996cSMike Snitzer 		if (pool->pf.error_if_no_space)
4014787a996cSMike Snitzer 			DMEMIT("error_if_no_space ");
4015787a996cSMike Snitzer 		else
4016787a996cSMike Snitzer 			DMEMIT("queue_if_no_space ");
4017787a996cSMike Snitzer 
4018e4c78e21SMike Snitzer 		if (dm_pool_metadata_needs_check(pool->pmd))
4019e4c78e21SMike Snitzer 			DMEMIT("needs_check ");
4020e4c78e21SMike Snitzer 		else
4021e4c78e21SMike Snitzer 			DMEMIT("- ");
4022e4c78e21SMike Snitzer 
402363c8ecb6SAndy Grover 		DMEMIT("%llu ", (unsigned long long)calc_metadata_threshold(pt));
402463c8ecb6SAndy Grover 
4025991d9fa0SJoe Thornber 		break;
4026991d9fa0SJoe Thornber 
4027991d9fa0SJoe Thornber 	case STATUSTYPE_TABLE:
4028991d9fa0SJoe Thornber 		DMEMIT("%s %s %lu %llu ",
4029991d9fa0SJoe Thornber 		       format_dev_t(buf, pt->metadata_dev->bdev->bd_dev),
4030991d9fa0SJoe Thornber 		       format_dev_t(buf2, pt->data_dev->bdev->bd_dev),
4031991d9fa0SJoe Thornber 		       (unsigned long)pool->sectors_per_block,
4032991d9fa0SJoe Thornber 		       (unsigned long long)pt->low_water_blocks);
40330424caa1SMike Snitzer 		emit_flags(&pt->requested_pf, result, sz, maxlen);
4034991d9fa0SJoe Thornber 		break;
40358ec45662STushar Sugandhi 
40368ec45662STushar Sugandhi 	case STATUSTYPE_IMA:
40378ec45662STushar Sugandhi 		*result = '\0';
40388ec45662STushar Sugandhi 		break;
4039991d9fa0SJoe Thornber 	}
4040fd7c092eSMikulas Patocka 	return;
4041991d9fa0SJoe Thornber 
4042fd7c092eSMikulas Patocka err:
4043fd7c092eSMikulas Patocka 	DMEMIT("Error");
4044991d9fa0SJoe Thornber }
4045991d9fa0SJoe Thornber 
4046991d9fa0SJoe Thornber static int pool_iterate_devices(struct dm_target *ti,
4047991d9fa0SJoe Thornber 				iterate_devices_callout_fn fn, void *data)
4048991d9fa0SJoe Thornber {
4049991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
4050991d9fa0SJoe Thornber 
4051991d9fa0SJoe Thornber 	return fn(ti, pt->data_dev, 0, ti->len, data);
4052991d9fa0SJoe Thornber }
4053991d9fa0SJoe Thornber 
4054991d9fa0SJoe Thornber static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
4055991d9fa0SJoe Thornber {
4056991d9fa0SJoe Thornber 	struct pool_c *pt = ti->private;
4057991d9fa0SJoe Thornber 	struct pool *pool = pt->pool;
4058604ea906SMike Snitzer 	sector_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
4059604ea906SMike Snitzer 
4060604ea906SMike Snitzer 	/*
4061d200c30eSMike Snitzer 	 * If max_sectors is smaller than pool->sectors_per_block adjust it
4062d200c30eSMike Snitzer 	 * to the highest possible power-of-2 factor of pool->sectors_per_block.
4063d200c30eSMike Snitzer 	 * This is especially beneficial when the pool's data device is a RAID
4064d200c30eSMike Snitzer 	 * device whose full stripe width matches pool->sectors_per_block:
4065d200c30eSMike Snitzer 	 * even though partial RAID stripe-sized IOs will be issued to a
4066d200c30eSMike Snitzer 	 * single RAID stripe, when aggregated they end on a full RAID stripe
4067d200c30eSMike Snitzer 	 * boundary, which avoids cascading additional partial RAID stripe writes.
4068604ea906SMike Snitzer 	 */
4069604ea906SMike Snitzer 	if (limits->max_sectors < pool->sectors_per_block) {
4070604ea906SMike Snitzer 		while (!is_factor(pool->sectors_per_block, limits->max_sectors)) {
4071604ea906SMike Snitzer 			if ((limits->max_sectors & (limits->max_sectors - 1)) == 0)
4072604ea906SMike Snitzer 				limits->max_sectors--;
4073604ea906SMike Snitzer 			limits->max_sectors = rounddown_pow_of_two(limits->max_sectors);
4074604ea906SMike Snitzer 		}
4075604ea906SMike Snitzer 	}
4076991d9fa0SJoe Thornber 
40770cc67cd9SMike Snitzer 	/*
40780cc67cd9SMike Snitzer 	 * If the system-determined stacked limits are compatible with the
40790cc67cd9SMike Snitzer 	 * pool's blocksize (io_opt is a factor) do not override them.
40800cc67cd9SMike Snitzer 	 */
40810cc67cd9SMike Snitzer 	if (io_opt_sectors < pool->sectors_per_block ||
4082604ea906SMike Snitzer 	    !is_factor(io_opt_sectors, pool->sectors_per_block)) {
4083604ea906SMike Snitzer 		if (is_factor(pool->sectors_per_block, limits->max_sectors))
4084604ea906SMike Snitzer 			blk_limits_io_min(limits, limits->max_sectors << SECTOR_SHIFT);
4085604ea906SMike Snitzer 		else
4086fdfb4c8cSMike Snitzer 			blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
4087991d9fa0SJoe Thornber 		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
40880cc67cd9SMike Snitzer 	}
40890424caa1SMike Snitzer 
40900424caa1SMike Snitzer 	/*
40910424caa1SMike Snitzer 	 * pt->adjusted_pf is a staging area for the actual features to use.
40920424caa1SMike Snitzer 	 * They get transferred to the live pool in bind_control_target()
40930424caa1SMike Snitzer 	 * called from pool_preresume().
40940424caa1SMike Snitzer 	 */
4095ef6953fbSMike Snitzer 
4096ef6953fbSMike Snitzer 	if (pt->adjusted_pf.discard_enabled) {
4097fa375646SMike Snitzer 		disable_discard_passdown_if_not_supported(pt);
4098fa375646SMike Snitzer 		if (!pt->adjusted_pf.discard_passdown)
4099fa375646SMike Snitzer 			limits->max_discard_sectors = 0;
4100ef6953fbSMike Snitzer 		/*
4101ef6953fbSMike Snitzer 		 * The pool uses the same discard limits as the underlying data
4102ef6953fbSMike Snitzer 		 * device.  DM core has already set this up.
4103ef6953fbSMike Snitzer 		 */
4104ef6953fbSMike Snitzer 	} else {
4105b60ab990SMike Snitzer 		/*
4106b60ab990SMike Snitzer 		 * Must explicitly disallow stacking discard limits otherwise the
4107b60ab990SMike Snitzer 		 * block layer will stack them if pool's data device has support.
4108b60ab990SMike Snitzer 		 */
4109b60ab990SMike Snitzer 		limits->discard_granularity = 0;
4110b60ab990SMike Snitzer 	}
4111991d9fa0SJoe Thornber }
4112991d9fa0SJoe Thornber 
4113991d9fa0SJoe Thornber static struct target_type pool_target = {
4114991d9fa0SJoe Thornber 	.name = "thin-pool",
4115991d9fa0SJoe Thornber 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
4116991d9fa0SJoe Thornber 		    DM_TARGET_IMMUTABLE,
4117e2dd8acaSJoe Thornber 	.version = {1, 23, 0},
4118991d9fa0SJoe Thornber 	.module = THIS_MODULE,
4119991d9fa0SJoe Thornber 	.ctr = pool_ctr,
4120991d9fa0SJoe Thornber 	.dtr = pool_dtr,
4121991d9fa0SJoe Thornber 	.map = pool_map,
412280e96c54SMike Snitzer 	.presuspend = pool_presuspend,
412380e96c54SMike Snitzer 	.presuspend_undo = pool_presuspend_undo,
4124991d9fa0SJoe Thornber 	.postsuspend = pool_postsuspend,
4125991d9fa0SJoe Thornber 	.preresume = pool_preresume,
4126991d9fa0SJoe Thornber 	.resume = pool_resume,
4127991d9fa0SJoe Thornber 	.message = pool_message,
4128991d9fa0SJoe Thornber 	.status = pool_status,
4129991d9fa0SJoe Thornber 	.iterate_devices = pool_iterate_devices,
4130991d9fa0SJoe Thornber 	.io_hints = pool_io_hints,
4131991d9fa0SJoe Thornber };
4132991d9fa0SJoe Thornber 
4133a4a82ce3SHeinz Mauelshagen /*
4134a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
4135991d9fa0SJoe Thornber  * Thin target methods
4136a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
4137a4a82ce3SHeinz Mauelshagen  */
4138b10ebd34SJoe Thornber static void thin_get(struct thin_c *tc)
4139b10ebd34SJoe Thornber {
414022d4c291SJohn Pittman 	refcount_inc(&tc->refcount);
4141b10ebd34SJoe Thornber }
4142b10ebd34SJoe Thornber 
4143b10ebd34SJoe Thornber static void thin_put(struct thin_c *tc)
4144b10ebd34SJoe Thornber {
414522d4c291SJohn Pittman 	if (refcount_dec_and_test(&tc->refcount))
4146b10ebd34SJoe Thornber 		complete(&tc->can_destroy);
4147b10ebd34SJoe Thornber }
4148b10ebd34SJoe Thornber 
4149991d9fa0SJoe Thornber static void thin_dtr(struct dm_target *ti)
4150991d9fa0SJoe Thornber {
4151991d9fa0SJoe Thornber 	struct thin_c *tc = ti->private;
4152c140e1c4SMike Snitzer 
41538e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->pool->lock);
4154c140e1c4SMike Snitzer 	list_del_rcu(&tc->list);
41558e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->pool->lock);
4156c140e1c4SMike Snitzer 	synchronize_rcu();
4157991d9fa0SJoe Thornber 
415817181fb7SMikulas Patocka 	thin_put(tc);
415917181fb7SMikulas Patocka 	wait_for_completion(&tc->can_destroy);
416017181fb7SMikulas Patocka 
4161991d9fa0SJoe Thornber 	mutex_lock(&dm_thin_pool_table.mutex);
4162991d9fa0SJoe Thornber 
4163991d9fa0SJoe Thornber 	__pool_dec(tc->pool);
4164991d9fa0SJoe Thornber 	dm_pool_close_thin_device(tc->td);
4165991d9fa0SJoe Thornber 	dm_put_device(ti, tc->pool_dev);
41662dd9c257SJoe Thornber 	if (tc->origin_dev)
41672dd9c257SJoe Thornber 		dm_put_device(ti, tc->origin_dev);
4168991d9fa0SJoe Thornber 	kfree(tc);
4169991d9fa0SJoe Thornber 
4170991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
4171991d9fa0SJoe Thornber }
4172991d9fa0SJoe Thornber 
4173991d9fa0SJoe Thornber /*
4174991d9fa0SJoe Thornber  * Thin target parameters:
4175991d9fa0SJoe Thornber  *
41762dd9c257SJoe Thornber  * <pool_dev> <dev_id> [origin_dev]
4177991d9fa0SJoe Thornber  *
4178991d9fa0SJoe Thornber  * pool_dev: the path to the pool (eg, /dev/mapper/my_pool)
4179991d9fa0SJoe Thornber  * dev_id: the internal device identifier
41802dd9c257SJoe Thornber  * origin_dev: a device external to the pool that should act as the origin
418167e2e2b2SJoe Thornber  *
418267e2e2b2SJoe Thornber  * If the pool device has discards disabled, they get disabled for the thin
418367e2e2b2SJoe Thornber  * device as well.
4184991d9fa0SJoe Thornber  */
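/*
 * Illustrative usage (an editor's sketch; device names and sizes are
 * assumptions, not taken from this file):
 *
 *   dmsetup message /dev/mapper/pool 0 "create_thin 0"
 *   dmsetup create thin0 --table "0 2097152 thin /dev/mapper/pool 0"
 *
 * This would expose thin device id 0 from the pool as a 1GiB (2097152-sector)
 * device; an optional third table argument names an external origin device.
 */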
418586a3238cSHeinz Mauelshagen static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
4186991d9fa0SJoe Thornber {
4187991d9fa0SJoe Thornber 	int r;
4188991d9fa0SJoe Thornber 	struct thin_c *tc;
41892dd9c257SJoe Thornber 	struct dm_dev *pool_dev, *origin_dev;
4190991d9fa0SJoe Thornber 	struct mapped_device *pool_md;
4191991d9fa0SJoe Thornber 
4192991d9fa0SJoe Thornber 	mutex_lock(&dm_thin_pool_table.mutex);
4193991d9fa0SJoe Thornber 
41942dd9c257SJoe Thornber 	if (argc != 2 && argc != 3) {
4195991d9fa0SJoe Thornber 		ti->error = "Invalid argument count";
4196991d9fa0SJoe Thornber 		r = -EINVAL;
4197991d9fa0SJoe Thornber 		goto out_unlock;
4198991d9fa0SJoe Thornber 	}
4199991d9fa0SJoe Thornber 
4200991d9fa0SJoe Thornber 	tc = ti->private = kzalloc(sizeof(*tc), GFP_KERNEL);
4201991d9fa0SJoe Thornber 	if (!tc) {
4202991d9fa0SJoe Thornber 		ti->error = "Out of memory";
4203991d9fa0SJoe Thornber 		r = -ENOMEM;
4204991d9fa0SJoe Thornber 		goto out_unlock;
4205991d9fa0SJoe Thornber 	}
4206583024d2SMike Snitzer 	tc->thin_md = dm_table_get_md(ti->table);
4207c140e1c4SMike Snitzer 	spin_lock_init(&tc->lock);
4208a374bb21SJoe Thornber 	INIT_LIST_HEAD(&tc->deferred_cells);
4209c140e1c4SMike Snitzer 	bio_list_init(&tc->deferred_bio_list);
4210c140e1c4SMike Snitzer 	bio_list_init(&tc->retry_on_resume_list);
421167324ea1SMike Snitzer 	tc->sort_bio_list = RB_ROOT;
4212991d9fa0SJoe Thornber 
42132dd9c257SJoe Thornber 	if (argc == 3) {
421470de2cbdSJason Cai (Xiang Feng) 		if (!strcmp(argv[0], argv[2])) {
421570de2cbdSJason Cai (Xiang Feng) 			ti->error = "Error setting origin device";
421670de2cbdSJason Cai (Xiang Feng) 			r = -EINVAL;
421770de2cbdSJason Cai (Xiang Feng) 			goto bad_origin_dev;
421870de2cbdSJason Cai (Xiang Feng) 		}
421970de2cbdSJason Cai (Xiang Feng) 
422005bdb996SChristoph Hellwig 		r = dm_get_device(ti, argv[2], BLK_OPEN_READ, &origin_dev);
42212dd9c257SJoe Thornber 		if (r) {
42222dd9c257SJoe Thornber 			ti->error = "Error opening origin device";
42232dd9c257SJoe Thornber 			goto bad_origin_dev;
42242dd9c257SJoe Thornber 		}
42252dd9c257SJoe Thornber 		tc->origin_dev = origin_dev;
42262dd9c257SJoe Thornber 	}
42272dd9c257SJoe Thornber 
4228991d9fa0SJoe Thornber 	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &pool_dev);
4229991d9fa0SJoe Thornber 	if (r) {
4230991d9fa0SJoe Thornber 		ti->error = "Error opening pool device";
4231991d9fa0SJoe Thornber 		goto bad_pool_dev;
4232991d9fa0SJoe Thornber 	}
4233991d9fa0SJoe Thornber 	tc->pool_dev = pool_dev;
4234991d9fa0SJoe Thornber 
4235991d9fa0SJoe Thornber 	if (read_dev_id(argv[1], (unsigned long long *)&tc->dev_id, 0)) {
4236991d9fa0SJoe Thornber 		ti->error = "Invalid device id";
4237991d9fa0SJoe Thornber 		r = -EINVAL;
4238991d9fa0SJoe Thornber 		goto bad_common;
4239991d9fa0SJoe Thornber 	}
4240991d9fa0SJoe Thornber 
4241991d9fa0SJoe Thornber 	pool_md = dm_get_md(tc->pool_dev->bdev->bd_dev);
4242991d9fa0SJoe Thornber 	if (!pool_md) {
4243991d9fa0SJoe Thornber 		ti->error = "Couldn't get pool mapped device";
4244991d9fa0SJoe Thornber 		r = -EINVAL;
4245991d9fa0SJoe Thornber 		goto bad_common;
4246991d9fa0SJoe Thornber 	}
4247991d9fa0SJoe Thornber 
4248991d9fa0SJoe Thornber 	tc->pool = __pool_table_lookup(pool_md);
4249991d9fa0SJoe Thornber 	if (!tc->pool) {
4250991d9fa0SJoe Thornber 		ti->error = "Couldn't find pool object";
4251991d9fa0SJoe Thornber 		r = -EINVAL;
4252991d9fa0SJoe Thornber 		goto bad_pool_lookup;
4253991d9fa0SJoe Thornber 	}
4254991d9fa0SJoe Thornber 	__pool_inc(tc->pool);
4255991d9fa0SJoe Thornber 
4256e49e5829SJoe Thornber 	if (get_pool_mode(tc->pool) == PM_FAIL) {
4257e49e5829SJoe Thornber 		ti->error = "Couldn't open thin device, Pool is in fail mode";
42581acacc07SMike Snitzer 		r = -EINVAL;
425980e96c54SMike Snitzer 		goto bad_pool;
4260e49e5829SJoe Thornber 	}
4261e49e5829SJoe Thornber 
4262991d9fa0SJoe Thornber 	r = dm_pool_open_thin_device(tc->pool->pmd, tc->dev_id, &tc->td);
4263991d9fa0SJoe Thornber 	if (r) {
4264991d9fa0SJoe Thornber 		ti->error = "Couldn't open thin internal device";
426580e96c54SMike Snitzer 		goto bad_pool;
4266991d9fa0SJoe Thornber 	}
4267991d9fa0SJoe Thornber 
4268542f9038SMike Snitzer 	r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block);
4269542f9038SMike Snitzer 	if (r)
427080e96c54SMike Snitzer 		goto bad;
4271542f9038SMike Snitzer 
427255a62eefSAlasdair G Kergon 	ti->num_flush_bios = 1;
42739bbf5feeSColy Li 	ti->limit_swap_bios = true;
427416ad3d10SJoe Thornber 	ti->flush_supported = true;
4275a9251281SMike Snitzer 	ti->accounts_remapped_io = true;
427630187e1dSMike Snitzer 	ti->per_io_data_size = sizeof(struct dm_thin_endio_hook);
427767e2e2b2SJoe Thornber 
427867e2e2b2SJoe Thornber 	/* In case the pool supports discards, pass them on. */
427967e2e2b2SJoe Thornber 	if (tc->pool->pf.discard_enabled) {
42800ac55489SAlasdair G Kergon 		ti->discards_supported = true;
428155a62eefSAlasdair G Kergon 		ti->num_discard_bios = 1;
4282e2dd8acaSJoe Thornber 		ti->max_discard_granularity = true;
428367e2e2b2SJoe Thornber 	}
4284991d9fa0SJoe Thornber 
4285991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
4286991d9fa0SJoe Thornber 
42878e0c9dacSMikulas Patocka 	spin_lock_irq(&tc->pool->lock);
428880e96c54SMike Snitzer 	if (tc->pool->suspended) {
42898e0c9dacSMikulas Patocka 		spin_unlock_irq(&tc->pool->lock);
429080e96c54SMike Snitzer 		mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */
429180e96c54SMike Snitzer 		ti->error = "Unable to activate thin device while pool is suspended";
429280e96c54SMike Snitzer 		r = -EINVAL;
429380e96c54SMike Snitzer 		goto bad;
429480e96c54SMike Snitzer 	}
429522d4c291SJohn Pittman 	refcount_set(&tc->refcount, 1);
42962b94e896SMarc Dionne 	init_completion(&tc->can_destroy);
4297c140e1c4SMike Snitzer 	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
42988e0c9dacSMikulas Patocka 	spin_unlock_irq(&tc->pool->lock);
4299c140e1c4SMike Snitzer 	/*
4300c140e1c4SMike Snitzer 	 * This synchronize_rcu() call is needed here otherwise we risk a
4301c140e1c4SMike Snitzer 	 * wake_worker() call finding no bios to process (because the newly
4302c140e1c4SMike Snitzer 	 * added tc isn't yet visible).  So this reduces latency since we
4303c140e1c4SMike Snitzer 	 * aren't then dependent on the periodic commit to wake_worker().
4304c140e1c4SMike Snitzer 	 */
4305c140e1c4SMike Snitzer 	synchronize_rcu();
4306c140e1c4SMike Snitzer 
430780e96c54SMike Snitzer 	dm_put(pool_md);
430880e96c54SMike Snitzer 
4309991d9fa0SJoe Thornber 	return 0;
4310991d9fa0SJoe Thornber 
431180e96c54SMike Snitzer bad:
43121acacc07SMike Snitzer 	dm_pool_close_thin_device(tc->td);
431380e96c54SMike Snitzer bad_pool:
4314991d9fa0SJoe Thornber 	__pool_dec(tc->pool);
4315991d9fa0SJoe Thornber bad_pool_lookup:
4316991d9fa0SJoe Thornber 	dm_put(pool_md);
4317991d9fa0SJoe Thornber bad_common:
4318991d9fa0SJoe Thornber 	dm_put_device(ti, tc->pool_dev);
4319991d9fa0SJoe Thornber bad_pool_dev:
43202dd9c257SJoe Thornber 	if (tc->origin_dev)
43212dd9c257SJoe Thornber 		dm_put_device(ti, tc->origin_dev);
43222dd9c257SJoe Thornber bad_origin_dev:
4323991d9fa0SJoe Thornber 	kfree(tc);
4324991d9fa0SJoe Thornber out_unlock:
4325991d9fa0SJoe Thornber 	mutex_unlock(&dm_thin_pool_table.mutex);
4326991d9fa0SJoe Thornber 
4327991d9fa0SJoe Thornber 	return r;
4328991d9fa0SJoe Thornber }
4329991d9fa0SJoe Thornber 
43307de3ee57SMikulas Patocka static int thin_map(struct dm_target *ti, struct bio *bio)
4331991d9fa0SJoe Thornber {
43324f024f37SKent Overstreet 	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
4333991d9fa0SJoe Thornber 
43347de3ee57SMikulas Patocka 	return thin_bio_map(ti, bio);
4335991d9fa0SJoe Thornber }
4336991d9fa0SJoe Thornber 
43374e4cbee9SChristoph Hellwig static int thin_endio(struct dm_target *ti, struct bio *bio,
43384e4cbee9SChristoph Hellwig 		blk_status_t *err)
4339eb2aa48dSJoe Thornber {
4340eb2aa48dSJoe Thornber 	unsigned long flags;
434159c3d2c6SMikulas Patocka 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
4342eb2aa48dSJoe Thornber 	struct list_head work;
4343a24c2569SMike Snitzer 	struct dm_thin_new_mapping *m, *tmp;
4344eb2aa48dSJoe Thornber 	struct pool *pool = h->tc->pool;
4345eb2aa48dSJoe Thornber 
4346eb2aa48dSJoe Thornber 	if (h->shared_read_entry) {
4347eb2aa48dSJoe Thornber 		INIT_LIST_HEAD(&work);
434844feb387SMike Snitzer 		dm_deferred_entry_dec(h->shared_read_entry, &work);
4349eb2aa48dSJoe Thornber 
4350eb2aa48dSJoe Thornber 		spin_lock_irqsave(&pool->lock, flags);
4351eb2aa48dSJoe Thornber 		list_for_each_entry_safe(m, tmp, &work, list) {
4352eb2aa48dSJoe Thornber 			list_del(&m->list);
435350f3c3efSJoe Thornber 			__complete_mapping_preparation(m);
4354eb2aa48dSJoe Thornber 		}
4355eb2aa48dSJoe Thornber 		spin_unlock_irqrestore(&pool->lock, flags);
4356eb2aa48dSJoe Thornber 	}
4357eb2aa48dSJoe Thornber 
4358104655fdSJoe Thornber 	if (h->all_io_entry) {
4359104655fdSJoe Thornber 		INIT_LIST_HEAD(&work);
436044feb387SMike Snitzer 		dm_deferred_entry_dec(h->all_io_entry, &work);
4361563af186SJoe Thornber 		if (!list_empty(&work)) {
4362c3a0ce2eSMike Snitzer 			spin_lock_irqsave(&pool->lock, flags);
4363104655fdSJoe Thornber 			list_for_each_entry_safe(m, tmp, &work, list)
4364daec338bSMike Snitzer 				list_add_tail(&m->list, &pool->prepared_discards);
4365c3a0ce2eSMike Snitzer 			spin_unlock_irqrestore(&pool->lock, flags);
4366563af186SJoe Thornber 			wake_worker(pool);
4367563af186SJoe Thornber 		}
4368104655fdSJoe Thornber 	}
4369104655fdSJoe Thornber 
437034fbcf62SJoe Thornber 	if (h->cell)
437134fbcf62SJoe Thornber 		cell_defer_no_holder(h->tc, h->cell);
437234fbcf62SJoe Thornber 
43731be56909SChristoph Hellwig 	return DM_ENDIO_DONE;
4374eb2aa48dSJoe Thornber }
4375eb2aa48dSJoe Thornber 
4376738211f7SJoe Thornber static void thin_presuspend(struct dm_target *ti)
4377738211f7SJoe Thornber {
4378738211f7SJoe Thornber 	struct thin_c *tc = ti->private;
4379738211f7SJoe Thornber 
4380738211f7SJoe Thornber 	if (dm_noflush_suspending(ti))
4381738211f7SJoe Thornber 		noflush_work(tc, do_noflush_start);
4382738211f7SJoe Thornber }
4383738211f7SJoe Thornber 
4384991d9fa0SJoe Thornber static void thin_postsuspend(struct dm_target *ti)
4385991d9fa0SJoe Thornber {
4386738211f7SJoe Thornber 	struct thin_c *tc = ti->private;
4387738211f7SJoe Thornber 
4388738211f7SJoe Thornber 	/*
4389738211f7SJoe Thornber 	 * The dm_noflush_suspending flag has been cleared by now, so
4390738211f7SJoe Thornber 	 * unfortunately we must always run this.
4391738211f7SJoe Thornber 	 */
4392738211f7SJoe Thornber 	noflush_work(tc, do_noflush_stop);
4393991d9fa0SJoe Thornber }
4394991d9fa0SJoe Thornber 
4395e5aea7b4SJoe Thornber static int thin_preresume(struct dm_target *ti)
4396e5aea7b4SJoe Thornber {
4397e5aea7b4SJoe Thornber 	struct thin_c *tc = ti->private;
4398e5aea7b4SJoe Thornber 
4399e5aea7b4SJoe Thornber 	if (tc->origin_dev)
4400e5aea7b4SJoe Thornber 		tc->origin_size = get_dev_size(tc->origin_dev->bdev);
4401e5aea7b4SJoe Thornber 
4402e5aea7b4SJoe Thornber 	return 0;
4403e5aea7b4SJoe Thornber }
4404e5aea7b4SJoe Thornber 
4405991d9fa0SJoe Thornber /*
4406991d9fa0SJoe Thornber  * <nr mapped sectors> <highest mapped sector>
4407991d9fa0SJoe Thornber  */
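/*
 * Example STATUSTYPE_INFO output (values are illustrative), for a thin
 * device with 1024000 sectors mapped and whose highest mapped sector is
 * the last sector of a 1 GiB volume:
 *
 *   # dmsetup status my_thin
 *   0 2097152 thin 1024000 2097151
 */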
4408fd7c092eSMikulas Patocka static void thin_status(struct dm_target *ti, status_type_t type,
440986a3238cSHeinz Mauelshagen 			unsigned int status_flags, char *result, unsigned int maxlen)
4410991d9fa0SJoe Thornber {
4411991d9fa0SJoe Thornber 	int r;
4412991d9fa0SJoe Thornber 	ssize_t sz = 0;
4413991d9fa0SJoe Thornber 	dm_block_t mapped, highest;
4414991d9fa0SJoe Thornber 	char buf[BDEVNAME_SIZE];
4415991d9fa0SJoe Thornber 	struct thin_c *tc = ti->private;
4416991d9fa0SJoe Thornber 
4417e49e5829SJoe Thornber 	if (get_pool_mode(tc->pool) == PM_FAIL) {
4418e49e5829SJoe Thornber 		DMEMIT("Fail");
4419fd7c092eSMikulas Patocka 		return;
4420e49e5829SJoe Thornber 	}
4421e49e5829SJoe Thornber 
4422991d9fa0SJoe Thornber 	if (!tc->td)
4423991d9fa0SJoe Thornber 		DMEMIT("-");
4424991d9fa0SJoe Thornber 	else {
4425991d9fa0SJoe Thornber 		switch (type) {
4426991d9fa0SJoe Thornber 		case STATUSTYPE_INFO:
4427991d9fa0SJoe Thornber 			r = dm_thin_get_mapped_count(tc->td, &mapped);
4428fd7c092eSMikulas Patocka 			if (r) {
4429fd7c092eSMikulas Patocka 				DMERR("dm_thin_get_mapped_count returned %d", r);
4430fd7c092eSMikulas Patocka 				goto err;
4431fd7c092eSMikulas Patocka 			}
4432991d9fa0SJoe Thornber 
4433991d9fa0SJoe Thornber 			r = dm_thin_get_highest_mapped_block(tc->td, &highest);
4434fd7c092eSMikulas Patocka 			if (r < 0) {
4435fd7c092eSMikulas Patocka 				DMERR("dm_thin_get_highest_mapped_block returned %d", r);
4436fd7c092eSMikulas Patocka 				goto err;
4437fd7c092eSMikulas Patocka 			}
4438991d9fa0SJoe Thornber 
4439991d9fa0SJoe Thornber 			DMEMIT("%llu ", mapped * tc->pool->sectors_per_block);
4440991d9fa0SJoe Thornber 			if (r)
4441991d9fa0SJoe Thornber 				DMEMIT("%llu", ((highest + 1) *
4442991d9fa0SJoe Thornber 						tc->pool->sectors_per_block) - 1);
4443991d9fa0SJoe Thornber 			else
4444991d9fa0SJoe Thornber 				DMEMIT("-");
4445991d9fa0SJoe Thornber 			break;
4446991d9fa0SJoe Thornber 
4447991d9fa0SJoe Thornber 		case STATUSTYPE_TABLE:
4448991d9fa0SJoe Thornber 			DMEMIT("%s %lu",
4449991d9fa0SJoe Thornber 			       format_dev_t(buf, tc->pool_dev->bdev->bd_dev),
4450991d9fa0SJoe Thornber 			       (unsigned long) tc->dev_id);
44512dd9c257SJoe Thornber 			if (tc->origin_dev)
44522dd9c257SJoe Thornber 				DMEMIT(" %s", format_dev_t(buf, tc->origin_dev->bdev->bd_dev));
4453991d9fa0SJoe Thornber 			break;
44548ec45662STushar Sugandhi 
44558ec45662STushar Sugandhi 		case STATUSTYPE_IMA:
44568ec45662STushar Sugandhi 			*result = '\0';
44578ec45662STushar Sugandhi 			break;
4458991d9fa0SJoe Thornber 		}
4459991d9fa0SJoe Thornber 	}
4460991d9fa0SJoe Thornber 
4461fd7c092eSMikulas Patocka 	return;
4462fd7c092eSMikulas Patocka 
4463fd7c092eSMikulas Patocka err:
4464fd7c092eSMikulas Patocka 	DMEMIT("Error");
4465991d9fa0SJoe Thornber }
4466991d9fa0SJoe Thornber 
4467991d9fa0SJoe Thornber static int thin_iterate_devices(struct dm_target *ti,
4468991d9fa0SJoe Thornber 				iterate_devices_callout_fn fn, void *data)
4469991d9fa0SJoe Thornber {
447055f2b8bdSMike Snitzer 	sector_t blocks;
4471991d9fa0SJoe Thornber 	struct thin_c *tc = ti->private;
447255f2b8bdSMike Snitzer 	struct pool *pool = tc->pool;
4473991d9fa0SJoe Thornber 
4474991d9fa0SJoe Thornber 	/*
4475991d9fa0SJoe Thornber 	 * We can't call dm_pool_get_data_dev_size() since that blocks.  So
4476991d9fa0SJoe Thornber 	 * we follow a more convoluted path through to the pool's target.
4477991d9fa0SJoe Thornber 	 */
447855f2b8bdSMike Snitzer 	if (!pool->ti)
4479991d9fa0SJoe Thornber 		return 0;	/* nothing is bound */
4480991d9fa0SJoe Thornber 
448155f2b8bdSMike Snitzer 	blocks = pool->ti->len;
448255f2b8bdSMike Snitzer 	(void) sector_div(blocks, pool->sectors_per_block);
4483991d9fa0SJoe Thornber 	if (blocks)
448455f2b8bdSMike Snitzer 		return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data);
4485991d9fa0SJoe Thornber 
4486991d9fa0SJoe Thornber 	return 0;
4487991d9fa0SJoe Thornber }
4488991d9fa0SJoe Thornber 
448934fbcf62SJoe Thornber static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
449034fbcf62SJoe Thornber {
449134fbcf62SJoe Thornber 	struct thin_c *tc = ti->private;
449234fbcf62SJoe Thornber 	struct pool *pool = tc->pool;
449321607670SMike Snitzer 
4494ef6953fbSMike Snitzer 	if (pool->pf.discard_enabled) {
449534fbcf62SJoe Thornber 		limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
4496e2dd8acaSJoe Thornber 		limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE;
449734fbcf62SJoe Thornber 	}
4498ef6953fbSMike Snitzer }
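/*
 * Worked example (assuming BIO_PRISON_MAX_RANGE is 1024): with a 64 KiB pool
 * block size, sectors_per_block is 128, so discards are advertised with a
 * granularity of 128 << SECTOR_SHIFT = 64 KiB and a maximum of
 * 128 * 1024 = 131072 sectors (64 MiB) per discard bio.
 */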
449934fbcf62SJoe Thornber 
4500991d9fa0SJoe Thornber static struct target_type thin_target = {
4501991d9fa0SJoe Thornber 	.name = "thin",
4502e2dd8acaSJoe Thornber 	.version = {1, 23, 0},
4503991d9fa0SJoe Thornber 	.module	= THIS_MODULE,
4504991d9fa0SJoe Thornber 	.ctr = thin_ctr,
4505991d9fa0SJoe Thornber 	.dtr = thin_dtr,
4506991d9fa0SJoe Thornber 	.map = thin_map,
4507eb2aa48dSJoe Thornber 	.end_io = thin_endio,
4508e5aea7b4SJoe Thornber 	.preresume = thin_preresume,
4509738211f7SJoe Thornber 	.presuspend = thin_presuspend,
4510991d9fa0SJoe Thornber 	.postsuspend = thin_postsuspend,
4511991d9fa0SJoe Thornber 	.status = thin_status,
4512991d9fa0SJoe Thornber 	.iterate_devices = thin_iterate_devices,
451334fbcf62SJoe Thornber 	.io_hints = thin_io_hints,
4514991d9fa0SJoe Thornber };
4515991d9fa0SJoe Thornber 
4516991d9fa0SJoe Thornber /*----------------------------------------------------------------*/
4517991d9fa0SJoe Thornber 
4518991d9fa0SJoe Thornber static int __init dm_thin_init(void)
4519991d9fa0SJoe Thornber {
45207e6358d2Smonty_pavel@sina.com 	int r = -ENOMEM;
4521991d9fa0SJoe Thornber 
4522991d9fa0SJoe Thornber 	pool_table_init();
4523991d9fa0SJoe Thornber 
45247e6358d2Smonty_pavel@sina.com 	_new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0);
45257e6358d2Smonty_pavel@sina.com 	if (!_new_mapping_cache)
45267e6358d2Smonty_pavel@sina.com 		return r;
45277e6358d2Smonty_pavel@sina.com 
4528991d9fa0SJoe Thornber 	r = dm_register_target(&thin_target);
4529991d9fa0SJoe Thornber 	if (r)
45307e6358d2Smonty_pavel@sina.com 		goto bad_new_mapping_cache;
4531991d9fa0SJoe Thornber 
4532991d9fa0SJoe Thornber 	r = dm_register_target(&pool_target);
4533991d9fa0SJoe Thornber 	if (r)
45347e6358d2Smonty_pavel@sina.com 		goto bad_thin_target;
4535a24c2569SMike Snitzer 
4536a24c2569SMike Snitzer 	return 0;
4537a24c2569SMike Snitzer 
45387e6358d2Smonty_pavel@sina.com bad_thin_target:
4539991d9fa0SJoe Thornber 	dm_unregister_target(&thin_target);
45407e6358d2Smonty_pavel@sina.com bad_new_mapping_cache:
45417e6358d2Smonty_pavel@sina.com 	kmem_cache_destroy(_new_mapping_cache);
4542991d9fa0SJoe Thornber 
4543991d9fa0SJoe Thornber 	return r;
4544991d9fa0SJoe Thornber }
4545991d9fa0SJoe Thornber 
4546991d9fa0SJoe Thornber static void dm_thin_exit(void)
4547991d9fa0SJoe Thornber {
4548991d9fa0SJoe Thornber 	dm_unregister_target(&thin_target);
4549991d9fa0SJoe Thornber 	dm_unregister_target(&pool_target);
4550a24c2569SMike Snitzer 
4551a24c2569SMike Snitzer 	kmem_cache_destroy(_new_mapping_cache);
4552d5ffebddSMike Snitzer 
4553d5ffebddSMike Snitzer 	pool_table_exit();
4554991d9fa0SJoe Thornber }
4555991d9fa0SJoe Thornber 
4556991d9fa0SJoe Thornber module_init(dm_thin_init);
4557991d9fa0SJoe Thornber module_exit(dm_thin_exit);
4558991d9fa0SJoe Thornber 
45596a808034SHeinz Mauelshagen module_param_named(no_space_timeout, no_space_timeout_secs, uint, 0644);
456080c57893SMike Snitzer MODULE_PARM_DESC(no_space_timeout, "Out of data space queue IO timeout in seconds");
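/*
 * Example (sysfs path assumes this file is built into the dm_thin_pool
 * module): raise the out-of-data-space timeout from the default 60 seconds
 * to 5 minutes at runtime:
 *
 *   echo 300 > /sys/module/dm_thin_pool/parameters/no_space_timeout
 */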
456180c57893SMike Snitzer 
45627cab8bf1SAlasdair G Kergon MODULE_DESCRIPTION(DM_NAME " thin provisioning target");
4563991d9fa0SJoe Thornber MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
4564991d9fa0SJoe Thornber MODULE_LICENSE("GPL");
4565