// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v2.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"
#include "dm-io-tracker.h"
#include "dm-cache-background-tracker.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */
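
/*
 * Illustrative example (hypothetical numbers, not part of the driver):
 * with a block size of 64 sectors, origin sector 200 sits in oblock 3
 * (200 / 64), at offset 8 within the block.  If the policy promotes
 * oblock 3 into cblock 7, hits are then remapped to offset 8 within
 * cblock 7 on the cache device; demotion moves the block back again.
 */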

/*----------------------------------------------------------------*/

/*
 * Represents a chunk of future work.  'input' allows continuations to pass
 * values between themselves, typically error values.
 */
struct continuation {
	struct work_struct ws;
	blk_status_t input;
};

static inline void init_continuation(struct continuation *k,
				     void (*fn)(struct work_struct *))
{
	INIT_WORK(&k->ws, fn);
	k->input = 0;
}

static inline void queue_continuation(struct workqueue_struct *wq,
				      struct continuation *k)
{
	queue_work(wq, &k->ws);
}
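
/*
 * Usage sketch (illustrative only; my_done() is a hypothetical
 * continuation function):
 *
 *	static void my_done(struct work_struct *ws)
 *	{
 *		struct continuation *k = container_of(ws, struct continuation, ws);
 *
 *		if (k->input)
 *			DMERR("previous step failed");
 *	}
 *
 *	init_continuation(&k, my_done);
 *	// ... hand &k.ws (or &k) to whatever performs the previous step ...
 *	queue_continuation(wq, &k);	// my_done() runs on wq, sees k.input
 */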

/*----------------------------------------------------------------*/

/*
 * The batcher collects together pieces of work that need a particular
 * operation to occur before they can proceed (typically a commit).
 */
struct batcher {
	/*
	 * The operation that everyone is waiting for.
	 */
	blk_status_t (*commit_op)(void *context);
	void *commit_context;

	/*
	 * This is how bios should be issued once the commit op is complete
	 * (accounted_request).
	 */
	void (*issue_op)(struct bio *bio, void *context);
	void *issue_context;

	/*
	 * Queued work gets put on here after commit.
	 */
	struct workqueue_struct *wq;

	spinlock_t lock;
	struct list_head work_items;
	struct bio_list bios;
	struct work_struct commit_work;

	bool commit_scheduled;
};

static void __commit(struct work_struct *_ws)
{
	struct batcher *b = container_of(_ws, struct batcher, commit_work);
	blk_status_t r;
	struct list_head work_items;
	struct work_struct *ws, *tmp;
	struct continuation *k;
	struct bio *bio;
	struct bio_list bios;

	INIT_LIST_HEAD(&work_items);
	bio_list_init(&bios);

	/*
	 * We have to grab these before the commit_op to avoid a race
	 * condition.
	 */
	spin_lock_irq(&b->lock);
	list_splice_init(&b->work_items, &work_items);
	bio_list_merge(&bios, &b->bios);
	bio_list_init(&b->bios);
	b->commit_scheduled = false;
	spin_unlock_irq(&b->lock);

	r = b->commit_op(b->commit_context);

	list_for_each_entry_safe(ws, tmp, &work_items, entry) {
		k = container_of(ws, struct continuation, ws);
		k->input = r;
		INIT_LIST_HEAD(&ws->entry); /* to avoid a WARN_ON */
		queue_work(b->wq, ws);
	}

	while ((bio = bio_list_pop(&bios))) {
		if (r) {
			bio->bi_status = r;
			bio_endio(bio);
		} else
			b->issue_op(bio, b->issue_context);
	}
}

static void batcher_init(struct batcher *b,
			 blk_status_t (*commit_op)(void *),
			 void *commit_context,
			 void (*issue_op)(struct bio *bio, void *),
			 void *issue_context,
			 struct workqueue_struct *wq)
{
	b->commit_op = commit_op;
	b->commit_context = commit_context;
	b->issue_op = issue_op;
	b->issue_context = issue_context;
	b->wq = wq;

	spin_lock_init(&b->lock);
	INIT_LIST_HEAD(&b->work_items);
	bio_list_init(&b->bios);
	INIT_WORK(&b->commit_work, __commit);
	b->commit_scheduled = false;
}

static void async_commit(struct batcher *b)
{
	queue_work(b->wq, &b->commit_work);
}

static void continue_after_commit(struct batcher *b, struct continuation *k)
{
	bool commit_scheduled;

	spin_lock_irq(&b->lock);
	commit_scheduled = b->commit_scheduled;
	list_add_tail(&k->ws.entry, &b->work_items);
	spin_unlock_irq(&b->lock);

	if (commit_scheduled)
		async_commit(b);
}

/*
 * Bios are errored if commit failed.
 */
static void issue_after_commit(struct batcher *b, struct bio *bio)
{
	bool commit_scheduled;

	spin_lock_irq(&b->lock);
	commit_scheduled = b->commit_scheduled;
	bio_list_add(&b->bios, bio);
	spin_unlock_irq(&b->lock);

	if (commit_scheduled)
		async_commit(b);
}

/*
 * Call this if some urgent work is waiting for the commit to complete.
 */
static void schedule_commit(struct batcher *b)
{
	bool immediate;

	spin_lock_irq(&b->lock);
	immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
	b->commit_scheduled = true;
	spin_unlock_irq(&b->lock);

	if (immediate)
		async_commit(b);
}
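
/*
 * Lifecycle sketch (illustrative; my_commit()/my_issue() are
 * hypothetical callbacks):
 *
 *	batcher_init(&b, my_commit, ctx, my_issue, ctx, wq);
 *
 *	issue_after_commit(&b, bio);	// bio is held until the commit
 *	continue_after_commit(&b, &k);	// so is this continuation
 *	schedule_commit(&b);		// kicks __commit() on the wq
 *
 * Once my_commit() returns, every held continuation sees the result in
 * k->input, and every held bio is either issued via my_issue() or
 * completed with the error.
 */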

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
}
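
/*
 * Typical use (illustrative; my_endio() is a hypothetical function):
 * stash the original endio, substitute our own, and restore the
 * original before completing the bio.
 *
 *	dm_hook_bio(&pb->hook_info, bio, my_endio, mg);
 *
 *	static void my_endio(struct bio *bio)
 *	{
 *		dm_unhook_bio(&pb->hook_info, bio);
 *		// extra work here, then bio_endio(bio) or resubmit
 *	}
 */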

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
	unsigned int metadata_version;
	bool discard_passdown:1;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t writeback;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

struct cache {
	struct dm_target *ti;
	spinlock_t lock;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	int sectors_per_block_shift;
	sector_t sectors_per_block;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io. eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	struct bio_list deferred_bios;

	struct rw_semaphore quiesce_lock;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned int nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct work_struct deferred_bio_worker;
	struct work_struct migration_worker;
	struct workqueue_struct *wq;
	struct delayed_work waker;
	struct dm_bio_prison_v2 *prison;

	/*
	 * cache_size entries, dirty if set
	 */
	unsigned long *dirty_bitset;
	atomic_t nr_dirty;

	unsigned int policy_nr_args;
	struct dm_cache_policy *policy;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct rw_semaphore background_work_lock;

	struct batcher committer;
	struct work_struct commit_ws;

	struct dm_io_tracker tracker;

	mempool_t migration_pool;

	struct bio_set bs;
};

struct per_bio_data {
	bool tick:1;
	unsigned int req_nr:2;
	struct dm_bio_prison_cell_v2 *cell;
	struct dm_hook_info hook_info;
	sector_t len;
};

struct dm_cache_migration {
	struct continuation k;
	struct cache *cache;

	struct policy_work *op;
	struct bio *overwrite_bio;
	struct dm_bio_prison_cell_v2 *cell;

	dm_cblock_t invalidate_cblock;
	dm_oblock_t invalidate_oblock;
};

/*----------------------------------------------------------------*/

static bool writethrough_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache *cache)
{
	return cache->features.io_mode == CM_IO_WRITEBACK;
}

static inline bool passthrough_mode(struct cache *cache)
{
	return unlikely(cache->features.io_mode == CM_IO_PASSTHROUGH);
}

/*----------------------------------------------------------------*/

static void wake_deferred_bio_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->deferred_bio_worker);
}

static void wake_migration_worker(struct cache *cache)
{
	if (passthrough_mode(cache))
		return;

	queue_work(cache->wq, &cache->migration_worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{
	return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
{
	dm_bio_prison_free_cell_v2(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);

	memset(mg, 0, sizeof(*mg));

	mg->cache = cache;
	atomic_inc(&cache->nr_allocated_migrations);

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
		wake_up(&cache->migration_wait);

	mempool_free(mg, &cache->migration_pool);
}

/*----------------------------------------------------------------*/

static inline dm_oblock_t oblock_succ(dm_oblock_t b)
{
	return to_oblock(from_oblock(b) + 1ull);
}

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key_v2 *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}

/*
 * We have two lock levels.  Level 0, which is used to prevent WRITEs, and
 * level 1 which prevents *both* READs and WRITEs.
 */
#define WRITE_LOCK_LEVEL 0
#define READ_WRITE_LOCK_LEVEL 1

static unsigned int lock_level(struct bio *bio)
{
	return bio_data_dir(bio) == WRITE ?
		WRITE_LOCK_LEVEL :
		READ_WRITE_LOCK_LEVEL;
}
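
/*
 * Interpretation (per dm-bio-prison-v2, where a shared lock is blocked
 * iff its level is <= the holder's exclusive lock level): an exclusive
 * lock at WRITE_LOCK_LEVEL holds off WRITE bios (shared level 0) while
 * still admitting READ bios (shared level 1); an exclusive lock at
 * READ_WRITE_LOCK_LEVEL holds off both.
 */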

/*
 *--------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------
 */

static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));

	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->cell = NULL;
	pb->len = 0;

	return pb;
}

/*----------------------------------------------------------------*/

static void defer_bio(struct cache *cache, struct bio *bio)
{
	spin_lock_irq(&cache->lock);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irq(&cache->lock);

	wake_deferred_bio_worker(cache);
}

static void defer_bios(struct cache *cache, struct bio_list *bios)
{
	spin_lock_irq(&cache->lock);
	bio_list_merge(&cache->deferred_bios, bios);
	bio_list_init(bios);
	spin_unlock_irq(&cache->lock);

	wake_deferred_bio_worker(cache);
}

/*----------------------------------------------------------------*/

static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bio *bio)
{
	bool r;
	struct per_bio_data *pb;
	struct dm_cell_key_v2 key;
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;

	cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */

	build_key(oblock, end, &key);
	r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
	if (!r) {
		/*
		 * Failed to get the lock.
		 */
		free_prison_cell(cache, cell_prealloc);
		return r;
	}

	if (cell != cell_prealloc)
		free_prison_cell(cache, cell_prealloc);

	pb = get_per_bio_data(bio);
	pb->cell = cell;

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, cblock);
	}
}

/*
 * These two are called when setting the dirty state after migrations,
 * to force the policy and the dirty bitset to be in sync.
 */
static void force_set_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset))
		atomic_inc(&cache->nr_dirty);
	policy_set_dirty(cache->policy, cblock);
}

static void force_clear_dirty(struct cache *cache, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}

	policy_clear_dirty(cache->policy, cblock);
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}
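
/*
 * Worked example (hypothetical numbers): with sectors_per_block = 128
 * (shift 7) and discard_block_size = 512 sectors, oblocks_per_dblock()
 * is 512 >> 7 = 4, so oblock 9 maps to dblock 9 / 4 = 2.
 */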

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irq(&cache->lock);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	spin_lock_irq(&cache->lock);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;

	spin_lock_irq(&cache->lock);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irq(&cache->lock);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;

	spin_lock_irq(&cache->lock);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irq(&cache->lock);

	return r;
}

/*
 *--------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------
 */
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio_set_dev(bio, cache->origin_dev->bdev);
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio_set_dev(bio, cache->cache_dev->bdev);
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}
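
/*
 * Worked example (hypothetical numbers): with a power-of-two block size
 * of 64 sectors (shift 6), a bio at sector 200 remapped to cblock 7
 * ends up at (7 << 6) | (200 & 63) = 448 + 8 = sector 456 on the cache
 * device.  The non-power-of-two branch computes the same result with a
 * multiply and sector_div().
 */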

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb;

	spin_lock_irq(&cache->lock);
	if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
	    bio_op(bio) != REQ_OP_DISCARD) {
		pb = get_per_bio_data(bio);
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irq(&cache->lock);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	// FIXME: check_if_tick_bio_needed() is called way too much through this interface
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return bio_op(bio) != REQ_OP_DISCARD;
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb;

	if (accountable_bio(cache, bio)) {
		pb = get_per_bio_data(bio);
		pb->len = bio_sectors(bio);
		dm_iot_io_begin(&cache->tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	dm_iot_io_end(&cache->tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	dm_submit_bio_remap(bio, NULL);
}

static void issue_op(struct bio *bio, void *context)
{
	struct cache *cache = context;

	accounted_request(cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  Clone the bio and send them in parallel.
 */
static void remap_to_origin_and_cache(struct cache *cache, struct bio *bio,
				      dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct bio *origin_bio = bio_alloc_clone(cache->origin_dev->bdev, bio,
						 GFP_NOIO, &cache->bs);

	BUG_ON(!origin_bio);

	bio_chain(origin_bio, bio);

	if (bio_data_dir(origin_bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	submit_bio(origin_bio);

	remap_to_cache(cache, bio, cblock);
}
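
/*
 * Note on the mechanism above: bio_chain(origin_bio, bio) makes the
 * original bio's completion wait on origin_bio, so the caller's endio
 * runs only after both the origin write (submitted here) and the cache
 * write (the remapped original bio) have finished.
 */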

/*
 *--------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------
 */
static enum cache_metadata_mode get_cache_mode(struct cache *cache)
{
	return cache->features.mode;
}

static const char *cache_device_name(struct cache *cache)
{
	return dm_table_device_name(cache->ti->table);
}

static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
{
	static const char *descs[] = {
		"write",
		"read-only",
		"fail"
	};

	dm_table_event(cache->ti->table);
	DMINFO("%s: switching cache to %s mode",
	       cache_device_name(cache), descs[(int)mode]);
}

static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
{
	bool needs_check;
	enum cache_metadata_mode old_mode = get_cache_mode(cache);

	if (dm_cache_metadata_needs_check(cache->cmd, &needs_check)) {
		DMERR("%s: unable to read needs_check flag, setting failure mode.",
		      cache_device_name(cache));
		new_mode = CM_FAIL;
	}

	if (new_mode == CM_WRITE && needs_check) {
		DMERR("%s: unable to switch cache to write mode until repaired.",
		      cache_device_name(cache));
		if (old_mode != new_mode)
			new_mode = old_mode;
		else
			new_mode = CM_READ_ONLY;
	}

	/* Never move out of fail mode */
	if (old_mode == CM_FAIL)
		new_mode = CM_FAIL;

	switch (new_mode) {
	case CM_FAIL:
	case CM_READ_ONLY:
		dm_cache_metadata_set_read_only(cache->cmd);
		break;

	case CM_WRITE:
		dm_cache_metadata_set_read_write(cache->cmd);
		break;
	}

	cache->features.mode = new_mode;

	if (new_mode != old_mode)
		notify_mode_switch(cache, new_mode);
}
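
/*
 * Example transitions (reading of the logic above): requesting CM_WRITE
 * while the metadata has needs_check set yields CM_READ_ONLY if we were
 * already in CM_WRITE, or leaves us in the current degraded mode
 * otherwise; and once in CM_FAIL, every requested mode collapses back
 * to CM_FAIL.
 */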
914028ae9f7SJoe Thornber 
abort_transaction(struct cache * cache)915028ae9f7SJoe Thornber static void abort_transaction(struct cache *cache)
916028ae9f7SJoe Thornber {
917b61d9509SMike Snitzer 	const char *dev_name = cache_device_name(cache);
918b61d9509SMike Snitzer 
919028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
920028ae9f7SJoe Thornber 		return;
921028ae9f7SJoe Thornber 
922b61d9509SMike Snitzer 	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
923028ae9f7SJoe Thornber 	if (dm_cache_metadata_abort(cache->cmd)) {
924b61d9509SMike Snitzer 		DMERR("%s: failed to abort metadata transaction", dev_name);
925028ae9f7SJoe Thornber 		set_cache_mode(cache, CM_FAIL);
926028ae9f7SJoe Thornber 	}
9276b997386SMike Snitzer 
9286b997386SMike Snitzer 	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
9296b997386SMike Snitzer 		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
9306b997386SMike Snitzer 		set_cache_mode(cache, CM_FAIL);
9316b997386SMike Snitzer 	}
932028ae9f7SJoe Thornber }
933028ae9f7SJoe Thornber 
metadata_operation_failed(struct cache * cache,const char * op,int r)934028ae9f7SJoe Thornber static void metadata_operation_failed(struct cache *cache, const char *op, int r)
935028ae9f7SJoe Thornber {
936b61d9509SMike Snitzer 	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
937b61d9509SMike Snitzer 		    cache_device_name(cache), op, r);
938028ae9f7SJoe Thornber 	abort_transaction(cache);
939028ae9f7SJoe Thornber 	set_cache_mode(cache, CM_READ_ONLY);
940028ae9f7SJoe Thornber }
941028ae9f7SJoe Thornber 
942b29d4986SJoe Thornber /*----------------------------------------------------------------*/
943b29d4986SJoe Thornber 
load_stats(struct cache * cache)944b29d4986SJoe Thornber static void load_stats(struct cache *cache)
945b29d4986SJoe Thornber {
946b29d4986SJoe Thornber 	struct dm_cache_statistics stats;
947b29d4986SJoe Thornber 
948b29d4986SJoe Thornber 	dm_cache_metadata_get_stats(cache->cmd, &stats);
949b29d4986SJoe Thornber 	atomic_set(&cache->stats.read_hit, stats.read_hits);
950b29d4986SJoe Thornber 	atomic_set(&cache->stats.read_miss, stats.read_misses);
951b29d4986SJoe Thornber 	atomic_set(&cache->stats.write_hit, stats.write_hits);
952b29d4986SJoe Thornber 	atomic_set(&cache->stats.write_miss, stats.write_misses);
953b29d4986SJoe Thornber }
954b29d4986SJoe Thornber 
save_stats(struct cache * cache)955b29d4986SJoe Thornber static void save_stats(struct cache *cache)
956b29d4986SJoe Thornber {
957b29d4986SJoe Thornber 	struct dm_cache_statistics stats;
958b29d4986SJoe Thornber 
959b29d4986SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
960b29d4986SJoe Thornber 		return;
961b29d4986SJoe Thornber 
962b29d4986SJoe Thornber 	stats.read_hits = atomic_read(&cache->stats.read_hit);
963b29d4986SJoe Thornber 	stats.read_misses = atomic_read(&cache->stats.read_miss);
964b29d4986SJoe Thornber 	stats.write_hits = atomic_read(&cache->stats.write_hit);
965b29d4986SJoe Thornber 	stats.write_misses = atomic_read(&cache->stats.write_miss);
966b29d4986SJoe Thornber 
967b29d4986SJoe Thornber 	dm_cache_metadata_set_stats(cache->cmd, &stats);
968b29d4986SJoe Thornber }
969b29d4986SJoe Thornber 
update_stats(struct cache_stats * stats,enum policy_operation op)970b29d4986SJoe Thornber static void update_stats(struct cache_stats *stats, enum policy_operation op)
971b29d4986SJoe Thornber {
972b29d4986SJoe Thornber 	switch (op) {
973b29d4986SJoe Thornber 	case POLICY_PROMOTE:
974b29d4986SJoe Thornber 		atomic_inc(&stats->promotion);
975b29d4986SJoe Thornber 		break;
976b29d4986SJoe Thornber 
977b29d4986SJoe Thornber 	case POLICY_DEMOTE:
978b29d4986SJoe Thornber 		atomic_inc(&stats->demotion);
979b29d4986SJoe Thornber 		break;
980b29d4986SJoe Thornber 
981b29d4986SJoe Thornber 	case POLICY_WRITEBACK:
982b29d4986SJoe Thornber 		atomic_inc(&stats->writeback);
983b29d4986SJoe Thornber 		break;
984b29d4986SJoe Thornber 	}
985b29d4986SJoe Thornber }
986b29d4986SJoe Thornber 
987a4a82ce3SHeinz Mauelshagen /*
988a4a82ce3SHeinz Mauelshagen  *---------------------------------------------------------------------
989c6b4fcbaSJoe Thornber  * Migration processing
990c6b4fcbaSJoe Thornber  *
991c6b4fcbaSJoe Thornber  * Migration covers moving data from the origin device to the cache, or
992c6b4fcbaSJoe Thornber  * vice versa.
993a4a82ce3SHeinz Mauelshagen  *---------------------------------------------------------------------
994a4a82ce3SHeinz Mauelshagen  */
inc_io_migrations(struct cache * cache)995a59db676SJoe Thornber static void inc_io_migrations(struct cache *cache)
996c6b4fcbaSJoe Thornber {
997a59db676SJoe Thornber 	atomic_inc(&cache->nr_io_migrations);
998c6b4fcbaSJoe Thornber }
999c6b4fcbaSJoe Thornber 
dec_io_migrations(struct cache * cache)1000a59db676SJoe Thornber static void dec_io_migrations(struct cache *cache)
1001c6b4fcbaSJoe Thornber {
1002a59db676SJoe Thornber 	atomic_dec(&cache->nr_io_migrations);
1003c6b4fcbaSJoe Thornber }
1004c6b4fcbaSJoe Thornber 
discard_or_flush(struct bio * bio)1005651f5fa2SJoe Thornber static bool discard_or_flush(struct bio *bio)
1006651f5fa2SJoe Thornber {
1007f73f44ebSChristoph Hellwig 	return bio_op(bio) == REQ_OP_DISCARD || op_is_flush(bio->bi_opf);
1008651f5fa2SJoe Thornber }
1009651f5fa2SJoe Thornber 
calc_discard_block_range(struct cache * cache,struct bio * bio,dm_dblock_t * b,dm_dblock_t * e)10107ae34e77SJoe Thornber static void calc_discard_block_range(struct cache *cache, struct bio *bio,
10117ae34e77SJoe Thornber 				     dm_dblock_t *b, dm_dblock_t *e)
10127ae34e77SJoe Thornber {
10137ae34e77SJoe Thornber 	sector_t sb = bio->bi_iter.bi_sector;
10147ae34e77SJoe Thornber 	sector_t se = bio_end_sector(bio);
10157ae34e77SJoe Thornber 
10167ae34e77SJoe Thornber 	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));
10177ae34e77SJoe Thornber 
10187ae34e77SJoe Thornber 	if (se - sb < cache->discard_block_size)
10197ae34e77SJoe Thornber 		*e = *b;
10207ae34e77SJoe Thornber 	else
10217ae34e77SJoe Thornber 		*e = to_dblock(block_div(se, cache->discard_block_size));
10227ae34e77SJoe Thornber }
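
/*
 * Worked example (hypothetical numbers): with discard_block_size = 128
 * sectors, a discard over sectors [100, 700) rounds inward to
 * *b = dblock 1 (dm_sector_div_up(100, 128)) and *e = dblock 5
 * (700 / 128), i.e. dblocks [1, 5) covering sectors [128, 640); the
 * partial edges are never marked discarded.
 */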

/*----------------------------------------------------------------*/

static void prevent_background_work(struct cache *cache)
{
	lockdep_off();
	down_write(&cache->background_work_lock);
	lockdep_on();
}

static void allow_background_work(struct cache *cache)
{
	lockdep_off();
	up_write(&cache->background_work_lock);
	lockdep_on();
}

static bool background_work_begin(struct cache *cache)
{
	bool r;

	lockdep_off();
	r = down_read_trylock(&cache->background_work_lock);
	lockdep_on();

	return r;
}

static void background_work_end(struct cache *cache)
{
	lockdep_off();
	up_read(&cache->background_work_lock);
	lockdep_on();
}

/*----------------------------------------------------------------*/

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static bool optimisable_bio(struct cache *cache, struct bio *bio, dm_oblock_t block)
{
	return writeback_mode(cache) &&
		(is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio));
}
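
/*
 * Example (illustrative): in writeback mode, a WRITE covering a whole
 * cache block, or one landing on a discarded region, is "optimisable":
 * it can serve directly as the promotion data (the overwrite path
 * below), avoiding a copy from the origin device.
 */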
1071d1260e2aSJoe Thornber 
quiesce(struct dm_cache_migration * mg,void (* continuation)(struct work_struct *))1072b29d4986SJoe Thornber static void quiesce(struct dm_cache_migration *mg,
1073b29d4986SJoe Thornber 		    void (*continuation)(struct work_struct *))
1074b29d4986SJoe Thornber {
1075b29d4986SJoe Thornber 	init_continuation(&mg->k, continuation);
1076b29d4986SJoe Thornber 	dm_cell_quiesce_v2(mg->cache->prison, mg->cell, &mg->k.ws);
1077b29d4986SJoe Thornber }
1078b29d4986SJoe Thornber 
ws_to_mg(struct work_struct * ws)1079b29d4986SJoe Thornber static struct dm_cache_migration *ws_to_mg(struct work_struct *ws)
1080b29d4986SJoe Thornber {
1081b29d4986SJoe Thornber 	struct continuation *k = container_of(ws, struct continuation, ws);
10820ef0b471SHeinz Mauelshagen 
1083b29d4986SJoe Thornber 	return container_of(k, struct dm_cache_migration, k);
1084b29d4986SJoe Thornber }
1085b29d4986SJoe Thornber 
copy_complete(int read_err,unsigned long write_err,void * context)1086b29d4986SJoe Thornber static void copy_complete(int read_err, unsigned long write_err, void *context)
1087b29d4986SJoe Thornber {
1088b29d4986SJoe Thornber 	struct dm_cache_migration *mg = container_of(context, struct dm_cache_migration, k);
1089b29d4986SJoe Thornber 
1090b29d4986SJoe Thornber 	if (read_err || write_err)
10914e4cbee9SChristoph Hellwig 		mg->k.input = BLK_STS_IOERR;
1092b29d4986SJoe Thornber 
1093b29d4986SJoe Thornber 	queue_continuation(mg->cache->wq, &mg->k);
1094b29d4986SJoe Thornber }
1095b29d4986SJoe Thornber 
copy(struct dm_cache_migration * mg,bool promote)10967209049dSMike Snitzer static void copy(struct dm_cache_migration *mg, bool promote)
1097b29d4986SJoe Thornber {
1098b29d4986SJoe Thornber 	struct dm_io_region o_region, c_region;
1099cc7da0baSJoe Thornber 	struct cache *cache = mg->cache;
11007ae34e77SJoe Thornber 
1101b29d4986SJoe Thornber 	o_region.bdev = cache->origin_dev->bdev;
1102b29d4986SJoe Thornber 	o_region.sector = from_oblock(mg->op->oblock) * cache->sectors_per_block;
1103b29d4986SJoe Thornber 	o_region.count = cache->sectors_per_block;
11047ae34e77SJoe Thornber 
1105b29d4986SJoe Thornber 	c_region.bdev = cache->cache_dev->bdev;
1106b29d4986SJoe Thornber 	c_region.sector = from_cblock(mg->op->cblock) * cache->sectors_per_block;
1107b29d4986SJoe Thornber 	c_region.count = cache->sectors_per_block;
11087ae34e77SJoe Thornber 
1109b29d4986SJoe Thornber 	if (promote)
11107209049dSMike Snitzer 		dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, &mg->k);
1111c6b4fcbaSJoe Thornber 	else
11127209049dSMike Snitzer 		dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, &mg->k);
1113c6b4fcbaSJoe Thornber }
1114c6b4fcbaSJoe Thornber 
bio_drop_shared_lock(struct cache * cache,struct bio * bio)1115b29d4986SJoe Thornber static void bio_drop_shared_lock(struct cache *cache, struct bio *bio)
1116c6b4fcbaSJoe Thornber {
1117693b960eSMike Snitzer 	struct per_bio_data *pb = get_per_bio_data(bio);
1118c6b4fcbaSJoe Thornber 
1119b29d4986SJoe Thornber 	if (pb->cell && dm_cell_put_v2(cache->prison, pb->cell))
1120b29d4986SJoe Thornber 		free_prison_cell(cache, pb->cell);
1121b29d4986SJoe Thornber 	pb->cell = NULL;
1122c6b4fcbaSJoe Thornber }
1123c6b4fcbaSJoe Thornber 
overwrite_endio(struct bio * bio)1124b29d4986SJoe Thornber static void overwrite_endio(struct bio *bio)
1125c6b4fcbaSJoe Thornber {
1126b29d4986SJoe Thornber 	struct dm_cache_migration *mg = bio->bi_private;
1127c6b4fcbaSJoe Thornber 	struct cache *cache = mg->cache;
1128693b960eSMike Snitzer 	struct per_bio_data *pb = get_per_bio_data(bio);
1129c6b4fcbaSJoe Thornber 
1130b29d4986SJoe Thornber 	dm_unhook_bio(&pb->hook_info, bio);
1131c6b4fcbaSJoe Thornber 
11324e4cbee9SChristoph Hellwig 	if (bio->bi_status)
11334e4cbee9SChristoph Hellwig 		mg->k.input = bio->bi_status;
1134b29d4986SJoe Thornber 
1135693b960eSMike Snitzer 	queue_continuation(cache->wq, &mg->k);
1136c6b4fcbaSJoe Thornber }
1137c6b4fcbaSJoe Thornber 
overwrite(struct dm_cache_migration * mg,void (* continuation)(struct work_struct *))1138b29d4986SJoe Thornber static void overwrite(struct dm_cache_migration *mg,
1139b29d4986SJoe Thornber 		      void (*continuation)(struct work_struct *))
1140c6b4fcbaSJoe Thornber {
1141b29d4986SJoe Thornber 	struct bio *bio = mg->overwrite_bio;
1142693b960eSMike Snitzer 	struct per_bio_data *pb = get_per_bio_data(bio);
1143c6b4fcbaSJoe Thornber 
1144b29d4986SJoe Thornber 	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1145c6b4fcbaSJoe Thornber 
1146b29d4986SJoe Thornber 	/*
1147b29d4986SJoe Thornber 	 * The overwrite bio is part of the copy operation; as such it does
1148b29d4986SJoe Thornber 	 * not set/clear the discard or dirty flags.
1149b29d4986SJoe Thornber 	 */
1150b29d4986SJoe Thornber 	if (mg->op->op == POLICY_PROMOTE)
1151b29d4986SJoe Thornber 		remap_to_cache(mg->cache, bio, mg->op->cblock);
1152b29d4986SJoe Thornber 	else
1153b29d4986SJoe Thornber 		remap_to_origin(mg->cache, bio);
1154c6b4fcbaSJoe Thornber 
1155b29d4986SJoe Thornber 	init_continuation(&mg->k, continuation);
1156b29d4986SJoe Thornber 	accounted_request(mg->cache, bio);
1157c6b4fcbaSJoe Thornber }
1158c6b4fcbaSJoe Thornber 
11592ee57d58SJoe Thornber /*
1160b29d4986SJoe Thornber  * Migration steps:
1161b29d4986SJoe Thornber  *
1162b29d4986SJoe Thornber  * 1) exclusive lock preventing WRITEs
1163b29d4986SJoe Thornber  * 2) quiesce
1164b29d4986SJoe Thornber  * 3) copy or issue overwrite bio
1165b29d4986SJoe Thornber  * 4) upgrade to exclusive lock preventing READs and WRITEs
1166b29d4986SJoe Thornber  * 5) quiesce
1167b29d4986SJoe Thornber  * 6) update metadata and commit
1168b29d4986SJoe Thornber  * 7) unlock
11692ee57d58SJoe Thornber  */
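/*
 * In code the steps above are chained as continuations:
 * mg_start() -> mg_lock_writes() -> mg_copy(), which either issues
 * the overwrite bio (continuing into mg_update_metadata_after_copy())
 * or performs a full copy via mg_full_copy() -> mg_upgrade_lock() ->
 * mg_update_metadata().  Every path finishes in mg_complete().
 */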
mg_complete(struct dm_cache_migration * mg,bool success)1170b29d4986SJoe Thornber static void mg_complete(struct dm_cache_migration *mg, bool success)
11712ee57d58SJoe Thornber {
1172b29d4986SJoe Thornber 	struct bio_list bios;
1173b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1174b29d4986SJoe Thornber 	struct policy_work *op = mg->op;
1175b29d4986SJoe Thornber 	dm_cblock_t cblock = op->cblock;
11762ee57d58SJoe Thornber 
1177b29d4986SJoe Thornber 	if (success)
1178b29d4986SJoe Thornber 		update_stats(&cache->stats, op->op);
11792ee57d58SJoe Thornber 
1180b29d4986SJoe Thornber 	switch (op->op) {
1181b29d4986SJoe Thornber 	case POLICY_PROMOTE:
1182b29d4986SJoe Thornber 		clear_discard(cache, oblock_to_dblock(cache, op->oblock));
1183b29d4986SJoe Thornber 		policy_complete_background_work(cache->policy, op, success);
1184b29d4986SJoe Thornber 
1185b29d4986SJoe Thornber 		if (mg->overwrite_bio) {
1186b29d4986SJoe Thornber 			if (success)
1187b29d4986SJoe Thornber 				force_set_dirty(cache, cblock);
11884e4cbee9SChristoph Hellwig 			else if (mg->k.input)
11894e4cbee9SChristoph Hellwig 				mg->overwrite_bio->bi_status = mg->k.input;
1190b29d4986SJoe Thornber 			else
11914e4cbee9SChristoph Hellwig 				mg->overwrite_bio->bi_status = BLK_STS_IOERR;
1192b29d4986SJoe Thornber 			bio_endio(mg->overwrite_bio);
1193b29d4986SJoe Thornber 		} else {
1194b29d4986SJoe Thornber 			if (success)
1195b29d4986SJoe Thornber 				force_clear_dirty(cache, cblock);
1196b29d4986SJoe Thornber 			dec_io_migrations(cache);
1197b29d4986SJoe Thornber 		}
1198b29d4986SJoe Thornber 		break;
1199b29d4986SJoe Thornber 
1200b29d4986SJoe Thornber 	case POLICY_DEMOTE:
1201b29d4986SJoe Thornber 		/*
1202b29d4986SJoe Thornber 		 * We clear dirty here to update the nr_dirty counter.
1203b29d4986SJoe Thornber 		 */
1204b29d4986SJoe Thornber 		if (success)
1205b29d4986SJoe Thornber 			force_clear_dirty(cache, cblock);
1206b29d4986SJoe Thornber 		policy_complete_background_work(cache->policy, op, success);
1207b29d4986SJoe Thornber 		dec_io_migrations(cache);
1208b29d4986SJoe Thornber 		break;
1209b29d4986SJoe Thornber 
1210b29d4986SJoe Thornber 	case POLICY_WRITEBACK:
1211b29d4986SJoe Thornber 		if (success)
1212b29d4986SJoe Thornber 			force_clear_dirty(cache, cblock);
1213b29d4986SJoe Thornber 		policy_complete_background_work(cache->policy, op, success);
1214b29d4986SJoe Thornber 		dec_io_migrations(cache);
1215b29d4986SJoe Thornber 		break;
12162ee57d58SJoe Thornber 	}
12172ee57d58SJoe Thornber 
1218b29d4986SJoe Thornber 	bio_list_init(&bios);
1219b29d4986SJoe Thornber 	if (mg->cell) {
1220b29d4986SJoe Thornber 		if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1221b29d4986SJoe Thornber 			free_prison_cell(cache, mg->cell);
1222b29d4986SJoe Thornber 	}
1223b29d4986SJoe Thornber 
1224b29d4986SJoe Thornber 	free_migration(mg);
1225b29d4986SJoe Thornber 	defer_bios(cache, &bios);
1226b29d4986SJoe Thornber 	wake_migration_worker(cache);
1227b29d4986SJoe Thornber 
1228b29d4986SJoe Thornber 	background_work_end(cache);
1229b29d4986SJoe Thornber }
1230b29d4986SJoe Thornber 
mg_success(struct work_struct * ws)1231b29d4986SJoe Thornber static void mg_success(struct work_struct *ws)
12327ae34e77SJoe Thornber {
1233b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
12340ef0b471SHeinz Mauelshagen 
1235b29d4986SJoe Thornber 	mg_complete(mg, mg->k.input == 0);
1236b29d4986SJoe Thornber }
12377ae34e77SJoe Thornber 
mg_update_metadata(struct work_struct * ws)1238b29d4986SJoe Thornber static void mg_update_metadata(struct work_struct *ws)
1239b29d4986SJoe Thornber {
1240b29d4986SJoe Thornber 	int r;
1241b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1242b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1243b29d4986SJoe Thornber 	struct policy_work *op = mg->op;
1244b29d4986SJoe Thornber 
1245b29d4986SJoe Thornber 	switch (op->op) {
1246b29d4986SJoe Thornber 	case POLICY_PROMOTE:
1247b29d4986SJoe Thornber 		r = dm_cache_insert_mapping(cache->cmd, op->cblock, op->oblock);
1248b29d4986SJoe Thornber 		if (r) {
1249b29d4986SJoe Thornber 			DMERR_LIMIT("%s: migration failed; couldn't insert mapping",
1250b29d4986SJoe Thornber 				    cache_device_name(cache));
1251b29d4986SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
1252b29d4986SJoe Thornber 
1253b29d4986SJoe Thornber 			mg_complete(mg, false);
1254b29d4986SJoe Thornber 			return;
1255b29d4986SJoe Thornber 		}
1256b29d4986SJoe Thornber 		mg_complete(mg, true);
1257b29d4986SJoe Thornber 		break;
1258b29d4986SJoe Thornber 
1259b29d4986SJoe Thornber 	case POLICY_DEMOTE:
1260b29d4986SJoe Thornber 		r = dm_cache_remove_mapping(cache->cmd, op->cblock);
1261b29d4986SJoe Thornber 		if (r) {
1262b29d4986SJoe Thornber 			DMERR_LIMIT("%s: migration failed; couldn't update on disk metadata",
1263b29d4986SJoe Thornber 				    cache_device_name(cache));
1264b29d4986SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1265b29d4986SJoe Thornber 
1266b29d4986SJoe Thornber 			mg_complete(mg, false);
1267b29d4986SJoe Thornber 			return;
1268b29d4986SJoe Thornber 		}
1269b29d4986SJoe Thornber 
1270b29d4986SJoe Thornber 		/*
1271b29d4986SJoe Thornber 		 * It would be nice if we only had to commit when a REQ_PREFLUSH
1272b29d4986SJoe Thornber 		 * comes through.  But there's one scenario that we have to
1273b29d4986SJoe Thornber 		 * look out for:
1274b29d4986SJoe Thornber 		 *
1275b29d4986SJoe Thornber 		 * - oblock x is mapped to a cache block
1276b29d4986SJoe Thornber 		 * - demotion occurs
1277b29d4986SJoe Thornber 		 * - the cache block gets reallocated and overwritten
1278b29d4986SJoe Thornber 		 * - crash
1279b29d4986SJoe Thornber 		 *
1280b29d4986SJoe Thornber 		 * When we recover, because there was no commit the cache will
1281b29d4986SJoe Thornber 		 * roll back to having the data for oblock x in the cache block.
1282b29d4986SJoe Thornber 		 * But the cache block has since been overwritten, so it'll end
1283b29d4986SJoe Thornber 		 * up pointing to data that was never in 'x' during the history
1284b29d4986SJoe Thornber 		 * of the device.
1285b29d4986SJoe Thornber 		 *
1286b29d4986SJoe Thornber 		 * To avoid this issue we require a commit as part of the
1287b29d4986SJoe Thornber 		 * demotion operation.
1288b29d4986SJoe Thornber 		 */
1289b29d4986SJoe Thornber 		init_continuation(&mg->k, mg_success);
1290b29d4986SJoe Thornber 		continue_after_commit(&cache->committer, &mg->k);
1291b29d4986SJoe Thornber 		schedule_commit(&cache->committer);
1292b29d4986SJoe Thornber 		break;
1293b29d4986SJoe Thornber 
1294b29d4986SJoe Thornber 	case POLICY_WRITEBACK:
1295b29d4986SJoe Thornber 		mg_complete(mg, true);
1296b29d4986SJoe Thornber 		break;
1297b29d4986SJoe Thornber 	}
1298b29d4986SJoe Thornber }
1299b29d4986SJoe Thornber 
mg_update_metadata_after_copy(struct work_struct * ws)1300b29d4986SJoe Thornber static void mg_update_metadata_after_copy(struct work_struct *ws)
1301b29d4986SJoe Thornber {
1302b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1303b29d4986SJoe Thornber 
1304b29d4986SJoe Thornber 	/*
1305b29d4986SJoe Thornber 	 * Did the copy succeed?
1306b29d4986SJoe Thornber 	 */
1307b29d4986SJoe Thornber 	if (mg->k.input)
1308b29d4986SJoe Thornber 		mg_complete(mg, false);
1309b29d4986SJoe Thornber 	else
1310b29d4986SJoe Thornber 		mg_update_metadata(ws);
1311b29d4986SJoe Thornber }
1312b29d4986SJoe Thornber 
mg_upgrade_lock(struct work_struct * ws)1313b29d4986SJoe Thornber static void mg_upgrade_lock(struct work_struct *ws)
1314b29d4986SJoe Thornber {
1315b29d4986SJoe Thornber 	int r;
1316b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1317b29d4986SJoe Thornber 
1318b29d4986SJoe Thornber 	/*
1319b29d4986SJoe Thornber 	 * Did the copy succeed?
1320b29d4986SJoe Thornber 	 */
1321b29d4986SJoe Thornber 	if (mg->k.input)
1322b29d4986SJoe Thornber 		mg_complete(mg, false);
1323b29d4986SJoe Thornber 
1324b29d4986SJoe Thornber 	else {
1325b29d4986SJoe Thornber 		/*
1326b29d4986SJoe Thornber 		 * Now we want the lock to prevent both reads and writes.
1327b29d4986SJoe Thornber 		 */
1328b29d4986SJoe Thornber 		r = dm_cell_lock_promote_v2(mg->cache->prison, mg->cell,
1329b29d4986SJoe Thornber 					    READ_WRITE_LOCK_LEVEL);
1330b29d4986SJoe Thornber 		if (r < 0)
1331b29d4986SJoe Thornber 			mg_complete(mg, false);
1332b29d4986SJoe Thornber 
1333b29d4986SJoe Thornber 		else if (r)
1334b29d4986SJoe Thornber 			quiesce(mg, mg_update_metadata);
1335b29d4986SJoe Thornber 
1336b29d4986SJoe Thornber 		else
1337b29d4986SJoe Thornber 			mg_update_metadata(ws);
1338b29d4986SJoe Thornber 	}
1339b29d4986SJoe Thornber }
1340b29d4986SJoe Thornber 
mg_full_copy(struct work_struct * ws)1341d1260e2aSJoe Thornber static void mg_full_copy(struct work_struct *ws)
1342b29d4986SJoe Thornber {
1343b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1344b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1345b29d4986SJoe Thornber 	struct policy_work *op = mg->op;
1346b29d4986SJoe Thornber 	bool is_policy_promote = (op->op == POLICY_PROMOTE);
1347b29d4986SJoe Thornber 
1348b29d4986SJoe Thornber 	if ((!is_policy_promote && !is_dirty(cache, op->cblock)) ||
1349b29d4986SJoe Thornber 	    is_discarded_oblock(cache, op->oblock)) {
1350b29d4986SJoe Thornber 		mg_upgrade_lock(ws);
1351b29d4986SJoe Thornber 		return;
1352b29d4986SJoe Thornber 	}
1353b29d4986SJoe Thornber 
1354b29d4986SJoe Thornber 	init_continuation(&mg->k, mg_upgrade_lock);
13557209049dSMike Snitzer 	copy(mg, is_policy_promote);
1356b29d4986SJoe Thornber }
1357d1260e2aSJoe Thornber 
mg_copy(struct work_struct * ws)1358d1260e2aSJoe Thornber static void mg_copy(struct work_struct *ws)
1359d1260e2aSJoe Thornber {
1360d1260e2aSJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1361d1260e2aSJoe Thornber 
1362d1260e2aSJoe Thornber 	if (mg->overwrite_bio) {
1363d1260e2aSJoe Thornber 		/*
1364d1260e2aSJoe Thornber 		 * No exclusive lock was held when we last checked if the bio
1365d1260e2aSJoe Thornber 		 * was optimisable.  So we have to check again in case things
1366d1260e2aSJoe Thornber 		 * have changed (e.g. the block may no longer be discarded).
1367d1260e2aSJoe Thornber 		 */
1368d1260e2aSJoe Thornber 		if (!optimisable_bio(mg->cache, mg->overwrite_bio, mg->op->oblock)) {
1369d1260e2aSJoe Thornber 			/*
1370d1260e2aSJoe Thornber 			 * Fall back to a real full copy after doing some tidying up.
1371d1260e2aSJoe Thornber 			 */
1372d1260e2aSJoe Thornber 			bool rb = bio_detain_shared(mg->cache, mg->op->oblock, mg->overwrite_bio);
13730ef0b471SHeinz Mauelshagen 
1374d1260e2aSJoe Thornber 			BUG_ON(rb); /* An exclusive lock must _not_ be held for this block */
1375d1260e2aSJoe Thornber 			mg->overwrite_bio = NULL;
1376d1260e2aSJoe Thornber 			inc_io_migrations(mg->cache);
1377d1260e2aSJoe Thornber 			mg_full_copy(ws);
1378d1260e2aSJoe Thornber 			return;
1379d1260e2aSJoe Thornber 		}
1380d1260e2aSJoe Thornber 
1381d1260e2aSJoe Thornber 		/*
1382d1260e2aSJoe Thornber 		 * It's safe to do this here, even though it's new data
1383d1260e2aSJoe Thornber 		 * because all IO has been locked out of the block.
1384d1260e2aSJoe Thornber 		 *
1385d1260e2aSJoe Thornber 		 * mg_lock_writes() already took READ_WRITE_LOCK_LEVEL
1386d1260e2aSJoe Thornber 		 * so _not_ using mg_upgrade_lock() as the continuation.
1387d1260e2aSJoe Thornber 		 */
1388d1260e2aSJoe Thornber 		overwrite(mg, mg_update_metadata_after_copy);
1389d1260e2aSJoe Thornber 
1390d1260e2aSJoe Thornber 	} else
1391d1260e2aSJoe Thornber 		mg_full_copy(ws);
1392b29d4986SJoe Thornber }
1393b29d4986SJoe Thornber 
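/*
 * dm_cell_lock_v2() returns a tristate: negative errno on failure,
 * zero if the lock was granted immediately, and positive if existing
 * holders must drain first, in which case we quiesce before
 * continuing.  invalidate_lock() below follows the same convention.
 */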
mg_lock_writes(struct dm_cache_migration * mg)1394b29d4986SJoe Thornber static int mg_lock_writes(struct dm_cache_migration *mg)
1395b29d4986SJoe Thornber {
1396b29d4986SJoe Thornber 	int r;
1397b29d4986SJoe Thornber 	struct dm_cell_key_v2 key;
1398b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1399b29d4986SJoe Thornber 	struct dm_bio_prison_cell_v2 *prealloc;
1400b29d4986SJoe Thornber 
1401b29d4986SJoe Thornber 	prealloc = alloc_prison_cell(cache);
1402b29d4986SJoe Thornber 
1403b29d4986SJoe Thornber 	/*
1404b29d4986SJoe Thornber 	 * Prevent writes to the block, but allow reads to continue.
1405b29d4986SJoe Thornber 	 * Unless we're using an overwrite bio, in which case we lock
1406b29d4986SJoe Thornber 	 * everything.
1407b29d4986SJoe Thornber 	 */
1408b29d4986SJoe Thornber 	build_key(mg->op->oblock, oblock_succ(mg->op->oblock), &key);
1409b29d4986SJoe Thornber 	r = dm_cell_lock_v2(cache->prison, &key,
1410b29d4986SJoe Thornber 			    mg->overwrite_bio ?  READ_WRITE_LOCK_LEVEL : WRITE_LOCK_LEVEL,
1411b29d4986SJoe Thornber 			    prealloc, &mg->cell);
1412b29d4986SJoe Thornber 	if (r < 0) {
1413b29d4986SJoe Thornber 		free_prison_cell(cache, prealloc);
1414b29d4986SJoe Thornber 		mg_complete(mg, false);
1415b29d4986SJoe Thornber 		return r;
1416b29d4986SJoe Thornber 	}
1417b29d4986SJoe Thornber 
1418b29d4986SJoe Thornber 	if (mg->cell != prealloc)
1419b29d4986SJoe Thornber 		free_prison_cell(cache, prealloc);
1420b29d4986SJoe Thornber 
1421b29d4986SJoe Thornber 	if (r == 0)
1422b29d4986SJoe Thornber 		mg_copy(&mg->k.ws);
1423b29d4986SJoe Thornber 	else
1424b29d4986SJoe Thornber 		quiesce(mg, mg_copy);
1425b29d4986SJoe Thornber 
1426b29d4986SJoe Thornber 	return 0;
1427b29d4986SJoe Thornber }
1428b29d4986SJoe Thornber 
mg_start(struct cache * cache,struct policy_work * op,struct bio * bio)1429b29d4986SJoe Thornber static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio)
1430b29d4986SJoe Thornber {
1431b29d4986SJoe Thornber 	struct dm_cache_migration *mg;
1432b29d4986SJoe Thornber 
1433b29d4986SJoe Thornber 	if (!background_work_begin(cache)) {
1434b29d4986SJoe Thornber 		policy_complete_background_work(cache->policy, op, false);
1435b29d4986SJoe Thornber 		return -EPERM;
1436b29d4986SJoe Thornber 	}
1437b29d4986SJoe Thornber 
1438b29d4986SJoe Thornber 	mg = alloc_migration(cache);
1439b29d4986SJoe Thornber 
1440b29d4986SJoe Thornber 	mg->op = op;
1441b29d4986SJoe Thornber 	mg->overwrite_bio = bio;
14427ae34e77SJoe Thornber 
1443b29d4986SJoe Thornber 	if (!bio)
1444b29d4986SJoe Thornber 		inc_io_migrations(cache);
1445b29d4986SJoe Thornber 
1446b29d4986SJoe Thornber 	return mg_lock_writes(mg);
1447b29d4986SJoe Thornber }
1448b29d4986SJoe Thornber 
1449a4a82ce3SHeinz Mauelshagen /*
1450a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1451b29d4986SJoe Thornber  * invalidation processing
1452a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1453a4a82ce3SHeinz Mauelshagen  */
1454b29d4986SJoe Thornber 
invalidate_complete(struct dm_cache_migration * mg,bool success)1455b29d4986SJoe Thornber static void invalidate_complete(struct dm_cache_migration *mg, bool success)
1456b29d4986SJoe Thornber {
1457b29d4986SJoe Thornber 	struct bio_list bios;
1458b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1459b29d4986SJoe Thornber 
1460b29d4986SJoe Thornber 	bio_list_init(&bios);
1461b29d4986SJoe Thornber 	if (dm_cell_unlock_v2(cache->prison, mg->cell, &bios))
1462b29d4986SJoe Thornber 		free_prison_cell(cache, mg->cell);
1463b29d4986SJoe Thornber 
1464b29d4986SJoe Thornber 	if (!success && mg->overwrite_bio)
1465b29d4986SJoe Thornber 		bio_io_error(mg->overwrite_bio);
1466b29d4986SJoe Thornber 
1467b29d4986SJoe Thornber 	free_migration(mg);
1468b29d4986SJoe Thornber 	defer_bios(cache, &bios);
1469b29d4986SJoe Thornber 
1470b29d4986SJoe Thornber 	background_work_end(cache);
1471b29d4986SJoe Thornber }
1472b29d4986SJoe Thornber 
invalidate_completed(struct work_struct * ws)1473b29d4986SJoe Thornber static void invalidate_completed(struct work_struct *ws)
1474b29d4986SJoe Thornber {
1475b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
14760ef0b471SHeinz Mauelshagen 
1477b29d4986SJoe Thornber 	invalidate_complete(mg, !mg->k.input);
1478b29d4986SJoe Thornber }
1479b29d4986SJoe Thornber 
invalidate_cblock(struct cache * cache,dm_cblock_t cblock)1480b29d4986SJoe Thornber static int invalidate_cblock(struct cache *cache, dm_cblock_t cblock)
1481b29d4986SJoe Thornber {
14820ef0b471SHeinz Mauelshagen 	int r;
14830ef0b471SHeinz Mauelshagen 
14840ef0b471SHeinz Mauelshagen 	r = policy_invalidate_mapping(cache->policy, cblock);
1485b29d4986SJoe Thornber 	if (!r) {
1486b29d4986SJoe Thornber 		r = dm_cache_remove_mapping(cache->cmd, cblock);
1487b29d4986SJoe Thornber 		if (r) {
1488b29d4986SJoe Thornber 			DMERR_LIMIT("%s: invalidation failed; couldn't update on disk metadata",
1489b29d4986SJoe Thornber 				    cache_device_name(cache));
1490b29d4986SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
1491b29d4986SJoe Thornber 		}
1492b29d4986SJoe Thornber 
1493b29d4986SJoe Thornber 	} else if (r == -ENODATA) {
1494b29d4986SJoe Thornber 		/*
1495b29d4986SJoe Thornber 		 * Harmless, already unmapped.
1496b29d4986SJoe Thornber 		 */
1497b29d4986SJoe Thornber 		r = 0;
1498b29d4986SJoe Thornber 
1499b29d4986SJoe Thornber 	} else
1500b29d4986SJoe Thornber 		DMERR("%s: policy_invalidate_mapping failed", cache_device_name(cache));
1501b29d4986SJoe Thornber 
1502b29d4986SJoe Thornber 	return r;
1503b29d4986SJoe Thornber }
1504b29d4986SJoe Thornber 
invalidate_remove(struct work_struct * ws)1505b29d4986SJoe Thornber static void invalidate_remove(struct work_struct *ws)
1506b29d4986SJoe Thornber {
1507b29d4986SJoe Thornber 	int r;
1508b29d4986SJoe Thornber 	struct dm_cache_migration *mg = ws_to_mg(ws);
1509b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1510b29d4986SJoe Thornber 
1511b29d4986SJoe Thornber 	r = invalidate_cblock(cache, mg->invalidate_cblock);
1512b29d4986SJoe Thornber 	if (r) {
1513b29d4986SJoe Thornber 		invalidate_complete(mg, false);
1514b29d4986SJoe Thornber 		return;
1515b29d4986SJoe Thornber 	}
1516b29d4986SJoe Thornber 
1517b29d4986SJoe Thornber 	init_continuation(&mg->k, invalidate_completed);
1518b29d4986SJoe Thornber 	continue_after_commit(&cache->committer, &mg->k);
1519b29d4986SJoe Thornber 	remap_to_origin_clear_discard(cache, mg->overwrite_bio, mg->invalidate_oblock);
1520b29d4986SJoe Thornber 	mg->overwrite_bio = NULL;
1521b29d4986SJoe Thornber 	schedule_commit(&cache->committer);
1522b29d4986SJoe Thornber }
1523b29d4986SJoe Thornber 
invalidate_lock(struct dm_cache_migration * mg)1524b29d4986SJoe Thornber static int invalidate_lock(struct dm_cache_migration *mg)
1525b29d4986SJoe Thornber {
1526b29d4986SJoe Thornber 	int r;
1527b29d4986SJoe Thornber 	struct dm_cell_key_v2 key;
1528b29d4986SJoe Thornber 	struct cache *cache = mg->cache;
1529b29d4986SJoe Thornber 	struct dm_bio_prison_cell_v2 *prealloc;
1530b29d4986SJoe Thornber 
1531b29d4986SJoe Thornber 	prealloc = alloc_prison_cell(cache);
1532b29d4986SJoe Thornber 
1533b29d4986SJoe Thornber 	build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
1534b29d4986SJoe Thornber 	r = dm_cell_lock_v2(cache->prison, &key,
1535b29d4986SJoe Thornber 			    READ_WRITE_LOCK_LEVEL, prealloc, &mg->cell);
1536b29d4986SJoe Thornber 	if (r < 0) {
1537b29d4986SJoe Thornber 		free_prison_cell(cache, prealloc);
1538b29d4986SJoe Thornber 		invalidate_complete(mg, false);
1539b29d4986SJoe Thornber 		return r;
1540b29d4986SJoe Thornber 	}
1541b29d4986SJoe Thornber 
1542b29d4986SJoe Thornber 	if (mg->cell != prealloc)
1543b29d4986SJoe Thornber 		free_prison_cell(cache, prealloc);
1544b29d4986SJoe Thornber 
1545b29d4986SJoe Thornber 	if (r)
1546b29d4986SJoe Thornber 		quiesce(mg, invalidate_remove);
1547b29d4986SJoe Thornber 
1548b29d4986SJoe Thornber 	else {
1549b29d4986SJoe Thornber 		/*
1550b29d4986SJoe Thornber 		 * We can't call invalidate_remove() directly here because we
1551b29d4986SJoe Thornber 		 * might still be in request context.
1552b29d4986SJoe Thornber 		 */
1553b29d4986SJoe Thornber 		init_continuation(&mg->k, invalidate_remove);
1554b29d4986SJoe Thornber 		queue_work(cache->wq, &mg->k.ws);
1555b29d4986SJoe Thornber 	}
1556b29d4986SJoe Thornber 
1557b29d4986SJoe Thornber 	return 0;
1558b29d4986SJoe Thornber }
1559b29d4986SJoe Thornber 
invalidate_start(struct cache * cache,dm_cblock_t cblock,dm_oblock_t oblock,struct bio * bio)1560b29d4986SJoe Thornber static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
1561b29d4986SJoe Thornber 			    dm_oblock_t oblock, struct bio *bio)
1562b29d4986SJoe Thornber {
1563b29d4986SJoe Thornber 	struct dm_cache_migration *mg;
1564b29d4986SJoe Thornber 
1565b29d4986SJoe Thornber 	if (!background_work_begin(cache))
1566b29d4986SJoe Thornber 		return -EPERM;
1567b29d4986SJoe Thornber 
1568b29d4986SJoe Thornber 	mg = alloc_migration(cache);
1569b29d4986SJoe Thornber 
1570b29d4986SJoe Thornber 	mg->overwrite_bio = bio;
1571b29d4986SJoe Thornber 	mg->invalidate_cblock = cblock;
1572b29d4986SJoe Thornber 	mg->invalidate_oblock = oblock;
1573b29d4986SJoe Thornber 
1574b29d4986SJoe Thornber 	return invalidate_lock(mg);
15757ae34e77SJoe Thornber }
15767ae34e77SJoe Thornber 
1577a4a82ce3SHeinz Mauelshagen /*
1578a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1579c6b4fcbaSJoe Thornber  * bio processing
1580a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1581a4a82ce3SHeinz Mauelshagen  */
1582b29d4986SJoe Thornber 
1583b29d4986SJoe Thornber enum busy {
1584b29d4986SJoe Thornber 	IDLE,
1585b29d4986SJoe Thornber 	BUSY
1586b29d4986SJoe Thornber };
1587b29d4986SJoe Thornber 
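/*
 * An illustrative example: with 128-sector blocks and a
 * migration_threshold of 2048 sectors, the device reports IDLE while
 * at most 15 migrations are in flight ((15 + 1) * 128 == 2048) and
 * the tracker has seen no foreground I/O for a second (HZ); one more
 * in-flight migration, or any recent I/O, makes it BUSY.
 */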
spare_migration_bandwidth(struct cache * cache)1588b29d4986SJoe Thornber static enum busy spare_migration_bandwidth(struct cache *cache)
1589c6b4fcbaSJoe Thornber {
1590dc4fa29fSMike Snitzer 	bool idle = dm_iot_idle_for(&cache->tracker, HZ);
1591a59db676SJoe Thornber 	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
1592c6b4fcbaSJoe Thornber 		cache->sectors_per_block;
1593b29d4986SJoe Thornber 
159449b7f768SJoe Thornber 	if (idle && current_volume <= cache->migration_threshold)
159549b7f768SJoe Thornber 		return IDLE;
1596b29d4986SJoe Thornber 	else
159749b7f768SJoe Thornber 		return BUSY;
1598c6b4fcbaSJoe Thornber }
1599c6b4fcbaSJoe Thornber 
inc_hit_counter(struct cache * cache,struct bio * bio)1600c6b4fcbaSJoe Thornber static void inc_hit_counter(struct cache *cache, struct bio *bio)
1601c6b4fcbaSJoe Thornber {
1602c6b4fcbaSJoe Thornber 	atomic_inc(bio_data_dir(bio) == READ ?
1603c6b4fcbaSJoe Thornber 		   &cache->stats.read_hit : &cache->stats.write_hit);
1604c6b4fcbaSJoe Thornber }
1605c6b4fcbaSJoe Thornber 
inc_miss_counter(struct cache * cache,struct bio * bio)1606c6b4fcbaSJoe Thornber static void inc_miss_counter(struct cache *cache, struct bio *bio)
1607c6b4fcbaSJoe Thornber {
1608c6b4fcbaSJoe Thornber 	atomic_inc(bio_data_dir(bio) == READ ?
1609c6b4fcbaSJoe Thornber 		   &cache->stats.read_miss : &cache->stats.write_miss);
1610c6b4fcbaSJoe Thornber }
1611c6b4fcbaSJoe Thornber 
1612fb4100aeSJoe Thornber /*----------------------------------------------------------------*/
1613fb4100aeSJoe Thornber 
map_bio(struct cache * cache,struct bio * bio,dm_oblock_t block,bool * commit_needed)1614b29d4986SJoe Thornber static int map_bio(struct cache *cache, struct bio *bio, dm_oblock_t block,
1615b29d4986SJoe Thornber 		   bool *commit_needed)
1616651f5fa2SJoe Thornber {
1617b29d4986SJoe Thornber 	int r, data_dir;
1618b29d4986SJoe Thornber 	bool rb, background_queued;
1619b29d4986SJoe Thornber 	dm_cblock_t cblock;
1620651f5fa2SJoe Thornber 
1621b29d4986SJoe Thornber 	*commit_needed = false;
1622651f5fa2SJoe Thornber 
1623b29d4986SJoe Thornber 	rb = bio_detain_shared(cache, block, bio);
1624b29d4986SJoe Thornber 	if (!rb) {
16252ee57d58SJoe Thornber 		/*
1626b29d4986SJoe Thornber 		 * An exclusive lock is held for this block, so we have to
1627b29d4986SJoe Thornber 		 * wait.  We set the commit_needed flag so the current
1628b29d4986SJoe Thornber 		 * transaction will be committed asap, allowing this lock
1629b29d4986SJoe Thornber 		 * to be dropped.
16302ee57d58SJoe Thornber 		 */
1631b29d4986SJoe Thornber 		*commit_needed = true;
1632b29d4986SJoe Thornber 		return DM_MAPIO_SUBMITTED;
1633b29d4986SJoe Thornber 	}
16342ee57d58SJoe Thornber 
1635b29d4986SJoe Thornber 	data_dir = bio_data_dir(bio);
16362ee57d58SJoe Thornber 
1637b29d4986SJoe Thornber 	if (optimisable_bio(cache, bio, block)) {
1638b29d4986SJoe Thornber 		struct policy_work *op = NULL;
1639b29d4986SJoe Thornber 
1640b29d4986SJoe Thornber 		r = policy_lookup_with_work(cache->policy, block, &cblock, data_dir, true, &op);
1641b29d4986SJoe Thornber 		if (unlikely(r && r != -ENOENT)) {
1642b29d4986SJoe Thornber 			DMERR_LIMIT("%s: policy_lookup_with_work() failed with r = %d",
1643b29d4986SJoe Thornber 				    cache_device_name(cache), r);
1644b29d4986SJoe Thornber 			bio_io_error(bio);
1645b29d4986SJoe Thornber 			return DM_MAPIO_SUBMITTED;
1646b29d4986SJoe Thornber 		}
1647b29d4986SJoe Thornber 
1648b29d4986SJoe Thornber 		if (r == -ENOENT && op) {
1649b29d4986SJoe Thornber 			bio_drop_shared_lock(cache, bio);
1650b29d4986SJoe Thornber 			BUG_ON(op->op != POLICY_PROMOTE);
1651b29d4986SJoe Thornber 			mg_start(cache, op, bio);
1652b29d4986SJoe Thornber 			return DM_MAPIO_SUBMITTED;
16532ee57d58SJoe Thornber 		}
16542ee57d58SJoe Thornber 	} else {
1655b29d4986SJoe Thornber 		r = policy_lookup(cache->policy, block, &cblock, data_dir, false, &background_queued);
1656b29d4986SJoe Thornber 		if (unlikely(r && r != -ENOENT)) {
1657b29d4986SJoe Thornber 			DMERR_LIMIT("%s: policy_lookup() failed with r = %d",
1658b29d4986SJoe Thornber 				    cache_device_name(cache), r);
1659b29d4986SJoe Thornber 			bio_io_error(bio);
1660b29d4986SJoe Thornber 			return DM_MAPIO_SUBMITTED;
1661b29d4986SJoe Thornber 		}
1662b29d4986SJoe Thornber 
1663b29d4986SJoe Thornber 		if (background_queued)
1664b29d4986SJoe Thornber 			wake_migration_worker(cache);
1665b29d4986SJoe Thornber 	}
1666b29d4986SJoe Thornber 
1667b29d4986SJoe Thornber 	if (r == -ENOENT) {
1668693b960eSMike Snitzer 		struct per_bio_data *pb = get_per_bio_data(bio);
1669693b960eSMike Snitzer 
1670b29d4986SJoe Thornber 		/*
1671b29d4986SJoe Thornber 		 * Miss.
1672b29d4986SJoe Thornber 		 */
1673b29d4986SJoe Thornber 		inc_miss_counter(cache, bio);
1674b29d4986SJoe Thornber 		if (pb->req_nr == 0) {
1675b29d4986SJoe Thornber 			accounted_begin(cache, bio);
1676b29d4986SJoe Thornber 			remap_to_origin_clear_discard(cache, bio, block);
1677b29d4986SJoe Thornber 		} else {
1678b29d4986SJoe Thornber 			/*
1679b29d4986SJoe Thornber 			 * This is a duplicate writethrough io that is no
1680b29d4986SJoe Thornber 			 * longer needed because the block has been demoted.
1681b29d4986SJoe Thornber 			 */
1682b29d4986SJoe Thornber 			bio_endio(bio);
1683b29d4986SJoe Thornber 			return DM_MAPIO_SUBMITTED;
1684b29d4986SJoe Thornber 		}
1685b29d4986SJoe Thornber 	} else {
1686b29d4986SJoe Thornber 		/*
1687b29d4986SJoe Thornber 		 * Hit.
1688b29d4986SJoe Thornber 		 */
16892ee57d58SJoe Thornber 		inc_hit_counter(cache, bio);
16902ee57d58SJoe Thornber 
1691b29d4986SJoe Thornber 		/*
1692b29d4986SJoe Thornber 		 * Passthrough always maps to the origin, invalidating any
1693b29d4986SJoe Thornber 		 * cache blocks that are written to.
1694b29d4986SJoe Thornber 		 */
16958e3c3827SMike Snitzer 		if (passthrough_mode(cache)) {
1696b29d4986SJoe Thornber 			if (bio_data_dir(bio) == WRITE) {
1697b29d4986SJoe Thornber 				bio_drop_shared_lock(cache, bio);
1698b29d4986SJoe Thornber 				atomic_inc(&cache->stats.demotion);
1699b29d4986SJoe Thornber 				invalidate_start(cache, cblock, block, bio);
1700b29d4986SJoe Thornber 			} else
1701b29d4986SJoe Thornber 				remap_to_origin_clear_discard(cache, bio, block);
17028c081b52SJoe Thornber 		} else {
17038e3c3827SMike Snitzer 			if (bio_data_dir(bio) == WRITE && writethrough_mode(cache) &&
1704b29d4986SJoe Thornber 			    !is_dirty(cache, cblock)) {
17052df3bae9SMike Snitzer 				remap_to_origin_and_cache(cache, bio, block, cblock);
1706b29d4986SJoe Thornber 				accounted_begin(cache, bio);
1707b29d4986SJoe Thornber 			} else
1708b29d4986SJoe Thornber 				remap_to_cache_dirty(cache, bio, block, cblock);
17098c081b52SJoe Thornber 		}
17102ee57d58SJoe Thornber 	}
17112ee57d58SJoe Thornber 
1712651f5fa2SJoe Thornber 	/*
1713b29d4986SJoe Thornber 	 * dm core turns FUA requests into a separate payload bio and FLUSH request.
1714651f5fa2SJoe Thornber 	 */
1715b29d4986SJoe Thornber 	if (bio->bi_opf & REQ_FUA) {
1716b29d4986SJoe Thornber 		/*
1717b29d4986SJoe Thornber 		 * issue_after_commit will call accounted_begin a second time.  So
1718b29d4986SJoe Thornber 		 * we call accounted_complete() to avoid double accounting.
1719b29d4986SJoe Thornber 		 */
1720b29d4986SJoe Thornber 		accounted_complete(cache, bio);
1721b29d4986SJoe Thornber 		issue_after_commit(&cache->committer, bio);
1722b29d4986SJoe Thornber 		*commit_needed = true;
1723b29d4986SJoe Thornber 		return DM_MAPIO_SUBMITTED;
1724651f5fa2SJoe Thornber 	}
1725651f5fa2SJoe Thornber 
1726b29d4986SJoe Thornber 	return DM_MAPIO_REMAPPED;
1727b29d4986SJoe Thornber }
1728b29d4986SJoe Thornber 
process_bio(struct cache * cache,struct bio * bio)1729b29d4986SJoe Thornber static bool process_bio(struct cache *cache, struct bio *bio)
1730c6b4fcbaSJoe Thornber {
1731b29d4986SJoe Thornber 	bool commit_needed;
1732b29d4986SJoe Thornber 
1733b29d4986SJoe Thornber 	if (map_bio(cache, bio, get_bio_block(cache, bio), &commit_needed) == DM_MAPIO_REMAPPED)
173469596f55SMike Snitzer 		dm_submit_bio_remap(bio, NULL);
1735b29d4986SJoe Thornber 
1736b29d4986SJoe Thornber 	return commit_needed;
1737c6b4fcbaSJoe Thornber }
1738c6b4fcbaSJoe Thornber 
1739028ae9f7SJoe Thornber /*
1740028ae9f7SJoe Thornber  * A non-zero return indicates read_only or fail_io mode.
1741028ae9f7SJoe Thornber  */
commit(struct cache * cache,bool clean_shutdown)1742028ae9f7SJoe Thornber static int commit(struct cache *cache, bool clean_shutdown)
1743028ae9f7SJoe Thornber {
1744028ae9f7SJoe Thornber 	int r;
1745028ae9f7SJoe Thornber 
1746028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
1747028ae9f7SJoe Thornber 		return -EINVAL;
1748028ae9f7SJoe Thornber 
1749028ae9f7SJoe Thornber 	atomic_inc(&cache->stats.commit_count);
1750028ae9f7SJoe Thornber 	r = dm_cache_commit(cache->cmd, clean_shutdown);
1751028ae9f7SJoe Thornber 	if (r)
1752028ae9f7SJoe Thornber 		metadata_operation_failed(cache, "dm_cache_commit", r);
1753028ae9f7SJoe Thornber 
1754028ae9f7SJoe Thornber 	return r;
1755028ae9f7SJoe Thornber }
1756028ae9f7SJoe Thornber 
1757b29d4986SJoe Thornber /*
1758b29d4986SJoe Thornber  * Used by the batcher.  A single commit releases every bio queued
1758b29d4986SJoe Thornber  * with issue_after_commit() and every continuation registered with
1758b29d4986SJoe Thornber  * continue_after_commit().
1759b29d4986SJoe Thornber  */
commit_op(void * context)17604e4cbee9SChristoph Hellwig static blk_status_t commit_op(void *context)
1761c6b4fcbaSJoe Thornber {
1762b29d4986SJoe Thornber 	struct cache *cache = context;
1763ffcbcb67SHeinz Mauelshagen 
1764b29d4986SJoe Thornber 	if (dm_cache_changed_this_transaction(cache->cmd))
17654e4cbee9SChristoph Hellwig 		return errno_to_blk_status(commit(cache, false));
1766b29d4986SJoe Thornber 
1767b29d4986SJoe Thornber 	return 0;
1768c6b4fcbaSJoe Thornber }
1769c6b4fcbaSJoe Thornber 
1770b29d4986SJoe Thornber /*----------------------------------------------------------------*/
1771b29d4986SJoe Thornber 
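/*
 * Flushes arrive in duplicate because the target requests one flush
 * bio per underlying data device: req_nr 0 is directed at the origin
 * and req_nr 1 at the cache device, and neither completes until the
 * current metadata transaction has been committed.
 */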
process_flush_bio(struct cache * cache,struct bio * bio)1772b29d4986SJoe Thornber static bool process_flush_bio(struct cache *cache, struct bio *bio)
1773b29d4986SJoe Thornber {
1774693b960eSMike Snitzer 	struct per_bio_data *pb = get_per_bio_data(bio);
1775b29d4986SJoe Thornber 
1776b29d4986SJoe Thornber 	if (!pb->req_nr)
1777b29d4986SJoe Thornber 		remap_to_origin(cache, bio);
1778b29d4986SJoe Thornber 	else
1779b29d4986SJoe Thornber 		remap_to_cache(cache, bio, 0);
1780b29d4986SJoe Thornber 
1781b29d4986SJoe Thornber 	issue_after_commit(&cache->committer, bio);
1782b29d4986SJoe Thornber 	return true;
1783c6b4fcbaSJoe Thornber }
1784c6b4fcbaSJoe Thornber 
process_discard_bio(struct cache * cache,struct bio * bio)1785b29d4986SJoe Thornber static bool process_discard_bio(struct cache *cache, struct bio *bio)
1786c6b4fcbaSJoe Thornber {
1787b29d4986SJoe Thornber 	dm_dblock_t b, e;
1788b29d4986SJoe Thornber 
1789a4a82ce3SHeinz Mauelshagen 	/*
1790a4a82ce3SHeinz Mauelshagen 	 * FIXME: do we need to lock the region?  Or can we just assume the
1791a4a82ce3SHeinz Mauelshagen 	 * user won't be so foolish as to issue discard concurrently with
1792a4a82ce3SHeinz Mauelshagen 	 * other IO?
1793a4a82ce3SHeinz Mauelshagen 	 */
1794b29d4986SJoe Thornber 	calc_discard_block_range(cache, bio, &b, &e);
1795b29d4986SJoe Thornber 	while (b != e) {
1796b29d4986SJoe Thornber 		set_discard(cache, b);
1797b29d4986SJoe Thornber 		b = to_dblock(from_dblock(b) + 1);
1798b29d4986SJoe Thornber 	}
1799b29d4986SJoe Thornber 
1800de7180ffSMike Snitzer 	if (cache->features.discard_passdown) {
1801de7180ffSMike Snitzer 		remap_to_origin(cache, bio);
180269596f55SMike Snitzer 		dm_submit_bio_remap(bio, NULL);
1803de7180ffSMike Snitzer 	} else
1804b29d4986SJoe Thornber 		bio_endio(bio);
1805b29d4986SJoe Thornber 
1806b29d4986SJoe Thornber 	return false;
1807b29d4986SJoe Thornber }
1808b29d4986SJoe Thornber 
process_deferred_bios(struct work_struct * ws)1809b29d4986SJoe Thornber static void process_deferred_bios(struct work_struct *ws)
1810b29d4986SJoe Thornber {
1811b29d4986SJoe Thornber 	struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
1812b29d4986SJoe Thornber 
1813b29d4986SJoe Thornber 	bool commit_needed = false;
1814c6b4fcbaSJoe Thornber 	struct bio_list bios;
1815c6b4fcbaSJoe Thornber 	struct bio *bio;
1816c6b4fcbaSJoe Thornber 
1817c6b4fcbaSJoe Thornber 	bio_list_init(&bios);
1818c6b4fcbaSJoe Thornber 
181926b924b9SMikulas Patocka 	spin_lock_irq(&cache->lock);
1820c6b4fcbaSJoe Thornber 	bio_list_merge(&bios, &cache->deferred_bios);
1821c6b4fcbaSJoe Thornber 	bio_list_init(&cache->deferred_bios);
182226b924b9SMikulas Patocka 	spin_unlock_irq(&cache->lock);
1823c6b4fcbaSJoe Thornber 
1824b29d4986SJoe Thornber 	while ((bio = bio_list_pop(&bios))) {
18251eff9d32SJens Axboe 		if (bio->bi_opf & REQ_PREFLUSH)
1826b29d4986SJoe Thornber 			commit_needed = process_flush_bio(cache, bio) || commit_needed;
1827b29d4986SJoe Thornber 
1828e6047149SMike Christie 		else if (bio_op(bio) == REQ_OP_DISCARD)
1829b29d4986SJoe Thornber 			commit_needed = process_discard_bio(cache, bio) || commit_needed;
1830b29d4986SJoe Thornber 
1831c6b4fcbaSJoe Thornber 		else
1832b29d4986SJoe Thornber 			commit_needed = process_bio(cache, bio) || commit_needed;
183376227f6dSMike Snitzer 		cond_resched();
1834c6b4fcbaSJoe Thornber 	}
1835c6b4fcbaSJoe Thornber 
1836b29d4986SJoe Thornber 	if (commit_needed)
1837b29d4986SJoe Thornber 		schedule_commit(&cache->committer);
1838c6b4fcbaSJoe Thornber }
1839c6b4fcbaSJoe Thornber 
1840a4a82ce3SHeinz Mauelshagen /*
1841a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1842c6b4fcbaSJoe Thornber  * Main worker loop
1843a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1844a4a82ce3SHeinz Mauelshagen  */
requeue_deferred_bios(struct cache * cache)1845651f5fa2SJoe Thornber static void requeue_deferred_bios(struct cache *cache)
1846c6b4fcbaSJoe Thornber {
1847c6b4fcbaSJoe Thornber 	struct bio *bio;
1848c6b4fcbaSJoe Thornber 	struct bio_list bios;
1849c6b4fcbaSJoe Thornber 
1850c6b4fcbaSJoe Thornber 	bio_list_init(&bios);
1851c6b4fcbaSJoe Thornber 	bio_list_merge(&bios, &cache->deferred_bios);
1852c6b4fcbaSJoe Thornber 	bio_list_init(&cache->deferred_bios);
1853c6b4fcbaSJoe Thornber 
18544246a0b6SChristoph Hellwig 	while ((bio = bio_list_pop(&bios))) {
18554e4cbee9SChristoph Hellwig 		bio->bi_status = BLK_STS_DM_REQUEUE;
18564246a0b6SChristoph Hellwig 		bio_endio(bio);
185776227f6dSMike Snitzer 		cond_resched();
18584246a0b6SChristoph Hellwig 	}
1859c6b4fcbaSJoe Thornber }
1860c6b4fcbaSJoe Thornber 
1861c6b4fcbaSJoe Thornber /*
1862c6b4fcbaSJoe Thornber  * We want to commit periodically so that not too much
1863c6b4fcbaSJoe Thornber  * unwritten metadata builds up.
1864c6b4fcbaSJoe Thornber  */
do_waker(struct work_struct * ws)1865c6b4fcbaSJoe Thornber static void do_waker(struct work_struct *ws)
1866c6b4fcbaSJoe Thornber {
1867c6b4fcbaSJoe Thornber 	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
1868b29d4986SJoe Thornber 
1869fba10109SJoe Thornber 	policy_tick(cache->policy, true);
1870b29d4986SJoe Thornber 	wake_migration_worker(cache);
1871b29d4986SJoe Thornber 	schedule_commit(&cache->committer);
1872c6b4fcbaSJoe Thornber 	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1873c6b4fcbaSJoe Thornber }
1874c6b4fcbaSJoe Thornber 
check_migrations(struct work_struct * ws)1875b29d4986SJoe Thornber static void check_migrations(struct work_struct *ws)
1876c6b4fcbaSJoe Thornber {
1877b29d4986SJoe Thornber 	int r;
1878b29d4986SJoe Thornber 	struct policy_work *op;
1879b29d4986SJoe Thornber 	struct cache *cache = container_of(ws, struct cache, migration_worker);
1880b29d4986SJoe Thornber 	enum busy b;
1881b29d4986SJoe Thornber 
1882b29d4986SJoe Thornber 	for (;;) {
1883b29d4986SJoe Thornber 		b = spare_migration_bandwidth(cache);
1884b29d4986SJoe Thornber 
1885b29d4986SJoe Thornber 		r = policy_get_background_work(cache->policy, b == IDLE, &op);
1886b29d4986SJoe Thornber 		if (r == -ENODATA)
1887b29d4986SJoe Thornber 			break;
1888b29d4986SJoe Thornber 
1889b29d4986SJoe Thornber 		if (r) {
1890b29d4986SJoe Thornber 			DMERR_LIMIT("%s: policy_background_work failed",
1891b29d4986SJoe Thornber 				    cache_device_name(cache));
1892b29d4986SJoe Thornber 			break;
1893c6b4fcbaSJoe Thornber 		}
1894c6b4fcbaSJoe Thornber 
1895b29d4986SJoe Thornber 		r = mg_start(cache, op, NULL);
1896b29d4986SJoe Thornber 		if (r)
1897b29d4986SJoe Thornber 			break;
189876227f6dSMike Snitzer 
189976227f6dSMike Snitzer 		cond_resched();
1900b29d4986SJoe Thornber 	}
1901c6b4fcbaSJoe Thornber }
1902c6b4fcbaSJoe Thornber 
1903a4a82ce3SHeinz Mauelshagen /*
1904a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1905c6b4fcbaSJoe Thornber  * Target methods
1906a4a82ce3SHeinz Mauelshagen  *--------------------------------------------------------------
1907a4a82ce3SHeinz Mauelshagen  */
1908c6b4fcbaSJoe Thornber 
1909c6b4fcbaSJoe Thornber /*
1910c6b4fcbaSJoe Thornber  * This function gets called on the error paths of the constructor, so we
1911c6b4fcbaSJoe Thornber  * have to cope with a partially initialised struct.
1912c6b4fcbaSJoe Thornber  */
__destroy(struct cache * cache)19138cc12dabSMing-Hung Tsai static void __destroy(struct cache *cache)
1914c6b4fcbaSJoe Thornber {
19156f1c819cSKent Overstreet 	mempool_exit(&cache->migration_pool);
1916c6b4fcbaSJoe Thornber 
1917c6b4fcbaSJoe Thornber 	if (cache->prison)
1918b29d4986SJoe Thornber 		dm_bio_prison_destroy_v2(cache->prison);
1919c6b4fcbaSJoe Thornber 
1920c6b4fcbaSJoe Thornber 	if (cache->wq)
1921c6b4fcbaSJoe Thornber 		destroy_workqueue(cache->wq);
1922c6b4fcbaSJoe Thornber 
1923c6b4fcbaSJoe Thornber 	if (cache->dirty_bitset)
1924c6b4fcbaSJoe Thornber 		free_bitset(cache->dirty_bitset);
1925c6b4fcbaSJoe Thornber 
1926c6b4fcbaSJoe Thornber 	if (cache->discard_bitset)
1927c6b4fcbaSJoe Thornber 		free_bitset(cache->discard_bitset);
1928c6b4fcbaSJoe Thornber 
1929c6b4fcbaSJoe Thornber 	if (cache->copier)
1930c6b4fcbaSJoe Thornber 		dm_kcopyd_client_destroy(cache->copier);
1931c6b4fcbaSJoe Thornber 
1932c6b4fcbaSJoe Thornber 	if (cache->cmd)
1933c6b4fcbaSJoe Thornber 		dm_cache_metadata_close(cache->cmd);
1934c6b4fcbaSJoe Thornber 
1935c6b4fcbaSJoe Thornber 	if (cache->metadata_dev)
1936c6b4fcbaSJoe Thornber 		dm_put_device(cache->ti, cache->metadata_dev);
1937c6b4fcbaSJoe Thornber 
1938c6b4fcbaSJoe Thornber 	if (cache->origin_dev)
1939c6b4fcbaSJoe Thornber 		dm_put_device(cache->ti, cache->origin_dev);
1940c6b4fcbaSJoe Thornber 
1941c6b4fcbaSJoe Thornber 	if (cache->cache_dev)
1942c6b4fcbaSJoe Thornber 		dm_put_device(cache->ti, cache->cache_dev);
1943c6b4fcbaSJoe Thornber 
1944c6b4fcbaSJoe Thornber 	if (cache->policy)
1945c6b4fcbaSJoe Thornber 		dm_cache_policy_destroy(cache->policy);
1946c6b4fcbaSJoe Thornber 
19478cc12dabSMing-Hung Tsai 	bioset_exit(&cache->bs);
19488cc12dabSMing-Hung Tsai 
19498cc12dabSMing-Hung Tsai 	kfree(cache);
19508cc12dabSMing-Hung Tsai }
19518cc12dabSMing-Hung Tsai 
destroy(struct cache * cache)19528cc12dabSMing-Hung Tsai static void destroy(struct cache *cache)
19538cc12dabSMing-Hung Tsai {
19548cc12dabSMing-Hung Tsai 	unsigned int i;
19558cc12dabSMing-Hung Tsai 
19568cc12dabSMing-Hung Tsai 	cancel_delayed_work_sync(&cache->waker);
19578cc12dabSMing-Hung Tsai 
1958c6b4fcbaSJoe Thornber 	for (i = 0; i < cache->nr_ctr_args; i++)
1959c6b4fcbaSJoe Thornber 		kfree(cache->ctr_args[i]);
1960c6b4fcbaSJoe Thornber 	kfree(cache->ctr_args);
1961c6b4fcbaSJoe Thornber 
19628cc12dabSMing-Hung Tsai 	__destroy(cache);
1963c6b4fcbaSJoe Thornber }
1964c6b4fcbaSJoe Thornber 
cache_dtr(struct dm_target * ti)1965c6b4fcbaSJoe Thornber static void cache_dtr(struct dm_target *ti)
1966c6b4fcbaSJoe Thornber {
1967c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
1968c6b4fcbaSJoe Thornber 
1969c6b4fcbaSJoe Thornber 	destroy(cache);
1970c6b4fcbaSJoe Thornber }
1971c6b4fcbaSJoe Thornber 
get_dev_size(struct dm_dev * dev)1972c6b4fcbaSJoe Thornber static sector_t get_dev_size(struct dm_dev *dev)
1973c6b4fcbaSJoe Thornber {
19746dcbb52cSChristoph Hellwig 	return bdev_nr_sectors(dev->bdev);
1975c6b4fcbaSJoe Thornber }
1976c6b4fcbaSJoe Thornber 
1977c6b4fcbaSJoe Thornber /*----------------------------------------------------------------*/
1978c6b4fcbaSJoe Thornber 
1979c6b4fcbaSJoe Thornber /*
1980c6b4fcbaSJoe Thornber  * Construct a cache device mapping.
1981c6b4fcbaSJoe Thornber  *
1982c6b4fcbaSJoe Thornber  * cache <metadata dev> <cache dev> <origin dev> <block size>
1983c6b4fcbaSJoe Thornber  *       <#feature args> [<feature arg>]*
1984c6b4fcbaSJoe Thornber  *       <policy> <#policy args> [<policy arg>]*
1985c6b4fcbaSJoe Thornber  *
1986c6b4fcbaSJoe Thornber  * metadata dev    : fast device holding the persistent metadata
1987c6b4fcbaSJoe Thornber  * cache dev	   : fast device holding cached data blocks
1988c6b4fcbaSJoe Thornber  * origin dev	   : slow device holding original data blocks
1989c6b4fcbaSJoe Thornber  * block size	   : cache unit size in sectors
1990c6b4fcbaSJoe Thornber  *
1991c6b4fcbaSJoe Thornber  * #feature args   : number of feature arguments passed
1992c6b4fcbaSJoe Thornber  * feature args    : writeback, writethrough, passthrough, metadata2
1992c6b4fcbaSJoe Thornber  *		     and/or no_discard_passdown.  (The default is writeback.)
1993c6b4fcbaSJoe Thornber  *
1994c6b4fcbaSJoe Thornber  * policy	   : the replacement policy to use
1995c6b4fcbaSJoe Thornber  * #policy args    : an even number of policy arguments corresponding
1996c6b4fcbaSJoe Thornber  *		     to key/value pairs passed to the policy
1997c6b4fcbaSJoe Thornber  * policy args	   : key/value pairs passed to the policy
1998c6b4fcbaSJoe Thornber  *		     E.g. 'sequential_threshold 1024'
1999c6b4fcbaSJoe Thornber  *		     See cache-policies.txt for details.
2000c6b4fcbaSJoe Thornber  *
2001c6b4fcbaSJoe Thornber  * Optional feature arguments are:
2002c6b4fcbaSJoe Thornber  *   writethrough  : write through caching that prohibits cache block
2003c6b4fcbaSJoe Thornber  *		     content from being different from origin block content.
2004c6b4fcbaSJoe Thornber  *		     Without this argument, the default behaviour is to write
2005c6b4fcbaSJoe Thornber  *		     back cache block contents later for performance reasons,
2006c6b4fcbaSJoe Thornber  *		     so they may differ from the corresponding origin blocks.
2007c6b4fcbaSJoe Thornber  */
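/*
 * An example table line (device paths and sizes are illustrative
 * only):
 *
 *   0 41943040 cache /dev/mapper/fast-meta /dev/mapper/fast \
 *	/dev/mapper/slow 512 1 writeback default 0
 *
 * This caches a 20 GiB origin in 512-sector (256 KiB) blocks, using
 * writeback mode and the 'default' policy with no tuning arguments.
 */
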
2008c6b4fcbaSJoe Thornber struct cache_args {
2009c6b4fcbaSJoe Thornber 	struct dm_target *ti;
2010c6b4fcbaSJoe Thornber 
2011c6b4fcbaSJoe Thornber 	struct dm_dev *metadata_dev;
2012c6b4fcbaSJoe Thornber 
2013c6b4fcbaSJoe Thornber 	struct dm_dev *cache_dev;
2014c6b4fcbaSJoe Thornber 	sector_t cache_sectors;
2015c6b4fcbaSJoe Thornber 
2016c6b4fcbaSJoe Thornber 	struct dm_dev *origin_dev;
2017c6b4fcbaSJoe Thornber 
2018c6b4fcbaSJoe Thornber 	uint32_t block_size;
2019c6b4fcbaSJoe Thornber 
2020c6b4fcbaSJoe Thornber 	const char *policy_name;
2021c6b4fcbaSJoe Thornber 	int policy_argc;
2022c6b4fcbaSJoe Thornber 	const char **policy_argv;
2023c6b4fcbaSJoe Thornber 
2024c6b4fcbaSJoe Thornber 	struct cache_features features;
2025c6b4fcbaSJoe Thornber };
2026c6b4fcbaSJoe Thornber 
destroy_cache_args(struct cache_args * ca)2027c6b4fcbaSJoe Thornber static void destroy_cache_args(struct cache_args *ca)
2028c6b4fcbaSJoe Thornber {
2029c6b4fcbaSJoe Thornber 	if (ca->metadata_dev)
2030c6b4fcbaSJoe Thornber 		dm_put_device(ca->ti, ca->metadata_dev);
2031c6b4fcbaSJoe Thornber 
2032c6b4fcbaSJoe Thornber 	if (ca->cache_dev)
2033c6b4fcbaSJoe Thornber 		dm_put_device(ca->ti, ca->cache_dev);
2034c6b4fcbaSJoe Thornber 
2035c6b4fcbaSJoe Thornber 	if (ca->origin_dev)
2036c6b4fcbaSJoe Thornber 		dm_put_device(ca->ti, ca->origin_dev);
2037c6b4fcbaSJoe Thornber 
2038c6b4fcbaSJoe Thornber 	kfree(ca);
2039c6b4fcbaSJoe Thornber }
2040c6b4fcbaSJoe Thornber 
at_least_one_arg(struct dm_arg_set * as,char ** error)2041c6b4fcbaSJoe Thornber static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2042c6b4fcbaSJoe Thornber {
2043c6b4fcbaSJoe Thornber 	if (!as->argc) {
2044c6b4fcbaSJoe Thornber 		*error = "Insufficient args";
2045c6b4fcbaSJoe Thornber 		return false;
2046c6b4fcbaSJoe Thornber 	}
2047c6b4fcbaSJoe Thornber 
2048c6b4fcbaSJoe Thornber 	return true;
2049c6b4fcbaSJoe Thornber }
2050c6b4fcbaSJoe Thornber 
parse_metadata_dev(struct cache_args * ca,struct dm_arg_set * as,char ** error)2051c6b4fcbaSJoe Thornber static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2052c6b4fcbaSJoe Thornber 			      char **error)
2053c6b4fcbaSJoe Thornber {
2054c6b4fcbaSJoe Thornber 	int r;
2055c6b4fcbaSJoe Thornber 	sector_t metadata_dev_size;
2056c6b4fcbaSJoe Thornber 
2057c6b4fcbaSJoe Thornber 	if (!at_least_one_arg(as, error))
2058c6b4fcbaSJoe Thornber 		return -EINVAL;
2059c6b4fcbaSJoe Thornber 
206005bdb996SChristoph Hellwig 	r = dm_get_device(ca->ti, dm_shift_arg(as),
206105bdb996SChristoph Hellwig 			  BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->metadata_dev);
2062c6b4fcbaSJoe Thornber 	if (r) {
2063c6b4fcbaSJoe Thornber 		*error = "Error opening metadata device";
2064c6b4fcbaSJoe Thornber 		return r;
2065c6b4fcbaSJoe Thornber 	}
2066c6b4fcbaSJoe Thornber 
2067c6b4fcbaSJoe Thornber 	metadata_dev_size = get_dev_size(ca->metadata_dev);
2068c6b4fcbaSJoe Thornber 	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2069385411ffSChristoph Hellwig 		DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
2070385411ffSChristoph Hellwig 		       ca->metadata_dev->bdev, DM_CACHE_METADATA_MAX_SECTORS);
2071c6b4fcbaSJoe Thornber 
2072c6b4fcbaSJoe Thornber 	return 0;
2073c6b4fcbaSJoe Thornber }
2074c6b4fcbaSJoe Thornber 
parse_cache_dev(struct cache_args * ca,struct dm_arg_set * as,char ** error)2075c6b4fcbaSJoe Thornber static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2076c6b4fcbaSJoe Thornber 			   char **error)
2077c6b4fcbaSJoe Thornber {
2078c6b4fcbaSJoe Thornber 	int r;
2079c6b4fcbaSJoe Thornber 
2080c6b4fcbaSJoe Thornber 	if (!at_least_one_arg(as, error))
2081c6b4fcbaSJoe Thornber 		return -EINVAL;
2082c6b4fcbaSJoe Thornber 
208305bdb996SChristoph Hellwig 	r = dm_get_device(ca->ti, dm_shift_arg(as),
208405bdb996SChristoph Hellwig 			  BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->cache_dev);
2085c6b4fcbaSJoe Thornber 	if (r) {
2086c6b4fcbaSJoe Thornber 		*error = "Error opening cache device";
2087c6b4fcbaSJoe Thornber 		return r;
2088c6b4fcbaSJoe Thornber 	}
2089c6b4fcbaSJoe Thornber 	ca->cache_sectors = get_dev_size(ca->cache_dev);
2090c6b4fcbaSJoe Thornber 
2091c6b4fcbaSJoe Thornber 	return 0;
2092c6b4fcbaSJoe Thornber }
2093c6b4fcbaSJoe Thornber 
parse_origin_dev(struct cache_args * ca,struct dm_arg_set * as,char ** error)2094c6b4fcbaSJoe Thornber static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2095c6b4fcbaSJoe Thornber 			    char **error)
2096c6b4fcbaSJoe Thornber {
209711d5a3f8SMing-Hung Tsai 	sector_t origin_sectors;
2098c6b4fcbaSJoe Thornber 	int r;
2099c6b4fcbaSJoe Thornber 
2100c6b4fcbaSJoe Thornber 	if (!at_least_one_arg(as, error))
2101c6b4fcbaSJoe Thornber 		return -EINVAL;
2102c6b4fcbaSJoe Thornber 
210305bdb996SChristoph Hellwig 	r = dm_get_device(ca->ti, dm_shift_arg(as),
210405bdb996SChristoph Hellwig 			  BLK_OPEN_READ | BLK_OPEN_WRITE, &ca->origin_dev);
2105c6b4fcbaSJoe Thornber 	if (r) {
2106c6b4fcbaSJoe Thornber 		*error = "Error opening origin device";
2107c6b4fcbaSJoe Thornber 		return r;
2108c6b4fcbaSJoe Thornber 	}
2109c6b4fcbaSJoe Thornber 
211011d5a3f8SMing-Hung Tsai 	origin_sectors = get_dev_size(ca->origin_dev);
211111d5a3f8SMing-Hung Tsai 	if (ca->ti->len > origin_sectors) {
2112c6b4fcbaSJoe Thornber 		*error = "Device size larger than cached device";
2113c6b4fcbaSJoe Thornber 		return -EINVAL;
2114c6b4fcbaSJoe Thornber 	}
2115c6b4fcbaSJoe Thornber 
2116c6b4fcbaSJoe Thornber 	return 0;
2117c6b4fcbaSJoe Thornber }
2118c6b4fcbaSJoe Thornber 
parse_block_size(struct cache_args * ca,struct dm_arg_set * as,char ** error)2119c6b4fcbaSJoe Thornber static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2120c6b4fcbaSJoe Thornber 			    char **error)
2121c6b4fcbaSJoe Thornber {
212205473044SMike Snitzer 	unsigned long block_size;
2123c6b4fcbaSJoe Thornber 
2124c6b4fcbaSJoe Thornber 	if (!at_least_one_arg(as, error))
2125c6b4fcbaSJoe Thornber 		return -EINVAL;
2126c6b4fcbaSJoe Thornber 
212705473044SMike Snitzer 	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
212805473044SMike Snitzer 	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
212905473044SMike Snitzer 	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
213005473044SMike Snitzer 	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
2131c6b4fcbaSJoe Thornber 		*error = "Invalid data block size";
2132c6b4fcbaSJoe Thornber 		return -EINVAL;
2133c6b4fcbaSJoe Thornber 	}
2134c6b4fcbaSJoe Thornber 
213505473044SMike Snitzer 	if (block_size > ca->cache_sectors) {
2136c6b4fcbaSJoe Thornber 		*error = "Data block size is larger than the cache device";
2137c6b4fcbaSJoe Thornber 		return -EINVAL;
2138c6b4fcbaSJoe Thornber 	}
2139c6b4fcbaSJoe Thornber 
214005473044SMike Snitzer 	ca->block_size = block_size;
2141c6b4fcbaSJoe Thornber 
2142c6b4fcbaSJoe Thornber 	return 0;
2143c6b4fcbaSJoe Thornber }
2144c6b4fcbaSJoe Thornber 
init_features(struct cache_features * cf)2145c6b4fcbaSJoe Thornber static void init_features(struct cache_features *cf)
2146c6b4fcbaSJoe Thornber {
2147c6b4fcbaSJoe Thornber 	cf->mode = CM_WRITE;
21482ee57d58SJoe Thornber 	cf->io_mode = CM_IO_WRITEBACK;
2149629d0a8aSJoe Thornber 	cf->metadata_version = 1;
2150de7180ffSMike Snitzer 	cf->discard_passdown = true;
2151c6b4fcbaSJoe Thornber }
2152c6b4fcbaSJoe Thornber 
parse_features(struct cache_args * ca,struct dm_arg_set * as,char ** error)2153c6b4fcbaSJoe Thornber static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2154c6b4fcbaSJoe Thornber 			  char **error)
2155c6b4fcbaSJoe Thornber {
21565916a22bSEric Biggers 	static const struct dm_arg _args[] = {
2157de7180ffSMike Snitzer 		{0, 3, "Invalid number of cache feature arguments"},
2158c6b4fcbaSJoe Thornber 	};
2159c6b4fcbaSJoe Thornber 
2160af9313c3SJohn Pittman 	int r, mode_ctr = 0;
216186a3238cSHeinz Mauelshagen 	unsigned int argc;
2162c6b4fcbaSJoe Thornber 	const char *arg;
2163c6b4fcbaSJoe Thornber 	struct cache_features *cf = &ca->features;
2164c6b4fcbaSJoe Thornber 
2165c6b4fcbaSJoe Thornber 	init_features(cf);
2166c6b4fcbaSJoe Thornber 
2167c6b4fcbaSJoe Thornber 	r = dm_read_arg_group(_args, as, &argc, error);
2168c6b4fcbaSJoe Thornber 	if (r)
2169c6b4fcbaSJoe Thornber 		return -EINVAL;
2170c6b4fcbaSJoe Thornber 
2171c6b4fcbaSJoe Thornber 	while (argc--) {
2172c6b4fcbaSJoe Thornber 		arg = dm_shift_arg(as);
2173c6b4fcbaSJoe Thornber 
2174af9313c3SJohn Pittman 		if (!strcasecmp(arg, "writeback")) {
21752ee57d58SJoe Thornber 			cf->io_mode = CM_IO_WRITEBACK;
2176af9313c3SJohn Pittman 			mode_ctr++;
2177af9313c3SJohn Pittman 		}
2178c6b4fcbaSJoe Thornber 
2179af9313c3SJohn Pittman 		else if (!strcasecmp(arg, "writethrough")) {
21802ee57d58SJoe Thornber 			cf->io_mode = CM_IO_WRITETHROUGH;
2181af9313c3SJohn Pittman 			mode_ctr++;
2182af9313c3SJohn Pittman 		}
21832ee57d58SJoe Thornber 
2184af9313c3SJohn Pittman 		else if (!strcasecmp(arg, "passthrough")) {
21852ee57d58SJoe Thornber 			cf->io_mode = CM_IO_PASSTHROUGH;
2186af9313c3SJohn Pittman 			mode_ctr++;
2187af9313c3SJohn Pittman 		}
2188c6b4fcbaSJoe Thornber 
2189629d0a8aSJoe Thornber 		else if (!strcasecmp(arg, "metadata2"))
2190629d0a8aSJoe Thornber 			cf->metadata_version = 2;
2191629d0a8aSJoe Thornber 
2192de7180ffSMike Snitzer 		else if (!strcasecmp(arg, "no_discard_passdown"))
2193de7180ffSMike Snitzer 			cf->discard_passdown = false;
2194de7180ffSMike Snitzer 
2195c6b4fcbaSJoe Thornber 		else {
2196c6b4fcbaSJoe Thornber 			*error = "Unrecognised cache feature requested";
2197c6b4fcbaSJoe Thornber 			return -EINVAL;
2198c6b4fcbaSJoe Thornber 		}
2199c6b4fcbaSJoe Thornber 	}
2200c6b4fcbaSJoe Thornber 
2201af9313c3SJohn Pittman 	if (mode_ctr > 1) {
2202af9313c3SJohn Pittman 		*error = "Duplicate cache io_mode features requested";
2203af9313c3SJohn Pittman 		return -EINVAL;
2204af9313c3SJohn Pittman 	}
2205af9313c3SJohn Pittman 
2206c6b4fcbaSJoe Thornber 	return 0;
2207c6b4fcbaSJoe Thornber }
2208c6b4fcbaSJoe Thornber 
2209c6b4fcbaSJoe Thornber static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2210c6b4fcbaSJoe Thornber 			char **error)
2211c6b4fcbaSJoe Thornber {
22125916a22bSEric Biggers 	static const struct dm_arg _args[] = {
2213c6b4fcbaSJoe Thornber 		{0, 1024, "Invalid number of policy arguments"},
2214c6b4fcbaSJoe Thornber 	};
2215c6b4fcbaSJoe Thornber 
2216c6b4fcbaSJoe Thornber 	int r;
2217c6b4fcbaSJoe Thornber 
2218c6b4fcbaSJoe Thornber 	if (!at_least_one_arg(as, error))
2219c6b4fcbaSJoe Thornber 		return -EINVAL;
2220c6b4fcbaSJoe Thornber 
2221c6b4fcbaSJoe Thornber 	ca->policy_name = dm_shift_arg(as);
2222c6b4fcbaSJoe Thornber 
2223c6b4fcbaSJoe Thornber 	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2224c6b4fcbaSJoe Thornber 	if (r)
2225c6b4fcbaSJoe Thornber 		return -EINVAL;
2226c6b4fcbaSJoe Thornber 
2227c6b4fcbaSJoe Thornber 	ca->policy_argv = (const char **)as->argv;
2228c6b4fcbaSJoe Thornber 	dm_consume_args(as, ca->policy_argc);
2229c6b4fcbaSJoe Thornber 
2230c6b4fcbaSJoe Thornber 	return 0;
2231c6b4fcbaSJoe Thornber }
2232c6b4fcbaSJoe Thornber 
2233c6b4fcbaSJoe Thornber static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2234c6b4fcbaSJoe Thornber 			    char **error)
2235c6b4fcbaSJoe Thornber {
2236c6b4fcbaSJoe Thornber 	int r;
2237c6b4fcbaSJoe Thornber 	struct dm_arg_set as;
2238c6b4fcbaSJoe Thornber 
2239c6b4fcbaSJoe Thornber 	as.argc = argc;
2240c6b4fcbaSJoe Thornber 	as.argv = argv;
2241c6b4fcbaSJoe Thornber 
2242c6b4fcbaSJoe Thornber 	r = parse_metadata_dev(ca, &as, error);
2243c6b4fcbaSJoe Thornber 	if (r)
2244c6b4fcbaSJoe Thornber 		return r;
2245c6b4fcbaSJoe Thornber 
2246c6b4fcbaSJoe Thornber 	r = parse_cache_dev(ca, &as, error);
2247c6b4fcbaSJoe Thornber 	if (r)
2248c6b4fcbaSJoe Thornber 		return r;
2249c6b4fcbaSJoe Thornber 
2250c6b4fcbaSJoe Thornber 	r = parse_origin_dev(ca, &as, error);
2251c6b4fcbaSJoe Thornber 	if (r)
2252c6b4fcbaSJoe Thornber 		return r;
2253c6b4fcbaSJoe Thornber 
2254c6b4fcbaSJoe Thornber 	r = parse_block_size(ca, &as, error);
2255c6b4fcbaSJoe Thornber 	if (r)
2256c6b4fcbaSJoe Thornber 		return r;
2257c6b4fcbaSJoe Thornber 
2258c6b4fcbaSJoe Thornber 	r = parse_features(ca, &as, error);
2259c6b4fcbaSJoe Thornber 	if (r)
2260c6b4fcbaSJoe Thornber 		return r;
2261c6b4fcbaSJoe Thornber 
2262c6b4fcbaSJoe Thornber 	r = parse_policy(ca, &as, error);
2263c6b4fcbaSJoe Thornber 	if (r)
2264c6b4fcbaSJoe Thornber 		return r;
2265c6b4fcbaSJoe Thornber 
2266c6b4fcbaSJoe Thornber 	return 0;
2267c6b4fcbaSJoe Thornber }
2268c6b4fcbaSJoe Thornber 
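/*
 * Editor's illustration of a table line consumed by the parsers above (all
 * device paths and sizes are invented):
 *
 *   0 41943040 cache /dev/mapper/meta /dev/mapper/ssd /dev/mapper/origin \
 *       512 1 writeback smq 0
 *
 * i.e. metadata dev, cache dev, origin dev, a 512-sector (256 KiB) block
 * size, one feature arg ("writeback"), then the "smq" policy with zero
 * policy args.
 */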
2269c6b4fcbaSJoe Thornber /*----------------------------------------------------------------*/
2270c6b4fcbaSJoe Thornber 
2271*f14fc623SMikulas Patocka static struct kmem_cache *migration_cache = NULL;
2272c6b4fcbaSJoe Thornber 
22732c73c471SAlasdair G Kergon #define NOT_CORE_OPTION 1
22742c73c471SAlasdair G Kergon 
22752f14f4b5SJoe Thornber static int process_config_option(struct cache *cache, const char *key, const char *value)
22762c73c471SAlasdair G Kergon {
22772c73c471SAlasdair G Kergon 	unsigned long tmp;
22782c73c471SAlasdair G Kergon 
22792f14f4b5SJoe Thornber 	if (!strcasecmp(key, "migration_threshold")) {
22802f14f4b5SJoe Thornber 		if (kstrtoul(value, 10, &tmp))
22812c73c471SAlasdair G Kergon 			return -EINVAL;
22822c73c471SAlasdair G Kergon 
22832c73c471SAlasdair G Kergon 		cache->migration_threshold = tmp;
22842c73c471SAlasdair G Kergon 		return 0;
22852c73c471SAlasdair G Kergon 	}
22862c73c471SAlasdair G Kergon 
22872c73c471SAlasdair G Kergon 	return NOT_CORE_OPTION;
22882c73c471SAlasdair G Kergon }
22892c73c471SAlasdair G Kergon 
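/*
 * Editor's illustration: migration_threshold can also be changed on a live
 * device via a message (the device name here is hypothetical):
 *
 *   dmsetup message my-cache 0 migration_threshold 4096
 */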
22902f14f4b5SJoe Thornber static int set_config_value(struct cache *cache, const char *key, const char *value)
22912f14f4b5SJoe Thornber {
22922f14f4b5SJoe Thornber 	int r = process_config_option(cache, key, value);
22932f14f4b5SJoe Thornber 
22942f14f4b5SJoe Thornber 	if (r == NOT_CORE_OPTION)
22952f14f4b5SJoe Thornber 		r = policy_set_config_value(cache->policy, key, value);
22962f14f4b5SJoe Thornber 
22972f14f4b5SJoe Thornber 	if (r)
22982f14f4b5SJoe Thornber 		DMWARN("bad config value for %s: %s", key, value);
22992f14f4b5SJoe Thornber 
23002f14f4b5SJoe Thornber 	return r;
23012f14f4b5SJoe Thornber }
23022f14f4b5SJoe Thornber 
23032f14f4b5SJoe Thornber static int set_config_values(struct cache *cache, int argc, const char **argv)
2304c6b4fcbaSJoe Thornber {
2305c6b4fcbaSJoe Thornber 	int r = 0;
2306c6b4fcbaSJoe Thornber 
2307c6b4fcbaSJoe Thornber 	if (argc & 1) {
2308c6b4fcbaSJoe Thornber 		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2309c6b4fcbaSJoe Thornber 		return -EINVAL;
2310c6b4fcbaSJoe Thornber 	}
2311c6b4fcbaSJoe Thornber 
2312c6b4fcbaSJoe Thornber 	while (argc) {
23132f14f4b5SJoe Thornber 		r = set_config_value(cache, argv[0], argv[1]);
23142f14f4b5SJoe Thornber 		if (r)
23152f14f4b5SJoe Thornber 			break;
2316c6b4fcbaSJoe Thornber 
2317c6b4fcbaSJoe Thornber 		argc -= 2;
2318c6b4fcbaSJoe Thornber 		argv += 2;
2319c6b4fcbaSJoe Thornber 	}
2320c6b4fcbaSJoe Thornber 
2321c6b4fcbaSJoe Thornber 	return r;
2322c6b4fcbaSJoe Thornber }
2323c6b4fcbaSJoe Thornber 
2324c6b4fcbaSJoe Thornber static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2325c6b4fcbaSJoe Thornber 			       char **error)
2326c6b4fcbaSJoe Thornber {
23274cb3e1dbSMikulas Patocka 	struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2328c6b4fcbaSJoe Thornber 							   cache->cache_size,
2329c6b4fcbaSJoe Thornber 							   cache->origin_sectors,
2330c6b4fcbaSJoe Thornber 							   cache->sectors_per_block);
23314cb3e1dbSMikulas Patocka 	if (IS_ERR(p)) {
2332c6b4fcbaSJoe Thornber 		*error = "Error creating cache's policy";
23334cb3e1dbSMikulas Patocka 		return PTR_ERR(p);
2334c6b4fcbaSJoe Thornber 	}
23354cb3e1dbSMikulas Patocka 	cache->policy = p;
2336b29d4986SJoe Thornber 	BUG_ON(!cache->policy);
2337c6b4fcbaSJoe Thornber 
23382f14f4b5SJoe Thornber 	return 0;
2339c6b4fcbaSJoe Thornber }
2340c6b4fcbaSJoe Thornber 
234108b18451SJoe Thornber /*
23422bb812dfSJoe Thornber  * We want the discard block size to be at least the cache block size,
23432bb812dfSJoe Thornber  * with no more than 2^14 discard blocks across the origin.
234408b18451SJoe Thornber  */
234508b18451SJoe Thornber #define MAX_DISCARD_BLOCKS (1 << 14)
234608b18451SJoe Thornber 
234708b18451SJoe Thornber static bool too_many_discard_blocks(sector_t discard_block_size,
234808b18451SJoe Thornber 				    sector_t origin_size)
234908b18451SJoe Thornber {
235008b18451SJoe Thornber 	(void) sector_div(origin_size, discard_block_size);
235108b18451SJoe Thornber 
235208b18451SJoe Thornber 	return origin_size > MAX_DISCARD_BLOCKS;
235308b18451SJoe Thornber }
235408b18451SJoe Thornber 
235508b18451SJoe Thornber static sector_t calculate_discard_block_size(sector_t cache_block_size,
235608b18451SJoe Thornber 					     sector_t origin_size)
235708b18451SJoe Thornber {
23582bb812dfSJoe Thornber 	sector_t discard_block_size = cache_block_size;
235908b18451SJoe Thornber 
236008b18451SJoe Thornber 	if (origin_size)
236108b18451SJoe Thornber 		while (too_many_discard_blocks(discard_block_size, origin_size))
236208b18451SJoe Thornber 			discard_block_size *= 2;
236308b18451SJoe Thornber 
236408b18451SJoe Thornber 	return discard_block_size;
236508b18451SJoe Thornber }
236608b18451SJoe Thornber 
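/*
 * Editor's worked example (figures invented): with a 64-sector cache block
 * and a 2 TiB origin (2^32 sectors), starting at 64 sectors would give
 * 2^26 discard blocks.  calculate_discard_block_size() keeps doubling until
 * the count is no more than MAX_DISCARD_BLOCKS: 64 -> 128 -> ... -> 262144
 * sectors (128 MiB), which yields exactly 2^14 blocks across the origin.
 */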
2367d1d9220cSJoe Thornber static void set_cache_size(struct cache *cache, dm_cblock_t size)
2368d1d9220cSJoe Thornber {
2369d1d9220cSJoe Thornber 	dm_block_t nr_blocks = from_cblock(size);
2370d1d9220cSJoe Thornber 
2371d1d9220cSJoe Thornber 	if (nr_blocks > (1 << 20) && cache->cache_size != size)
2372d1d9220cSJoe Thornber 		DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2373d1d9220cSJoe Thornber 			     "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2374d1d9220cSJoe Thornber 			     "Please consider increasing the cache block size to reduce the overall cache block count.",
2375d1d9220cSJoe Thornber 			     (unsigned long long) nr_blocks);
2376d1d9220cSJoe Thornber 
2377d1d9220cSJoe Thornber 	cache->cache_size = size;
2378d1d9220cSJoe Thornber }
2379d1d9220cSJoe Thornber 
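/*
 * Editor's worked example (figures invented): a 1 TiB cache device with
 * 32 KiB blocks holds 2^40 / 2^15 = 2^25 cache blocks, well past the 2^20
 * threshold above, so set_cache_size() emits the warning; a 1 MiB block
 * size brings the count down to exactly 2^20, which no longer triggers it.
 */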
2380f8350dafSJoe Thornber #define DEFAULT_MIGRATION_THRESHOLD 2048
2381c6b4fcbaSJoe Thornber 
2382c6b4fcbaSJoe Thornber static int cache_create(struct cache_args *ca, struct cache **result)
2383c6b4fcbaSJoe Thornber {
2384c6b4fcbaSJoe Thornber 	int r = 0;
2385c6b4fcbaSJoe Thornber 	char **error = &ca->ti->error;
2386c6b4fcbaSJoe Thornber 	struct cache *cache;
2387c6b4fcbaSJoe Thornber 	struct dm_target *ti = ca->ti;
2388c6b4fcbaSJoe Thornber 	dm_block_t origin_blocks;
2389c6b4fcbaSJoe Thornber 	struct dm_cache_metadata *cmd;
2390c6b4fcbaSJoe Thornber 	bool may_format = ca->features.mode == CM_WRITE;
2391c6b4fcbaSJoe Thornber 
2392c6b4fcbaSJoe Thornber 	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2393c6b4fcbaSJoe Thornber 	if (!cache)
2394c6b4fcbaSJoe Thornber 		return -ENOMEM;
2395c6b4fcbaSJoe Thornber 
2396c6b4fcbaSJoe Thornber 	cache->ti = ca->ti;
2397c6b4fcbaSJoe Thornber 	ti->private = cache;
239869596f55SMike Snitzer 	ti->accounts_remapped_io = true;
2399c6b4fcbaSJoe Thornber 	ti->num_flush_bios = 2;
2400c6b4fcbaSJoe Thornber 	ti->flush_supported = true;
2401c6b4fcbaSJoe Thornber 
2402c6b4fcbaSJoe Thornber 	ti->num_discard_bios = 1;
2403c6b4fcbaSJoe Thornber 	ti->discards_supported = true;
2404c6b4fcbaSJoe Thornber 
2405693b960eSMike Snitzer 	ti->per_io_data_size = sizeof(struct per_bio_data);
2406c6b4fcbaSJoe Thornber 
2407693b960eSMike Snitzer 	cache->features = ca->features;
24082df3bae9SMike Snitzer 	if (writethrough_mode(cache)) {
24092df3bae9SMike Snitzer 		/* Create bioset for writethrough bios issued to origin */
24106f1c819cSKent Overstreet 		r = bioset_init(&cache->bs, BIO_POOL_SIZE, 0, 0);
24116f1c819cSKent Overstreet 		if (r)
24122df3bae9SMike Snitzer 			goto bad;
24132df3bae9SMike Snitzer 	}
24142df3bae9SMike Snitzer 
2415c6b4fcbaSJoe Thornber 	cache->metadata_dev = ca->metadata_dev;
2416c6b4fcbaSJoe Thornber 	cache->origin_dev = ca->origin_dev;
2417c6b4fcbaSJoe Thornber 	cache->cache_dev = ca->cache_dev;
2418c6b4fcbaSJoe Thornber 
2419c6b4fcbaSJoe Thornber 	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2420c6b4fcbaSJoe Thornber 
242111d5a3f8SMing-Hung Tsai 	origin_blocks = cache->origin_sectors = ti->len;
2422414dd67dSJoe Thornber 	origin_blocks = block_div(origin_blocks, ca->block_size);
2423c6b4fcbaSJoe Thornber 	cache->origin_blocks = to_oblock(origin_blocks);
2424c6b4fcbaSJoe Thornber 
2425c6b4fcbaSJoe Thornber 	cache->sectors_per_block = ca->block_size;
2426c6b4fcbaSJoe Thornber 	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2427c6b4fcbaSJoe Thornber 		r = -EINVAL;
2428c6b4fcbaSJoe Thornber 		goto bad;
2429c6b4fcbaSJoe Thornber 	}
2430c6b4fcbaSJoe Thornber 
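	/*
	 * Editor's note: (x & (x - 1)) == 0 iff x is a power of two, e.g.
	 * 64 & 63 == 0 but 96 & 95 == 64.  The first branch below handles
	 * non-power-of-two block sizes with full division; the else arm can
	 * use a cheap shift instead.
	 */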
2431c6b4fcbaSJoe Thornber 	if (ca->block_size & (ca->block_size - 1)) {
2432c6b4fcbaSJoe Thornber 		dm_block_t cache_size = ca->cache_sectors;
2433c6b4fcbaSJoe Thornber 
2434c6b4fcbaSJoe Thornber 		cache->sectors_per_block_shift = -1;
2435414dd67dSJoe Thornber 		cache_size = block_div(cache_size, ca->block_size);
2436d1d9220cSJoe Thornber 		set_cache_size(cache, to_cblock(cache_size));
2437c6b4fcbaSJoe Thornber 	} else {
2438c6b4fcbaSJoe Thornber 		cache->sectors_per_block_shift = __ffs(ca->block_size);
2439d1d9220cSJoe Thornber 		set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
2440c6b4fcbaSJoe Thornber 	}
2441c6b4fcbaSJoe Thornber 
2442c6b4fcbaSJoe Thornber 	r = create_cache_policy(cache, ca, error);
2443c6b4fcbaSJoe Thornber 	if (r)
2444c6b4fcbaSJoe Thornber 		goto bad;
24452f14f4b5SJoe Thornber 
2446c6b4fcbaSJoe Thornber 	cache->policy_nr_args = ca->policy_argc;
24472f14f4b5SJoe Thornber 	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
24482f14f4b5SJoe Thornber 
24492f14f4b5SJoe Thornber 	r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
24502f14f4b5SJoe Thornber 	if (r) {
24512f14f4b5SJoe Thornber 		*error = "Error setting cache policy's config values";
24522f14f4b5SJoe Thornber 		goto bad;
24532f14f4b5SJoe Thornber 	}
2454c6b4fcbaSJoe Thornber 
2455c6b4fcbaSJoe Thornber 	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2456c6b4fcbaSJoe Thornber 				     ca->block_size, may_format,
2457629d0a8aSJoe Thornber 				     dm_cache_policy_get_hint_size(cache->policy),
2458629d0a8aSJoe Thornber 				     ca->features.metadata_version);
2459c6b4fcbaSJoe Thornber 	if (IS_ERR(cmd)) {
2460c6b4fcbaSJoe Thornber 		*error = "Error creating metadata object";
2461c6b4fcbaSJoe Thornber 		r = PTR_ERR(cmd);
2462c6b4fcbaSJoe Thornber 		goto bad;
2463c6b4fcbaSJoe Thornber 	}
2464c6b4fcbaSJoe Thornber 	cache->cmd = cmd;
2465028ae9f7SJoe Thornber 	set_cache_mode(cache, CM_WRITE);
2466028ae9f7SJoe Thornber 	if (get_cache_mode(cache) != CM_WRITE) {
2467028ae9f7SJoe Thornber 		*error = "Unable to get write access to metadata, please check/repair metadata.";
2468028ae9f7SJoe Thornber 		r = -EINVAL;
2469028ae9f7SJoe Thornber 		goto bad;
2470028ae9f7SJoe Thornber 	}
2471c6b4fcbaSJoe Thornber 
24728e3c3827SMike Snitzer 	if (passthrough_mode(cache)) {
24732ee57d58SJoe Thornber 		bool all_clean;
24742ee57d58SJoe Thornber 
24752ee57d58SJoe Thornber 		r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
24762ee57d58SJoe Thornber 		if (r) {
24772ee57d58SJoe Thornber 			*error = "dm_cache_metadata_all_clean() failed";
24782ee57d58SJoe Thornber 			goto bad;
24792ee57d58SJoe Thornber 		}
24802ee57d58SJoe Thornber 
24812ee57d58SJoe Thornber 		if (!all_clean) {
24822ee57d58SJoe Thornber 			*error = "Cannot enter passthrough mode unless all blocks are clean";
24832ee57d58SJoe Thornber 			r = -EINVAL;
24842ee57d58SJoe Thornber 			goto bad;
24852ee57d58SJoe Thornber 		}
2486b29d4986SJoe Thornber 
2487b29d4986SJoe Thornber 		policy_allow_migrations(cache->policy, false);
24882ee57d58SJoe Thornber 	}
24892ee57d58SJoe Thornber 
2490c6b4fcbaSJoe Thornber 	spin_lock_init(&cache->lock);
2491c6b4fcbaSJoe Thornber 	bio_list_init(&cache->deferred_bios);
2492a59db676SJoe Thornber 	atomic_set(&cache->nr_allocated_migrations, 0);
2493a59db676SJoe Thornber 	atomic_set(&cache->nr_io_migrations, 0);
2494c6b4fcbaSJoe Thornber 	init_waitqueue_head(&cache->migration_wait);
2495c6b4fcbaSJoe Thornber 
2496fa4d683aSWei Yongjun 	r = -ENOMEM;
249744fa816bSAnssi Hannula 	atomic_set(&cache->nr_dirty, 0);
2498c6b4fcbaSJoe Thornber 	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2499c6b4fcbaSJoe Thornber 	if (!cache->dirty_bitset) {
2500c6b4fcbaSJoe Thornber 		*error = "could not allocate dirty bitset";
2501c6b4fcbaSJoe Thornber 		goto bad;
2502c6b4fcbaSJoe Thornber 	}
2503c6b4fcbaSJoe Thornber 	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2504c6b4fcbaSJoe Thornber 
250508b18451SJoe Thornber 	cache->discard_block_size =
250608b18451SJoe Thornber 		calculate_discard_block_size(cache->sectors_per_block,
250708b18451SJoe Thornber 					     cache->origin_sectors);
25082572629aSJoe Thornber 	cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
25092572629aSJoe Thornber 							      cache->discard_block_size));
25101bad9bc4SJoe Thornber 	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2511c6b4fcbaSJoe Thornber 	if (!cache->discard_bitset) {
2512c6b4fcbaSJoe Thornber 		*error = "could not allocate discard bitset";
2513c6b4fcbaSJoe Thornber 		goto bad;
2514c6b4fcbaSJoe Thornber 	}
25151bad9bc4SJoe Thornber 	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2516c6b4fcbaSJoe Thornber 
2517c6b4fcbaSJoe Thornber 	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2518c6b4fcbaSJoe Thornber 	if (IS_ERR(cache->copier)) {
2519c6b4fcbaSJoe Thornber 		*error = "could not create kcopyd client";
2520c6b4fcbaSJoe Thornber 		r = PTR_ERR(cache->copier);
2521c6b4fcbaSJoe Thornber 		goto bad;
2522c6b4fcbaSJoe Thornber 	}
2523c6b4fcbaSJoe Thornber 
2524b29d4986SJoe Thornber 	cache->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
2525c6b4fcbaSJoe Thornber 	if (!cache->wq) {
2526c6b4fcbaSJoe Thornber 		*error = "could not create workqueue for metadata object";
2527c6b4fcbaSJoe Thornber 		goto bad;
2528c6b4fcbaSJoe Thornber 	}
2529b29d4986SJoe Thornber 	INIT_WORK(&cache->deferred_bio_worker, process_deferred_bios);
2530b29d4986SJoe Thornber 	INIT_WORK(&cache->migration_worker, check_migrations);
2531c6b4fcbaSJoe Thornber 	INIT_DELAYED_WORK(&cache->waker, do_waker);
2532c6b4fcbaSJoe Thornber 
2533b29d4986SJoe Thornber 	cache->prison = dm_bio_prison_create_v2(cache->wq);
2534c6b4fcbaSJoe Thornber 	if (!cache->prison) {
2535c6b4fcbaSJoe Thornber 		*error = "could not create bio prison";
2536c6b4fcbaSJoe Thornber 		goto bad;
2537c6b4fcbaSJoe Thornber 	}
2538c6b4fcbaSJoe Thornber 
25396f1c819cSKent Overstreet 	r = mempool_init_slab_pool(&cache->migration_pool, MIGRATION_POOL_SIZE,
2540c6b4fcbaSJoe Thornber 				   migration_cache);
25416f1c819cSKent Overstreet 	if (r) {
2542c6b4fcbaSJoe Thornber 		*error = "Error creating cache's migration mempool";
2543c6b4fcbaSJoe Thornber 		goto bad;
2544c6b4fcbaSJoe Thornber 	}
2545c6b4fcbaSJoe Thornber 
2546c6b4fcbaSJoe Thornber 	cache->need_tick_bio = true;
2547c6b4fcbaSJoe Thornber 	cache->sized = false;
254865790ff9SJoe Thornber 	cache->invalidate = false;
2549c6b4fcbaSJoe Thornber 	cache->commit_requested = false;
2550c6b4fcbaSJoe Thornber 	cache->loaded_mappings = false;
2551c6b4fcbaSJoe Thornber 	cache->loaded_discards = false;
2552c6b4fcbaSJoe Thornber 
2553c6b4fcbaSJoe Thornber 	load_stats(cache);
2554c6b4fcbaSJoe Thornber 
2555c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.demotion, 0);
2556c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.promotion, 0);
2557c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.copies_avoided, 0);
2558c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.cache_cell_clash, 0);
2559c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.commit_count, 0);
2560c6b4fcbaSJoe Thornber 	atomic_set(&cache->stats.discard_count, 0);
2561c6b4fcbaSJoe Thornber 
256265790ff9SJoe Thornber 	spin_lock_init(&cache->invalidation_lock);
256365790ff9SJoe Thornber 	INIT_LIST_HEAD(&cache->invalidation_requests);
256465790ff9SJoe Thornber 
2565b29d4986SJoe Thornber 	batcher_init(&cache->committer, commit_op, cache,
2566b29d4986SJoe Thornber 		     issue_op, cache, cache->wq);
2567dc4fa29fSMike Snitzer 	dm_iot_init(&cache->tracker);
2568066dbaa3SJoe Thornber 
2569b29d4986SJoe Thornber 	init_rwsem(&cache->background_work_lock);
2570b29d4986SJoe Thornber 	prevent_background_work(cache);
2571b29d4986SJoe Thornber 
2572c6b4fcbaSJoe Thornber 	*result = cache;
2573c6b4fcbaSJoe Thornber 	return 0;
2574c6b4fcbaSJoe Thornber bad:
25758cc12dabSMing-Hung Tsai 	__destroy(cache);
2576c6b4fcbaSJoe Thornber 	return r;
2577c6b4fcbaSJoe Thornber }
2578c6b4fcbaSJoe Thornber 
2579c6b4fcbaSJoe Thornber static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2580c6b4fcbaSJoe Thornber {
258186a3238cSHeinz Mauelshagen 	unsigned int i;
2582c6b4fcbaSJoe Thornber 	const char **copy;
2583c6b4fcbaSJoe Thornber 
2584c6b4fcbaSJoe Thornber 	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2585c6b4fcbaSJoe Thornber 	if (!copy)
2586c6b4fcbaSJoe Thornber 		return -ENOMEM;
2587c6b4fcbaSJoe Thornber 	for (i = 0; i < argc; i++) {
2588c6b4fcbaSJoe Thornber 		copy[i] = kstrdup(argv[i], GFP_KERNEL);
2589c6b4fcbaSJoe Thornber 		if (!copy[i]) {
2590c6b4fcbaSJoe Thornber 			while (i--)
2591c6b4fcbaSJoe Thornber 				kfree(copy[i]);
2592c6b4fcbaSJoe Thornber 			kfree(copy);
2593c6b4fcbaSJoe Thornber 			return -ENOMEM;
2594c6b4fcbaSJoe Thornber 		}
2595c6b4fcbaSJoe Thornber 	}
2596c6b4fcbaSJoe Thornber 
2597c6b4fcbaSJoe Thornber 	cache->nr_ctr_args = argc;
2598c6b4fcbaSJoe Thornber 	cache->ctr_args = copy;
2599c6b4fcbaSJoe Thornber 
2600c6b4fcbaSJoe Thornber 	return 0;
2601c6b4fcbaSJoe Thornber }
2602c6b4fcbaSJoe Thornber 
260386a3238cSHeinz Mauelshagen static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2604c6b4fcbaSJoe Thornber {
2605c6b4fcbaSJoe Thornber 	int r = -EINVAL;
2606c6b4fcbaSJoe Thornber 	struct cache_args *ca;
2607c6b4fcbaSJoe Thornber 	struct cache *cache = NULL;
2608c6b4fcbaSJoe Thornber 
2609c6b4fcbaSJoe Thornber 	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2610c6b4fcbaSJoe Thornber 	if (!ca) {
2611c6b4fcbaSJoe Thornber 		ti->error = "Error allocating memory for cache";
2612c6b4fcbaSJoe Thornber 		return -ENOMEM;
2613c6b4fcbaSJoe Thornber 	}
2614c6b4fcbaSJoe Thornber 	ca->ti = ti;
2615c6b4fcbaSJoe Thornber 
2616c6b4fcbaSJoe Thornber 	r = parse_cache_args(ca, argc, argv, &ti->error);
2617c6b4fcbaSJoe Thornber 	if (r)
2618c6b4fcbaSJoe Thornber 		goto out;
2619c6b4fcbaSJoe Thornber 
2620c6b4fcbaSJoe Thornber 	r = cache_create(ca, &cache);
2621617a0b89SHeinz Mauelshagen 	if (r)
2622617a0b89SHeinz Mauelshagen 		goto out;
2623c6b4fcbaSJoe Thornber 
2624c6b4fcbaSJoe Thornber 	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2625c6b4fcbaSJoe Thornber 	if (r) {
26268cc12dabSMing-Hung Tsai 		__destroy(cache);
2627c6b4fcbaSJoe Thornber 		goto out;
2628c6b4fcbaSJoe Thornber 	}
2629c6b4fcbaSJoe Thornber 
2630c6b4fcbaSJoe Thornber 	ti->private = cache;
2631c6b4fcbaSJoe Thornber out:
2632c6b4fcbaSJoe Thornber 	destroy_cache_args(ca);
2633c6b4fcbaSJoe Thornber 	return r;
2634c6b4fcbaSJoe Thornber }
2635c6b4fcbaSJoe Thornber 
2636651f5fa2SJoe Thornber /*----------------------------------------------------------------*/
2637651f5fa2SJoe Thornber 
2638651f5fa2SJoe Thornber static int cache_map(struct dm_target *ti, struct bio *bio)
2639c6b4fcbaSJoe Thornber {
2640651f5fa2SJoe Thornber 	struct cache *cache = ti->private;
2641651f5fa2SJoe Thornber 
2642c6b4fcbaSJoe Thornber 	int r;
2643b29d4986SJoe Thornber 	bool commit_needed;
2644c6b4fcbaSJoe Thornber 	dm_oblock_t block = get_bio_block(cache, bio);
2645fb4100aeSJoe Thornber 
2646693b960eSMike Snitzer 	init_per_bio_data(bio);
2647e893fba9SHeinz Mauelshagen 	if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
2648c6b4fcbaSJoe Thornber 		/*
2649c6b4fcbaSJoe Thornber 		 * This can only occur if the io goes to a partial block at
2650c6b4fcbaSJoe Thornber 		 * the end of the origin device.  We don't cache these.
2651c6b4fcbaSJoe Thornber 		 * Just remap to the origin and carry on.
2652c6b4fcbaSJoe Thornber 		 */
2653e893fba9SHeinz Mauelshagen 		remap_to_origin(cache, bio);
2654651f5fa2SJoe Thornber 		accounted_begin(cache, bio);
2655c6b4fcbaSJoe Thornber 		return DM_MAPIO_REMAPPED;
2656c6b4fcbaSJoe Thornber 	}
2657c6b4fcbaSJoe Thornber 
2658651f5fa2SJoe Thornber 	if (discard_or_flush(bio)) {
2659c6b4fcbaSJoe Thornber 		defer_bio(cache, bio);
2660c6b4fcbaSJoe Thornber 		return DM_MAPIO_SUBMITTED;
2661c6b4fcbaSJoe Thornber 	}
2662c6b4fcbaSJoe Thornber 
2663b29d4986SJoe Thornber 	r = map_bio(cache, bio, block, &commit_needed);
2664b29d4986SJoe Thornber 	if (commit_needed)
2665b29d4986SJoe Thornber 		schedule_commit(&cache->committer);
2666c6b4fcbaSJoe Thornber 
26672ee57d58SJoe Thornber 	return r;
2668c6b4fcbaSJoe Thornber }
2669c6b4fcbaSJoe Thornber 
2670693b960eSMike Snitzer static int cache_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
2671c6b4fcbaSJoe Thornber {
2672c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
2673c6b4fcbaSJoe Thornber 	unsigned long flags;
2674693b960eSMike Snitzer 	struct per_bio_data *pb = get_per_bio_data(bio);
2675c6b4fcbaSJoe Thornber 
2676c6b4fcbaSJoe Thornber 	if (pb->tick) {
2677fba10109SJoe Thornber 		policy_tick(cache->policy, false);
2678c6b4fcbaSJoe Thornber 
2679c6b4fcbaSJoe Thornber 		spin_lock_irqsave(&cache->lock, flags);
2680c6b4fcbaSJoe Thornber 		cache->need_tick_bio = true;
2681c6b4fcbaSJoe Thornber 		spin_unlock_irqrestore(&cache->lock, flags);
2682c6b4fcbaSJoe Thornber 	}
2683c6b4fcbaSJoe Thornber 
2684b29d4986SJoe Thornber 	bio_drop_shared_lock(cache, bio);
2685066dbaa3SJoe Thornber 	accounted_complete(cache, bio);
2686c6b4fcbaSJoe Thornber 
26871be56909SChristoph Hellwig 	return DM_ENDIO_DONE;
2688c6b4fcbaSJoe Thornber }
2689c6b4fcbaSJoe Thornber 
2690c6b4fcbaSJoe Thornber static int write_dirty_bitset(struct cache *cache)
2691c6b4fcbaSJoe Thornber {
2692629d0a8aSJoe Thornber 	int r;
2693c6b4fcbaSJoe Thornber 
2694028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
2695028ae9f7SJoe Thornber 		return -EINVAL;
2696028ae9f7SJoe Thornber 
2697629d0a8aSJoe Thornber 	r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
2698629d0a8aSJoe Thornber 	if (r)
2699629d0a8aSJoe Thornber 		metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
2700c6b4fcbaSJoe Thornber 
2701629d0a8aSJoe Thornber 	return r;
2702c6b4fcbaSJoe Thornber }
2703c6b4fcbaSJoe Thornber 
2704c6b4fcbaSJoe Thornber static int write_discard_bitset(struct cache *cache)
2705c6b4fcbaSJoe Thornber {
270686a3238cSHeinz Mauelshagen 	unsigned int i, r;
2707c6b4fcbaSJoe Thornber 
2708028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
2709028ae9f7SJoe Thornber 		return -EINVAL;
2710028ae9f7SJoe Thornber 
27111bad9bc4SJoe Thornber 	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
27121bad9bc4SJoe Thornber 					   cache->discard_nr_blocks);
2713c6b4fcbaSJoe Thornber 	if (r) {
2714b61d9509SMike Snitzer 		DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache));
2715028ae9f7SJoe Thornber 		metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r);
2716c6b4fcbaSJoe Thornber 		return r;
2717c6b4fcbaSJoe Thornber 	}
2718c6b4fcbaSJoe Thornber 
27191bad9bc4SJoe Thornber 	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
27201bad9bc4SJoe Thornber 		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
27211bad9bc4SJoe Thornber 					 is_discarded(cache, to_dblock(i)));
2722028ae9f7SJoe Thornber 		if (r) {
2723028ae9f7SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_set_discard", r);
2724028ae9f7SJoe Thornber 			return r;
2725028ae9f7SJoe Thornber 		}
2726028ae9f7SJoe Thornber 	}
2727028ae9f7SJoe Thornber 
2728028ae9f7SJoe Thornber 	return 0;
2729028ae9f7SJoe Thornber }
2730028ae9f7SJoe Thornber 
2731028ae9f7SJoe Thornber static int write_hints(struct cache *cache)
2732028ae9f7SJoe Thornber {
2733028ae9f7SJoe Thornber 	int r;
2734028ae9f7SJoe Thornber 
2735028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY)
2736028ae9f7SJoe Thornber 		return -EINVAL;
2737028ae9f7SJoe Thornber 
2738028ae9f7SJoe Thornber 	r = dm_cache_write_hints(cache->cmd, cache->policy);
2739028ae9f7SJoe Thornber 	if (r) {
2740028ae9f7SJoe Thornber 		metadata_operation_failed(cache, "dm_cache_write_hints", r);
2741c6b4fcbaSJoe Thornber 		return r;
2742c6b4fcbaSJoe Thornber 	}
2743c6b4fcbaSJoe Thornber 
2744c6b4fcbaSJoe Thornber 	return 0;
2745c6b4fcbaSJoe Thornber }
2746c6b4fcbaSJoe Thornber 
2747c6b4fcbaSJoe Thornber /*
2748c6b4fcbaSJoe Thornber  * Returns true on success.
2749c6b4fcbaSJoe Thornber  */
2750c6b4fcbaSJoe Thornber static bool sync_metadata(struct cache *cache)
2751c6b4fcbaSJoe Thornber {
2752c6b4fcbaSJoe Thornber 	int r1, r2, r3, r4;
2753c6b4fcbaSJoe Thornber 
2754c6b4fcbaSJoe Thornber 	r1 = write_dirty_bitset(cache);
2755c6b4fcbaSJoe Thornber 	if (r1)
2756b61d9509SMike Snitzer 		DMERR("%s: could not write dirty bitset", cache_device_name(cache));
2757c6b4fcbaSJoe Thornber 
2758c6b4fcbaSJoe Thornber 	r2 = write_discard_bitset(cache);
2759c6b4fcbaSJoe Thornber 	if (r2)
2760b61d9509SMike Snitzer 		DMERR("%s: could not write discard bitset", cache_device_name(cache));
2761c6b4fcbaSJoe Thornber 
2762c6b4fcbaSJoe Thornber 	save_stats(cache);
2763c6b4fcbaSJoe Thornber 
2764028ae9f7SJoe Thornber 	r3 = write_hints(cache);
2765c6b4fcbaSJoe Thornber 	if (r3)
2766b61d9509SMike Snitzer 		DMERR("%s: could not write hints", cache_device_name(cache));
2767c6b4fcbaSJoe Thornber 
2768c6b4fcbaSJoe Thornber 	/*
2769c6b4fcbaSJoe Thornber 	 * If writing the above metadata failed, we still commit, but don't
2770c6b4fcbaSJoe Thornber 	 * set the clean shutdown flag.  This will effectively force every
2771c6b4fcbaSJoe Thornber 	 * dirty bit to be set on reload.
2772c6b4fcbaSJoe Thornber 	 */
2773028ae9f7SJoe Thornber 	r4 = commit(cache, !r1 && !r2 && !r3);
2774c6b4fcbaSJoe Thornber 	if (r4)
2775b61d9509SMike Snitzer 		DMERR("%s: could not write cache metadata", cache_device_name(cache));
2776c6b4fcbaSJoe Thornber 
2777c6b4fcbaSJoe Thornber 	return !r1 && !r2 && !r3 && !r4;
2778c6b4fcbaSJoe Thornber }
2779c6b4fcbaSJoe Thornber 
2780c6b4fcbaSJoe Thornber static void cache_postsuspend(struct dm_target *ti)
2781c6b4fcbaSJoe Thornber {
2782c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
2783c6b4fcbaSJoe Thornber 
2784b29d4986SJoe Thornber 	prevent_background_work(cache);
2785b29d4986SJoe Thornber 	BUG_ON(atomic_read(&cache->nr_io_migrations));
2786b29d4986SJoe Thornber 
27877cdf6a0aSMikulas Patocka 	cancel_delayed_work_sync(&cache->waker);
27887cdf6a0aSMikulas Patocka 	drain_workqueue(cache->wq);
2789701e03e4SJoe Thornber 	WARN_ON(cache->tracker.in_flight);
2790b29d4986SJoe Thornber 
2791b29d4986SJoe Thornber 	/*
2792b29d4986SJoe Thornber 	 * If it's a flush suspend there won't be any deferred bios, so this
2793b29d4986SJoe Thornber 	 * call is harmless.
2794b29d4986SJoe Thornber 	 */
2795651f5fa2SJoe Thornber 	requeue_deferred_bios(cache);
2796c6b4fcbaSJoe Thornber 
2797028ae9f7SJoe Thornber 	if (get_cache_mode(cache) == CM_WRITE)
2798c6b4fcbaSJoe Thornber 		(void) sync_metadata(cache);
2799c6b4fcbaSJoe Thornber }
2800c6b4fcbaSJoe Thornber 
2801c6b4fcbaSJoe Thornber static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2802c6b4fcbaSJoe Thornber 			bool dirty, uint32_t hint, bool hint_valid)
2803c6b4fcbaSJoe Thornber {
2804c6b4fcbaSJoe Thornber 	struct cache *cache = context;
2805c6b4fcbaSJoe Thornber 
2806449b668cSJoe Thornber 	if (dirty) {
2807449b668cSJoe Thornber 		set_bit(from_cblock(cblock), cache->dirty_bitset);
2808449b668cSJoe Thornber 		atomic_inc(&cache->nr_dirty);
2809449b668cSJoe Thornber 	} else
2810449b668cSJoe Thornber 		clear_bit(from_cblock(cblock), cache->dirty_bitset);
2811449b668cSJoe Thornber 
2812b7770923SZheng Yongjun 	return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
2813c6b4fcbaSJoe Thornber }
2814c6b4fcbaSJoe Thornber 
28153e2e1c30SJoe Thornber /*
28163e2e1c30SJoe Thornber  * The discard block size in the on-disk metadata is not
28175c29e784SSteven Lung  * necessarily the same as the one we're currently using.  So we have to
28183e2e1c30SJoe Thornber  * be careful to only set the discarded attribute if we know it
28193e2e1c30SJoe Thornber  * covers a complete block of the new size.
28203e2e1c30SJoe Thornber  */
28213e2e1c30SJoe Thornber struct discard_load_info {
28223e2e1c30SJoe Thornber 	struct cache *cache;
28233e2e1c30SJoe Thornber 
28243e2e1c30SJoe Thornber 	/*
28253e2e1c30SJoe Thornber 	 * These blocks are sized using the on disk dblock size, rather
28263e2e1c30SJoe Thornber 	 * These blocks are sized using the on-disk dblock size, rather
28273e2e1c30SJoe Thornber 	 */
28283e2e1c30SJoe Thornber 	dm_block_t block_size;
28293e2e1c30SJoe Thornber 	dm_block_t discard_begin, discard_end;
28303e2e1c30SJoe Thornber };
28313e2e1c30SJoe Thornber 
28323e2e1c30SJoe Thornber static void discard_load_info_init(struct cache *cache,
28333e2e1c30SJoe Thornber 				   struct discard_load_info *li)
28343e2e1c30SJoe Thornber {
28353e2e1c30SJoe Thornber 	li->cache = cache;
28363e2e1c30SJoe Thornber 	li->discard_begin = li->discard_end = 0;
28373e2e1c30SJoe Thornber }
28383e2e1c30SJoe Thornber 
28393e2e1c30SJoe Thornber static void set_discard_range(struct discard_load_info *li)
28403e2e1c30SJoe Thornber {
28413e2e1c30SJoe Thornber 	sector_t b, e;
28423e2e1c30SJoe Thornber 
28433e2e1c30SJoe Thornber 	if (li->discard_begin == li->discard_end)
28443e2e1c30SJoe Thornber 		return;
28453e2e1c30SJoe Thornber 
28463e2e1c30SJoe Thornber 	/*
28473e2e1c30SJoe Thornber 	 * Convert to sectors.
28483e2e1c30SJoe Thornber 	 */
28493e2e1c30SJoe Thornber 	b = li->discard_begin * li->block_size;
28503e2e1c30SJoe Thornber 	e = li->discard_end * li->block_size;
28513e2e1c30SJoe Thornber 
28523e2e1c30SJoe Thornber 	/*
28533e2e1c30SJoe Thornber 	 * Then convert back to the current dblock size.
28543e2e1c30SJoe Thornber 	 */
28553e2e1c30SJoe Thornber 	b = dm_sector_div_up(b, li->cache->discard_block_size);
28563e2e1c30SJoe Thornber 	sector_div(e, li->cache->discard_block_size);
28573e2e1c30SJoe Thornber 
28583e2e1c30SJoe Thornber 	/*
28593e2e1c30SJoe Thornber 	 * The origin may have shrunk, so we need to check we're still in
28603e2e1c30SJoe Thornber 	 * bounds.
28613e2e1c30SJoe Thornber 	 */
28623e2e1c30SJoe Thornber 	if (e > from_dblock(li->cache->discard_nr_blocks))
28633e2e1c30SJoe Thornber 		e = from_dblock(li->cache->discard_nr_blocks);
28643e2e1c30SJoe Thornber 
28653e2e1c30SJoe Thornber 	for (; b < e; b++)
28663e2e1c30SJoe Thornber 		set_discard(li->cache, to_dblock(b));
28673e2e1c30SJoe Thornber }
28683e2e1c30SJoe Thornber 
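/*
 * Editor's worked example (figures invented): suppose the on-disk dblock
 * size was 128 sectors and the loaded range is dblocks [10, 12), i.e.
 * sectors [1280, 1536), while the current discard_block_size is 512.
 * Then b = dm_sector_div_up(1280, 512) = 3 and e = 1536 / 512 = 3, so no
 * new dblock is completely covered and nothing is marked discarded --
 * exactly the conservatism described above struct discard_load_info.
 */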
2869c6b4fcbaSJoe Thornber static int load_discard(void *context, sector_t discard_block_size,
28701bad9bc4SJoe Thornber 			dm_dblock_t dblock, bool discard)
2871c6b4fcbaSJoe Thornber {
28723e2e1c30SJoe Thornber 	struct discard_load_info *li = context;
2873c6b4fcbaSJoe Thornber 
28743e2e1c30SJoe Thornber 	li->block_size = discard_block_size;
28751bad9bc4SJoe Thornber 
28763e2e1c30SJoe Thornber 	if (discard) {
28773e2e1c30SJoe Thornber 		if (from_dblock(dblock) == li->discard_end)
28783e2e1c30SJoe Thornber 			/*
28793e2e1c30SJoe Thornber 			 * We're already in a discard range, just extend it.
28803e2e1c30SJoe Thornber 			 * We're already in a discard range; just extend it.
28813e2e1c30SJoe Thornber 			li->discard_end = li->discard_end + 1ULL;
28823e2e1c30SJoe Thornber 
28833e2e1c30SJoe Thornber 		else {
28843e2e1c30SJoe Thornber 			/*
28853e2e1c30SJoe Thornber 			 * Emit the old range and start a new one.
28863e2e1c30SJoe Thornber 			 */
28873e2e1c30SJoe Thornber 			set_discard_range(li);
28883e2e1c30SJoe Thornber 			li->discard_begin = from_dblock(dblock);
28893e2e1c30SJoe Thornber 			li->discard_end = li->discard_begin + 1ULL;
28903e2e1c30SJoe Thornber 		}
28913e2e1c30SJoe Thornber 	} else {
28923e2e1c30SJoe Thornber 		set_discard_range(li);
28933e2e1c30SJoe Thornber 		li->discard_begin = li->discard_end = 0;
28943e2e1c30SJoe Thornber 	}
2895c6b4fcbaSJoe Thornber 
2896c6b4fcbaSJoe Thornber 	return 0;
2897c6b4fcbaSJoe Thornber }
2898c6b4fcbaSJoe Thornber 
2899f494a9c6SJoe Thornber static dm_cblock_t get_cache_dev_size(struct cache *cache)
2900c6b4fcbaSJoe Thornber {
2901f494a9c6SJoe Thornber 	sector_t size = get_dev_size(cache->cache_dev);
2902f494a9c6SJoe Thornber 	(void) sector_div(size, cache->sectors_per_block);
2903f494a9c6SJoe Thornber 	return to_cblock(size);
2904f494a9c6SJoe Thornber }
2905f494a9c6SJoe Thornber 
2906f494a9c6SJoe Thornber static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2907f494a9c6SJoe Thornber {
29085d07384aSMike Snitzer 	if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
29095d07384aSMike Snitzer 		DMERR("%s: unable to extend cache due to missing cache table reload",
29105d07384aSMike Snitzer 		      cache_device_name(cache));
29115d07384aSMike Snitzer 		return false;
29125d07384aSMike Snitzer 	}
2913c6b4fcbaSJoe Thornber 
2914c6b4fcbaSJoe Thornber 	/*
2915f494a9c6SJoe Thornber 	 * We can't drop a dirty block when shrinking the cache.
2916c6b4fcbaSJoe Thornber 	 */
2917036dd6e3SMing-Hung Tsai 	if (cache->loaded_mappings) {
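		/*
		 * Editor's note: find_next_bit() returns the index of the
		 * first set (dirty) bit in [new_size, cache_size), or
		 * cache_size when none is set, so new_size == cache_size
		 * below means the tail being dropped is entirely clean.
		 */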
291853421c3cSMing-Hung Tsai 		new_size = to_cblock(find_next_bit(cache->dirty_bitset,
291953421c3cSMing-Hung Tsai 						   from_cblock(cache->cache_size),
292053421c3cSMing-Hung Tsai 						   from_cblock(new_size)));
292153421c3cSMing-Hung Tsai 		if (new_size != cache->cache_size) {
2922b61d9509SMike Snitzer 			DMERR("%s: unable to shrink cache; cache block %llu is dirty",
2923b61d9509SMike Snitzer 			      cache_device_name(cache),
2924f494a9c6SJoe Thornber 			      (unsigned long long) from_cblock(new_size));
2925f494a9c6SJoe Thornber 			return false;
2926f494a9c6SJoe Thornber 		}
2927036dd6e3SMing-Hung Tsai 	}
2928f494a9c6SJoe Thornber 
2929f494a9c6SJoe Thornber 	return true;
2930f494a9c6SJoe Thornber }
2931f494a9c6SJoe Thornber 
2932f494a9c6SJoe Thornber static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
2933f494a9c6SJoe Thornber {
2934f494a9c6SJoe Thornber 	int r;
2935c6b4fcbaSJoe Thornber 
293608844800SVincent Pelletier 	r = dm_cache_resize(cache->cmd, new_size);
2937c6b4fcbaSJoe Thornber 	if (r) {
2938b61d9509SMike Snitzer 		DMERR("%s: could not resize cache metadata", cache_device_name(cache));
2939028ae9f7SJoe Thornber 		metadata_operation_failed(cache, "dm_cache_resize", r);
2940c6b4fcbaSJoe Thornber 		return r;
2941c6b4fcbaSJoe Thornber 	}
2942c6b4fcbaSJoe Thornber 
2943d1d9220cSJoe Thornber 	set_cache_size(cache, new_size);
2944f494a9c6SJoe Thornber 
2945f494a9c6SJoe Thornber 	return 0;
2946f494a9c6SJoe Thornber }
2947f494a9c6SJoe Thornber 
2948f494a9c6SJoe Thornber static int cache_preresume(struct dm_target *ti)
2949f494a9c6SJoe Thornber {
2950f494a9c6SJoe Thornber 	int r = 0;
2951f494a9c6SJoe Thornber 	struct cache *cache = ti->private;
2952f494a9c6SJoe Thornber 	dm_cblock_t csize = get_cache_dev_size(cache);
2953f494a9c6SJoe Thornber 
2954f494a9c6SJoe Thornber 	/*
2955f494a9c6SJoe Thornber 	 * Check to see if the cache has resized.
2956f494a9c6SJoe Thornber 	 */
2957036dd6e3SMing-Hung Tsai 	if (!cache->sized || csize != cache->cache_size) {
2958f494a9c6SJoe Thornber 		if (!can_resize(cache, csize))
2959f494a9c6SJoe Thornber 			return -EINVAL;
2960f494a9c6SJoe Thornber 
2961f494a9c6SJoe Thornber 		r = resize_cache_dev(cache, csize);
2962f494a9c6SJoe Thornber 		if (r)
2963f494a9c6SJoe Thornber 			return r;
2964036dd6e3SMing-Hung Tsai 
2965036dd6e3SMing-Hung Tsai 		cache->sized = true;
2966c6b4fcbaSJoe Thornber 	}
2967c6b4fcbaSJoe Thornber 
2968c6b4fcbaSJoe Thornber 	if (!cache->loaded_mappings) {
2969ea2dd8c1SMike Snitzer 		r = dm_cache_load_mappings(cache->cmd, cache->policy,
2970c6b4fcbaSJoe Thornber 					   load_mapping, cache);
2971c6b4fcbaSJoe Thornber 		if (r) {
2972b61d9509SMike Snitzer 			DMERR("%s: could not load cache mappings", cache_device_name(cache));
2973028ae9f7SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_load_mappings", r);
2974c6b4fcbaSJoe Thornber 			return r;
2975c6b4fcbaSJoe Thornber 		}
2976c6b4fcbaSJoe Thornber 
2977c6b4fcbaSJoe Thornber 		cache->loaded_mappings = true;
2978c6b4fcbaSJoe Thornber 	}
2979c6b4fcbaSJoe Thornber 
2980c6b4fcbaSJoe Thornber 	if (!cache->loaded_discards) {
29813e2e1c30SJoe Thornber 		struct discard_load_info li;
29823e2e1c30SJoe Thornber 
29833e2e1c30SJoe Thornber 		/*
29843e2e1c30SJoe Thornber 		 * The discard bitset could have been resized, or the
29853e2e1c30SJoe Thornber 		 * discard block size changed.  To be safe we start by
29863e2e1c30SJoe Thornber 		 * setting every dblock to not discarded.
29873e2e1c30SJoe Thornber 		 */
29883e2e1c30SJoe Thornber 		clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
29893e2e1c30SJoe Thornber 
29903e2e1c30SJoe Thornber 		discard_load_info_init(cache, &li);
29913e2e1c30SJoe Thornber 		r = dm_cache_load_discards(cache->cmd, load_discard, &li);
2992c6b4fcbaSJoe Thornber 		if (r) {
2993b61d9509SMike Snitzer 			DMERR("%s: could not load origin discards", cache_device_name(cache));
2994028ae9f7SJoe Thornber 			metadata_operation_failed(cache, "dm_cache_load_discards", r);
2995c6b4fcbaSJoe Thornber 			return r;
2996c6b4fcbaSJoe Thornber 		}
29973e2e1c30SJoe Thornber 		set_discard_range(&li);
2998c6b4fcbaSJoe Thornber 
2999c6b4fcbaSJoe Thornber 		cache->loaded_discards = true;
3000c6b4fcbaSJoe Thornber 	}
3001c6b4fcbaSJoe Thornber 
3002c6b4fcbaSJoe Thornber 	return r;
3003c6b4fcbaSJoe Thornber }
3004c6b4fcbaSJoe Thornber 
3005c6b4fcbaSJoe Thornber static void cache_resume(struct dm_target *ti)
3006c6b4fcbaSJoe Thornber {
3007c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
3008c6b4fcbaSJoe Thornber 
3009c6b4fcbaSJoe Thornber 	cache->need_tick_bio = true;
3010b29d4986SJoe Thornber 	allow_background_work(cache);
3011c6b4fcbaSJoe Thornber 	do_waker(&cache->waker.work);
3012c6b4fcbaSJoe Thornber }
3013c6b4fcbaSJoe Thornber 
3014de7180ffSMike Snitzer static void emit_flags(struct cache *cache, char *result,
301586a3238cSHeinz Mauelshagen 		       unsigned int maxlen, ssize_t *sz_ptr)
3016de7180ffSMike Snitzer {
3017de7180ffSMike Snitzer 	ssize_t sz = *sz_ptr;
3018de7180ffSMike Snitzer 	struct cache_features *cf = &cache->features;
301986a3238cSHeinz Mauelshagen 	unsigned int count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
3020de7180ffSMike Snitzer 
3021de7180ffSMike Snitzer 	DMEMIT("%u ", count);
3022de7180ffSMike Snitzer 
3023de7180ffSMike Snitzer 	if (cf->metadata_version == 2)
3024de7180ffSMike Snitzer 		DMEMIT("metadata2 ");
3025de7180ffSMike Snitzer 
3026de7180ffSMike Snitzer 	if (writethrough_mode(cache))
3027de7180ffSMike Snitzer 		DMEMIT("writethrough ");
3028de7180ffSMike Snitzer 
3029de7180ffSMike Snitzer 	else if (passthrough_mode(cache))
3030de7180ffSMike Snitzer 		DMEMIT("passthrough ");
3031de7180ffSMike Snitzer 
3032de7180ffSMike Snitzer 	else if (writeback_mode(cache))
3033de7180ffSMike Snitzer 		DMEMIT("writeback ");
3034de7180ffSMike Snitzer 
3035de7180ffSMike Snitzer 	else {
3036de7180ffSMike Snitzer 		DMEMIT("unknown ");
3037de7180ffSMike Snitzer 		DMERR("%s: internal error: unknown io mode: %d",
3038de7180ffSMike Snitzer 		      cache_device_name(cache), (int) cf->io_mode);
3039de7180ffSMike Snitzer 	}
3040de7180ffSMike Snitzer 
3041de7180ffSMike Snitzer 	if (!cf->discard_passdown)
3042de7180ffSMike Snitzer 		DMEMIT("no_discard_passdown ");
3043de7180ffSMike Snitzer 
3044de7180ffSMike Snitzer 	*sz_ptr = sz;
3045de7180ffSMike Snitzer }
3046de7180ffSMike Snitzer 
3047c6b4fcbaSJoe Thornber /*
3048c6b4fcbaSJoe Thornber  * Status format:
3049c6b4fcbaSJoe Thornber  *
30506a388618SMike Snitzer  * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
30516a388618SMike Snitzer  * <cache block size> <#used cache blocks>/<#total cache blocks>
3052c6b4fcbaSJoe Thornber  * <#read hits> <#read misses> <#write hits> <#write misses>
30536a388618SMike Snitzer  * <#demotions> <#promotions> <#dirty>
3054c6b4fcbaSJoe Thornber  * <#features> <features>*
3055c6b4fcbaSJoe Thornber  * <#core args> <core args>
3056255eac20SMike Snitzer  * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
3057c6b4fcbaSJoe Thornber  */
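/*
 * Editor's illustration of an INFO line in the above format (all figures
 * invented):
 *
 *   8 72/4096 128 850/102400 432 51 438 57 3 9 7238 2 metadata2 writeback
 *   2 migration_threshold 2048 smq 0 rw -
 *
 * i.e. two feature args, the default migration_threshold core pair, the
 * "smq" policy with no args, writable metadata, and no needs_check flag.
 */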
3058c6b4fcbaSJoe Thornber static void cache_status(struct dm_target *ti, status_type_t type,
305986a3238cSHeinz Mauelshagen 			 unsigned int status_flags, char *result, unsigned int maxlen)
3060c6b4fcbaSJoe Thornber {
3061c6b4fcbaSJoe Thornber 	int r = 0;
306286a3238cSHeinz Mauelshagen 	unsigned int i;
3063c6b4fcbaSJoe Thornber 	ssize_t sz = 0;
3064c6b4fcbaSJoe Thornber 	dm_block_t nr_free_blocks_metadata = 0;
3065c6b4fcbaSJoe Thornber 	dm_block_t nr_blocks_metadata = 0;
3066c6b4fcbaSJoe Thornber 	char buf[BDEVNAME_SIZE];
3067c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
3068c6b4fcbaSJoe Thornber 	dm_cblock_t residency;
3069d14fcf3dSJoe Thornber 	bool needs_check;
3070c6b4fcbaSJoe Thornber 
3071c6b4fcbaSJoe Thornber 	switch (type) {
3072c6b4fcbaSJoe Thornber 	case STATUSTYPE_INFO:
3073028ae9f7SJoe Thornber 		if (get_cache_mode(cache) == CM_FAIL) {
3074028ae9f7SJoe Thornber 			DMEMIT("Fail");
3075028ae9f7SJoe Thornber 			break;
3076c6b4fcbaSJoe Thornber 		}
3077c6b4fcbaSJoe Thornber 
3078028ae9f7SJoe Thornber 		/* Commit to ensure statistics aren't out-of-date */
3079028ae9f7SJoe Thornber 		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
3080028ae9f7SJoe Thornber 			(void) commit(cache, false);
3081028ae9f7SJoe Thornber 
3082b61d9509SMike Snitzer 		r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata);
3083c6b4fcbaSJoe Thornber 		if (r) {
3084b61d9509SMike Snitzer 			DMERR("%s: dm_cache_get_free_metadata_block_count returned %d",
3085b61d9509SMike Snitzer 			      cache_device_name(cache), r);
3086c6b4fcbaSJoe Thornber 			goto err;
3087c6b4fcbaSJoe Thornber 		}
3088c6b4fcbaSJoe Thornber 
3089c6b4fcbaSJoe Thornber 		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3090c6b4fcbaSJoe Thornber 		if (r) {
3091b61d9509SMike Snitzer 			DMERR("%s: dm_cache_get_metadata_dev_size returned %d",
3092b61d9509SMike Snitzer 			      cache_device_name(cache), r);
3093c6b4fcbaSJoe Thornber 			goto err;
3094c6b4fcbaSJoe Thornber 		}
3095c6b4fcbaSJoe Thornber 
3096c6b4fcbaSJoe Thornber 		residency = policy_residency(cache->policy);
3097c6b4fcbaSJoe Thornber 
3098ca763d0aSJoe Thornber 		DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
309986a3238cSHeinz Mauelshagen 		       (unsigned int)DM_CACHE_METADATA_BLOCK_SIZE,
3100c6b4fcbaSJoe Thornber 		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3101c6b4fcbaSJoe Thornber 		       (unsigned long long)nr_blocks_metadata,
3102ca763d0aSJoe Thornber 		       (unsigned long long)cache->sectors_per_block,
31036a388618SMike Snitzer 		       (unsigned long long) from_cblock(residency),
31046a388618SMike Snitzer 		       (unsigned long long) from_cblock(cache->cache_size),
310586a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.read_hit),
310686a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.read_miss),
310786a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.write_hit),
310886a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.write_miss),
310986a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.demotion),
311086a3238cSHeinz Mauelshagen 		       (unsigned int) atomic_read(&cache->stats.promotion),
311144fa816bSAnssi Hannula 		       (unsigned long) atomic_read(&cache->nr_dirty));
3112c6b4fcbaSJoe Thornber 
3113de7180ffSMike Snitzer 		emit_flags(cache, result, maxlen, &sz);
3114c6b4fcbaSJoe Thornber 
3115c6b4fcbaSJoe Thornber 		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
31162e68c4e6SMike Snitzer 
31172e68c4e6SMike Snitzer 		DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
3118c6b4fcbaSJoe Thornber 		if (sz < maxlen) {
3119028ae9f7SJoe Thornber 			r = policy_emit_config_values(cache->policy, result, maxlen, &sz);
3120c6b4fcbaSJoe Thornber 			if (r)
3121b61d9509SMike Snitzer 				DMERR("%s: policy_emit_config_values returned %d",
3122b61d9509SMike Snitzer 				      cache_device_name(cache), r);
3123c6b4fcbaSJoe Thornber 		}
3124c6b4fcbaSJoe Thornber 
3125028ae9f7SJoe Thornber 		if (get_cache_mode(cache) == CM_READ_ONLY)
3126028ae9f7SJoe Thornber 			DMEMIT("ro ");
3127028ae9f7SJoe Thornber 		else
3128028ae9f7SJoe Thornber 			DMEMIT("rw ");
3129028ae9f7SJoe Thornber 
3130d14fcf3dSJoe Thornber 		r = dm_cache_metadata_needs_check(cache->cmd, &needs_check);
3131d14fcf3dSJoe Thornber 
3132d14fcf3dSJoe Thornber 		if (r || needs_check)
3133255eac20SMike Snitzer 			DMEMIT("needs_check ");
3134255eac20SMike Snitzer 		else
3135255eac20SMike Snitzer 			DMEMIT("- ");
3136255eac20SMike Snitzer 
3137c6b4fcbaSJoe Thornber 		break;
3138c6b4fcbaSJoe Thornber 
3139c6b4fcbaSJoe Thornber 	case STATUSTYPE_TABLE:
3140c6b4fcbaSJoe Thornber 		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3141c6b4fcbaSJoe Thornber 		DMEMIT("%s ", buf);
3142c6b4fcbaSJoe Thornber 		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3143c6b4fcbaSJoe Thornber 		DMEMIT("%s ", buf);
3144c6b4fcbaSJoe Thornber 		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3145c6b4fcbaSJoe Thornber 		DMEMIT("%s", buf);
3146c6b4fcbaSJoe Thornber 
3147c6b4fcbaSJoe Thornber 		for (i = 0; i < cache->nr_ctr_args - 1; i++)
3148c6b4fcbaSJoe Thornber 			DMEMIT(" %s", cache->ctr_args[i]);
3149c6b4fcbaSJoe Thornber 		if (cache->nr_ctr_args)
3150c6b4fcbaSJoe Thornber 			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
31518ec45662STushar Sugandhi 		break;
31528ec45662STushar Sugandhi 
31538ec45662STushar Sugandhi 	case STATUSTYPE_IMA:
31548ec45662STushar Sugandhi 		DMEMIT_TARGET_NAME_VERSION(ti->type);
31558ec45662STushar Sugandhi 		if (get_cache_mode(cache) == CM_FAIL)
31568ec45662STushar Sugandhi 			DMEMIT(",metadata_mode=fail");
31578ec45662STushar Sugandhi 		else if (get_cache_mode(cache) == CM_READ_ONLY)
31588ec45662STushar Sugandhi 			DMEMIT(",metadata_mode=ro");
31598ec45662STushar Sugandhi 		else
31608ec45662STushar Sugandhi 			DMEMIT(",metadata_mode=rw");
31618ec45662STushar Sugandhi 
31628ec45662STushar Sugandhi 		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
31638ec45662STushar Sugandhi 		DMEMIT(",cache_metadata_device=%s", buf);
31648ec45662STushar Sugandhi 		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
31658ec45662STushar Sugandhi 		DMEMIT(",cache_device=%s", buf);
31668ec45662STushar Sugandhi 		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
31678ec45662STushar Sugandhi 		DMEMIT(",cache_origin_device=%s", buf);
31688ec45662STushar Sugandhi 		DMEMIT(",writethrough=%c", writethrough_mode(cache) ? 'y' : 'n');
31698ec45662STushar Sugandhi 		DMEMIT(",writeback=%c", writeback_mode(cache) ? 'y' : 'n');
31708ec45662STushar Sugandhi 		DMEMIT(",passthrough=%c", passthrough_mode(cache) ? 'y' : 'n');
31718ec45662STushar Sugandhi 		DMEMIT(",metadata2=%c", cache->features.metadata_version == 2 ? 'y' : 'n');
31728ec45662STushar Sugandhi 		DMEMIT(",no_discard_passdown=%c", cache->features.discard_passdown ? 'n' : 'y');
31738ec45662STushar Sugandhi 		DMEMIT(";");
31748ec45662STushar Sugandhi 		break;
3175c6b4fcbaSJoe Thornber 	}
3176c6b4fcbaSJoe Thornber 
3177c6b4fcbaSJoe Thornber 	return;
3178c6b4fcbaSJoe Thornber 
3179c6b4fcbaSJoe Thornber err:
3180c6b4fcbaSJoe Thornber 	DMEMIT("Error");
3181c6b4fcbaSJoe Thornber }
3182c6b4fcbaSJoe Thornber 
3183c6b4fcbaSJoe Thornber /*
3184b29d4986SJoe Thornber  * Defines a range of cblocks: begin to (end - 1) are in the range; end is
3185b29d4986SJoe Thornber  * the one-past-the-end value.
3186b29d4986SJoe Thornber  */
3187b29d4986SJoe Thornber struct cblock_range {
3188b29d4986SJoe Thornber 	dm_cblock_t begin;
3189b29d4986SJoe Thornber 	dm_cblock_t end;
3190b29d4986SJoe Thornber };
3191b29d4986SJoe Thornber 
3192b29d4986SJoe Thornber /*
319365790ff9SJoe Thornber  * A cache block range can take two forms:
319465790ff9SJoe Thornber  *
319565790ff9SJoe Thornber  * i) A single cblock, e.g. '3456'
3196b29d4986SJoe Thornber  * ii) A begin and end cblock with a dash between, e.g. '123-234'
319765790ff9SJoe Thornber  */
319865790ff9SJoe Thornber static int parse_cblock_range(struct cache *cache, const char *str,
319965790ff9SJoe Thornber 			      struct cblock_range *result)
320065790ff9SJoe Thornber {
320165790ff9SJoe Thornber 	char dummy;
320265790ff9SJoe Thornber 	uint64_t b, e;
320365790ff9SJoe Thornber 	int r;
320465790ff9SJoe Thornber 
320565790ff9SJoe Thornber 	/*
320665790ff9SJoe Thornber 	 * Try and parse form (ii) first.
320765790ff9SJoe Thornber 	 */
320865790ff9SJoe Thornber 	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
320965790ff9SJoe Thornber 	if (r < 0)
321065790ff9SJoe Thornber 		return r;
321165790ff9SJoe Thornber 
321265790ff9SJoe Thornber 	if (r == 2) {
321365790ff9SJoe Thornber 		result->begin = to_cblock(b);
321465790ff9SJoe Thornber 		result->end = to_cblock(e);
321565790ff9SJoe Thornber 		return 0;
321665790ff9SJoe Thornber 	}
321765790ff9SJoe Thornber 
321865790ff9SJoe Thornber 	/*
321965790ff9SJoe Thornber 	 * That didn't work, try form (i).
322065790ff9SJoe Thornber 	 */
322165790ff9SJoe Thornber 	r = sscanf(str, "%llu%c", &b, &dummy);
322265790ff9SJoe Thornber 	if (r < 0)
322365790ff9SJoe Thornber 		return r;
322465790ff9SJoe Thornber 
322565790ff9SJoe Thornber 	if (r == 1) {
322665790ff9SJoe Thornber 		result->begin = to_cblock(b);
322765790ff9SJoe Thornber 		result->end = to_cblock(from_cblock(result->begin) + 1u);
322865790ff9SJoe Thornber 		return 0;
322965790ff9SJoe Thornber 	}
323065790ff9SJoe Thornber 
3231b61d9509SMike Snitzer 	DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str);
323265790ff9SJoe Thornber 	return -EINVAL;
323365790ff9SJoe Thornber }
323465790ff9SJoe Thornber 
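/*
 * Editor's illustration: these strings typically arrive via a message such
 * as (device name hypothetical; the cache must be in passthrough mode):
 *
 *   dmsetup message my-cache 0 invalidate_cblocks 3456 7890-8899
 *
 * "3456" parses as the single-block range [3456, 3457); "7890-8899" as the
 * half-open range [7890, 8899).
 */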
323565790ff9SJoe Thornber static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
323665790ff9SJoe Thornber {
323765790ff9SJoe Thornber 	uint64_t b = from_cblock(range->begin);
323865790ff9SJoe Thornber 	uint64_t e = from_cblock(range->end);
323965790ff9SJoe Thornber 	uint64_t n = from_cblock(cache->cache_size);
324065790ff9SJoe Thornber 
324165790ff9SJoe Thornber 	if (b >= n) {
3242b61d9509SMike Snitzer 		DMERR("%s: begin cblock out of range: %llu >= %llu",
3243b61d9509SMike Snitzer 		      cache_device_name(cache), b, n);
324465790ff9SJoe Thornber 		return -EINVAL;
324565790ff9SJoe Thornber 	}
324665790ff9SJoe Thornber 
324765790ff9SJoe Thornber 	if (e > n) {
3248b61d9509SMike Snitzer 		DMERR("%s: end cblock out of range: %llu > %llu",
3249b61d9509SMike Snitzer 		      cache_device_name(cache), e, n);
325065790ff9SJoe Thornber 		return -EINVAL;
325165790ff9SJoe Thornber 	}
325265790ff9SJoe Thornber 
325365790ff9SJoe Thornber 	if (b >= e) {
3254b61d9509SMike Snitzer 		DMERR("%s: invalid cblock range: %llu >= %llu",
3255b61d9509SMike Snitzer 		      cache_device_name(cache), b, e);
325665790ff9SJoe Thornber 		return -EINVAL;
325765790ff9SJoe Thornber 	}
325865790ff9SJoe Thornber 
325965790ff9SJoe Thornber 	return 0;
326065790ff9SJoe Thornber }
326165790ff9SJoe Thornber 
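/*
 * Worked example (illustrative): with cache_size = 1000, "0-1000" and
 * "999" pass validation, while "1000" fails (begin >= cache_size),
 * "500-1001" fails (end > cache_size) and "7-7" fails (begin >= end,
 * an empty range).
 */
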
3262b29d4986SJoe Thornber static inline dm_cblock_t cblock_succ(dm_cblock_t b)
3263b29d4986SJoe Thornber {
3264b29d4986SJoe Thornber 	return to_cblock(from_cblock(b) + 1);
3265b29d4986SJoe Thornber }
3266b29d4986SJoe Thornber 
326765790ff9SJoe Thornber static int request_invalidation(struct cache *cache, struct cblock_range *range)
326865790ff9SJoe Thornber {
3269b29d4986SJoe Thornber 	int r = 0;
327065790ff9SJoe Thornber 
3271b29d4986SJoe Thornber 	/*
3272b29d4986SJoe Thornber 	 * We don't need to do any locking here because we know we're in
3273b29d4986SJoe Thornber 	 * passthrough mode.  There is potential for a race between an
3274b29d4986SJoe Thornber 	 * invalidation triggered by an I/O and an invalidation message.  This
3275b29d4986SJoe Thornber 	 * is harmless; we needn't worry if the policy call fails.
3276b29d4986SJoe Thornber 	 */
3277b29d4986SJoe Thornber 	while (range->begin != range->end) {
3278b29d4986SJoe Thornber 		r = invalidate_cblock(cache, range->begin);
3279b29d4986SJoe Thornber 		if (r)
3280b29d4986SJoe Thornber 			return r;
328165790ff9SJoe Thornber 
3282b29d4986SJoe Thornber 		range->begin = cblock_succ(range->begin);
3283b29d4986SJoe Thornber 	}
328465790ff9SJoe Thornber 
3285b29d4986SJoe Thornber 	cache->commit_requested = true;
3286b29d4986SJoe Thornber 	return r;
328765790ff9SJoe Thornber }
328865790ff9SJoe Thornber 
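/*
 * For example (illustrative): a range of 10-13 invalidates cblocks 10, 11
 * and 12 in turn.  range->begin is advanced as each cblock is invalidated,
 * and the loop returns early on the first invalidate_cblock() failure, so
 * on error the range records how far the invalidation got.
 */
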
328986a3238cSHeinz Mauelshagen static int process_invalidate_cblocks_message(struct cache *cache, unsigned int count,
329065790ff9SJoe Thornber 					      const char **cblock_ranges)
329165790ff9SJoe Thornber {
329265790ff9SJoe Thornber 	int r = 0;
329386a3238cSHeinz Mauelshagen 	unsigned int i;
329465790ff9SJoe Thornber 	struct cblock_range range;
329565790ff9SJoe Thornber 
32968e3c3827SMike Snitzer 	if (!passthrough_mode(cache)) {
3297b61d9509SMike Snitzer 		DMERR("%s: cache has to be in passthrough mode for invalidation",
3298b61d9509SMike Snitzer 		      cache_device_name(cache));
329965790ff9SJoe Thornber 		return -EPERM;
330065790ff9SJoe Thornber 	}
330165790ff9SJoe Thornber 
330265790ff9SJoe Thornber 	for (i = 0; i < count; i++) {
330365790ff9SJoe Thornber 		r = parse_cblock_range(cache, cblock_ranges[i], &range);
330465790ff9SJoe Thornber 		if (r)
330565790ff9SJoe Thornber 			break;
330665790ff9SJoe Thornber 
330765790ff9SJoe Thornber 		r = validate_cblock_range(cache, &range);
330865790ff9SJoe Thornber 		if (r)
330965790ff9SJoe Thornber 			break;
331065790ff9SJoe Thornber 
331165790ff9SJoe Thornber 		/*
331265790ff9SJoe Thornber 		 * Pass the begin and end cache blocks to request_invalidation().
331365790ff9SJoe Thornber 		 */
331465790ff9SJoe Thornber 		r = request_invalidation(cache, &range);
331565790ff9SJoe Thornber 		if (r)
331665790ff9SJoe Thornber 			break;
331765790ff9SJoe Thornber 	}
331865790ff9SJoe Thornber 
331965790ff9SJoe Thornber 	return r;
332065790ff9SJoe Thornber }
332165790ff9SJoe Thornber 
332265790ff9SJoe Thornber /*
332365790ff9SJoe Thornber  * Supports
332465790ff9SJoe Thornber  *	"<key> <value>"
332565790ff9SJoe Thornber  * and
332665790ff9SJoe Thornber  *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
3327c6b4fcbaSJoe Thornber  *
3328c6b4fcbaSJoe Thornber  * The key migration_threshold is supported by the cache target core.
3329c6b4fcbaSJoe Thornber  */
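/*
 * Example usage from userspace (illustrative; the device name 'cache1'
 * is hypothetical):
 *
 *   dmsetup message cache1 0 migration_threshold 2048
 *   dmsetup message cache1 0 invalidate_cblocks 2345 3456-4567
 *
 * invalidate_cblocks is only honoured while the cache is in passthrough
 * mode, and all messages are rejected in READ_ONLY or FAIL mode.
 */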
333086a3238cSHeinz Mauelshagen static int cache_message(struct dm_target *ti, unsigned int argc, char **argv,
333186a3238cSHeinz Mauelshagen 			 char *result, unsigned int maxlen)
3332c6b4fcbaSJoe Thornber {
3333c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
3334c6b4fcbaSJoe Thornber 
333565790ff9SJoe Thornber 	if (!argc)
333665790ff9SJoe Thornber 		return -EINVAL;
333765790ff9SJoe Thornber 
3338028ae9f7SJoe Thornber 	if (get_cache_mode(cache) >= CM_READ_ONLY) {
3339b61d9509SMike Snitzer 		DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode",
3340b61d9509SMike Snitzer 		      cache_device_name(cache));
3341028ae9f7SJoe Thornber 		return -EOPNOTSUPP;
3342028ae9f7SJoe Thornber 	}
3343028ae9f7SJoe Thornber 
33447b6b2bc9SMike Snitzer 	if (!strcasecmp(argv[0], "invalidate_cblocks"))
334565790ff9SJoe Thornber 		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
334665790ff9SJoe Thornber 
3347c6b4fcbaSJoe Thornber 	if (argc != 2)
3348c6b4fcbaSJoe Thornber 		return -EINVAL;
3349c6b4fcbaSJoe Thornber 
33502f14f4b5SJoe Thornber 	return set_config_value(cache, argv[0], argv[1]);
3351c6b4fcbaSJoe Thornber }
3352c6b4fcbaSJoe Thornber 
3353c6b4fcbaSJoe Thornber static int cache_iterate_devices(struct dm_target *ti,
3354c6b4fcbaSJoe Thornber 				 iterate_devices_callout_fn fn, void *data)
3355c6b4fcbaSJoe Thornber {
3356c6b4fcbaSJoe Thornber 	int r = 0;
3357c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
3358c6b4fcbaSJoe Thornber 
3359c6b4fcbaSJoe Thornber 	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3360c6b4fcbaSJoe Thornber 	if (!r)
3361c6b4fcbaSJoe Thornber 		r = fn(ti, cache->origin_dev, 0, ti->len, data);
3362c6b4fcbaSJoe Thornber 
3363c6b4fcbaSJoe Thornber 	return r;
3364c6b4fcbaSJoe Thornber }
3365c6b4fcbaSJoe Thornber 
3366de7180ffSMike Snitzer /*
3367de7180ffSMike Snitzer  * If discard_passdown was enabled, verify that the origin device
3368de7180ffSMike Snitzer  * supports discards.  Disable discard_passdown if not.
3369de7180ffSMike Snitzer  */
3370de7180ffSMike Snitzer static void disable_passdown_if_not_supported(struct cache *cache)
3371de7180ffSMike Snitzer {
3372de7180ffSMike Snitzer 	struct block_device *origin_bdev = cache->origin_dev->bdev;
3373de7180ffSMike Snitzer 	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3374de7180ffSMike Snitzer 	const char *reason = NULL;
3375de7180ffSMike Snitzer 
3376de7180ffSMike Snitzer 	if (!cache->features.discard_passdown)
3377de7180ffSMike Snitzer 		return;
3378de7180ffSMike Snitzer 
337970200574SChristoph Hellwig 	if (!bdev_max_discard_sectors(origin_bdev))
3380de7180ffSMike Snitzer 		reason = "discard unsupported";
3381de7180ffSMike Snitzer 
3382de7180ffSMike Snitzer 	else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
3383de7180ffSMike Snitzer 		reason = "max discard sectors smaller than a block";
3384de7180ffSMike Snitzer 
3385de7180ffSMike Snitzer 	if (reason) {
3386385411ffSChristoph Hellwig 		DMWARN("Origin device (%pg) %s: Disabling discard passdown.",
3387385411ffSChristoph Hellwig 		       origin_bdev, reason);
3388de7180ffSMike Snitzer 		cache->features.discard_passdown = false;
3389de7180ffSMike Snitzer 	}
3390de7180ffSMike Snitzer }
3391de7180ffSMike Snitzer 
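/*
 * Illustrative numbers: with 64KiB cache blocks (sectors_per_block = 128),
 * an origin queue advertising max_discard_sectors = 8 would have passdown
 * disabled above, since a single discard could not cover one cache block.
 */
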
3392c6b4fcbaSJoe Thornber static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3393c6b4fcbaSJoe Thornber {
3394de7180ffSMike Snitzer 	struct block_device *origin_bdev = cache->origin_dev->bdev;
3395de7180ffSMike Snitzer 	struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
3396de7180ffSMike Snitzer 
3397de7180ffSMike Snitzer 	if (!cache->features.discard_passdown) {
3398de7180ffSMike Snitzer 		/* No passdown is done, so set our own virtual limits. */
33997ae34e77SJoe Thornber 		limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
34007ae34e77SJoe Thornber 						    cache->origin_sectors);
34011bad9bc4SJoe Thornber 		limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3402de7180ffSMike Snitzer 		return;
3403de7180ffSMike Snitzer 	}
3404de7180ffSMike Snitzer 
3405de7180ffSMike Snitzer 	/*
3406de7180ffSMike Snitzer 	 * cache_iterate_devices() stacks both origin and fast device limits, but
3407de7180ffSMike Snitzer 	 * discards aren't passed to the fast device, so inherit the origin's limits.
3408de7180ffSMike Snitzer 	 */
3409de7180ffSMike Snitzer 	limits->max_discard_sectors = origin_limits->max_discard_sectors;
3410de7180ffSMike Snitzer 	limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
3411de7180ffSMike Snitzer 	limits->discard_granularity = origin_limits->discard_granularity;
3412de7180ffSMike Snitzer 	limits->discard_alignment = origin_limits->discard_alignment;
3413de7180ffSMike Snitzer 	limits->discard_misaligned = origin_limits->discard_misaligned;
3414c6b4fcbaSJoe Thornber }
3415c6b4fcbaSJoe Thornber 
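/*
 * Worked example for the no-passdown branch above (illustrative): with
 * discard_block_size = 128 sectors, discard_granularity becomes
 * 128 << SECTOR_SHIFT = 64KiB, and max_discard_sectors is capped at
 * min(128 * 1024, origin_sectors), i.e. at most 64MiB per discard.
 */
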
3416c6b4fcbaSJoe Thornber static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3417c6b4fcbaSJoe Thornber {
3418c6b4fcbaSJoe Thornber 	struct cache *cache = ti->private;
3419f6109372SMike Snitzer 	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
3420c6b4fcbaSJoe Thornber 
3421f6109372SMike Snitzer 	/*
3422f6109372SMike Snitzer 	 * If the system-determined stacked limits are compatible with the
3423f6109372SMike Snitzer 	 * cache's blocksize (blocksize is a factor of io_opt), do not override them.
3424f6109372SMike Snitzer 	 */
3425f6109372SMike Snitzer 	if (io_opt_sectors < cache->sectors_per_block ||
3426f6109372SMike Snitzer 	    do_div(io_opt_sectors, cache->sectors_per_block)) {
3427b0246530SMike Snitzer 		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3428c6b4fcbaSJoe Thornber 		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3429f6109372SMike Snitzer 	}
3430de7180ffSMike Snitzer 
3431de7180ffSMike Snitzer 	disable_passdown_if_not_supported(cache);
3432c6b4fcbaSJoe Thornber 	set_discard_limits(cache, limits);
3433c6b4fcbaSJoe Thornber }
3434c6b4fcbaSJoe Thornber 
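/*
 * Illustrative: with 32KiB cache blocks (sectors_per_block = 64), a stacked
 * io_opt of 512KiB (1024 sectors) divides evenly and is left alone, whereas
 * a stacked io_opt of 24KiB (48 sectors) is smaller than one block and gets
 * overridden, forcing io_min and io_opt up to 32KiB.
 */
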
3435c6b4fcbaSJoe Thornber /*----------------------------------------------------------------*/
3436c6b4fcbaSJoe Thornber 
3437c6b4fcbaSJoe Thornber static struct target_type cache_target = {
3438c6b4fcbaSJoe Thornber 	.name = "cache",
3439636be424SMike Snitzer 	.version = {2, 2, 0},
3440c6b4fcbaSJoe Thornber 	.module = THIS_MODULE,
3441c6b4fcbaSJoe Thornber 	.ctr = cache_ctr,
3442c6b4fcbaSJoe Thornber 	.dtr = cache_dtr,
3443c6b4fcbaSJoe Thornber 	.map = cache_map,
3444c6b4fcbaSJoe Thornber 	.end_io = cache_end_io,
3445c6b4fcbaSJoe Thornber 	.postsuspend = cache_postsuspend,
3446c6b4fcbaSJoe Thornber 	.preresume = cache_preresume,
3447c6b4fcbaSJoe Thornber 	.resume = cache_resume,
3448c6b4fcbaSJoe Thornber 	.status = cache_status,
3449c6b4fcbaSJoe Thornber 	.message = cache_message,
3450c6b4fcbaSJoe Thornber 	.iterate_devices = cache_iterate_devices,
3451c6b4fcbaSJoe Thornber 	.io_hints = cache_io_hints,
3452c6b4fcbaSJoe Thornber };
3453c6b4fcbaSJoe Thornber 
3454c6b4fcbaSJoe Thornber static int __init dm_cache_init(void)
3455c6b4fcbaSJoe Thornber {
3456c6b4fcbaSJoe Thornber 	int r;
3457c6b4fcbaSJoe Thornber 
3458c6b4fcbaSJoe Thornber 	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3459*f14fc623SMikulas Patocka 	if (!migration_cache) {
3460*f14fc623SMikulas Patocka 		r = -ENOMEM;
3461*f14fc623SMikulas Patocka 		goto err;
3462*f14fc623SMikulas Patocka 	}
3463*f14fc623SMikulas Patocka 
3464*f14fc623SMikulas Patocka 	btracker_work_cache = kmem_cache_create("dm_cache_bt_work",
3465*f14fc623SMikulas Patocka 		sizeof(struct bt_work), __alignof__(struct bt_work), 0, NULL);
3466*f14fc623SMikulas Patocka 	if (!btracker_work_cache) {
3467*f14fc623SMikulas Patocka 		r = -ENOMEM;
3468*f14fc623SMikulas Patocka 		goto err;
3469*f14fc623SMikulas Patocka 	}
3470c6b4fcbaSJoe Thornber 
34717e6358d2Smonty_pavel@sina.com 	r = dm_register_target(&cache_target);
34727e6358d2Smonty_pavel@sina.com 	if (r)
3473*f14fc623SMikulas Patocka 		goto err;
34757e6358d2Smonty_pavel@sina.com 
3476c6b4fcbaSJoe Thornber 	return 0;
3477*f14fc623SMikulas Patocka 
3478*f14fc623SMikulas Patocka err:
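	/*
	 * kmem_cache_destroy() safely ignores a NULL pointer, so this shared
	 * error path is correct even when btracker_work_cache was never
	 * allocated.
	 */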
3479*f14fc623SMikulas Patocka 	kmem_cache_destroy(migration_cache);
3480*f14fc623SMikulas Patocka 	kmem_cache_destroy(btracker_work_cache);
3481*f14fc623SMikulas Patocka 	return r;
3482c6b4fcbaSJoe Thornber }
3483c6b4fcbaSJoe Thornber 
3484c6b4fcbaSJoe Thornber static void __exit dm_cache_exit(void)
3485c6b4fcbaSJoe Thornber {
3486c6b4fcbaSJoe Thornber 	dm_unregister_target(&cache_target);
3487c6b4fcbaSJoe Thornber 	kmem_cache_destroy(migration_cache);
3488*f14fc623SMikulas Patocka 	kmem_cache_destroy(btracker_work_cache);
3489c6b4fcbaSJoe Thornber }
3490c6b4fcbaSJoe Thornber 
3491c6b4fcbaSJoe Thornber module_init(dm_cache_init);
3492c6b4fcbaSJoe Thornber module_exit(dm_cache_exit);
3493c6b4fcbaSJoe Thornber 
3494c6b4fcbaSJoe Thornber MODULE_DESCRIPTION(DM_NAME " cache target");
3495c6b4fcbaSJoe Thornber MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3496c6b4fcbaSJoe Thornber MODULE_LICENSE("GPL");