/*
 * Copyright (C) 2020 Red Hat GmbH
 *
 * This file is released under the GPL.
 *
 * Device-mapper target to emulate smaller logical block
 * size on backing devices exposing (natively) larger ones.
 *
 * E.g. 512 byte sector emulation on 4K native disks.
 */

#include "dm.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "ebs"

static void ebs_dtr(struct dm_target *ti);

/* Emulated block size context. */
struct ebs_c {
	struct dm_dev *dev;		/* Underlying device to emulate block size on. */
	struct dm_bufio_client *bufio;	/* Use dm-bufio for read and read-modify-write processing. */
	struct workqueue_struct *wq;	/* Workqueue for ^ processing of bios. */
	struct work_struct ws;		/* Work item used for ^. */
	struct bio_list bios_in;	/* Worker bios input list. */
	spinlock_t lock;		/* Guard bios input list above. */
	sector_t start;			/* <start> table line argument, see ebs_ctr below. */
	unsigned int e_bs;		/* Emulated block size in sectors exposed to upper layer. */
	unsigned int u_bs;		/* Underlying block size in sectors retrieved from/set on lower layer device. */
	unsigned char block_shift;	/* bitshift sectors -> blocks used in dm-bufio API. */
	bool u_bs_set:1;		/* Flag to indicate underlying block size is set on table line. */
};
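
/*
 * Example values (illustrative): emulating 512 byte sectors on a 4KiB
 * device gives e_bs = 1 and u_bs = 8 (both counted in 512 byte sectors),
 * hence block_shift = 3 and one underlying block spans eight emulated ones.
 */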

static inline sector_t __sector_to_block(struct ebs_c *ec, sector_t sector)
{
	return sector >> ec->block_shift;
}

static inline sector_t __block_mod(sector_t sector, unsigned int bs)
{
	return sector & (bs - 1);
}
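
/*
 * Worked example (illustrative): with u_bs = 8, __block_mod(19, 8)
 * computes 19 & 7 == 3, i.e. sector 19 sits three sectors into its 4KiB
 * block. This relies on bs being a power of 2 (checked in __ebs_check_bs)
 * and avoids a 64-bit division in the I/O path.
 */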

/* Return number of blocks for a bio, accounting for misalignment of start and end sectors. */
static inline unsigned int __nr_blocks(struct ebs_c *ec, struct bio *bio)
{
	sector_t end_sector = __block_mod(bio->bi_iter.bi_sector, ec->u_bs) + bio_sectors(bio);

	return __sector_to_block(ec, end_sector) + (__block_mod(end_sector, ec->u_bs) ? 1 : 0);
}
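
/*
 * Illustrative: for u_bs = 8, a bio starting at sector 7 with 4 sectors
 * yields end_sector = 7 + 4 = 11, i.e. one full block (11 >> 3) plus a
 * remainder (11 & 7 == 3), so __nr_blocks() returns 2 -- the bio straddles
 * two underlying 4KiB blocks even though it is only 2KiB long.
 */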

static inline bool __ebs_check_bs(unsigned int bs)
{
	return bs && is_power_of_2(bs);
}

/*
 * READ/WRITE:
 *
 * Copy data between bufio blocks and a bio vector's (partial/overlapping) pages.
 */
static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter)
{
	int r = 0;
	unsigned char *ba, *pa;
	unsigned int cur_len;
	unsigned int bv_len = bv->bv_len;
	unsigned int buf_off = to_bytes(__block_mod(iter->bi_sector, ec->u_bs));
	sector_t block = __sector_to_block(ec, iter->bi_sector);
	struct dm_buffer *b;

	if (unlikely(!bv->bv_page || !bv_len))
		return -EIO;

	pa = page_address(bv->bv_page) + bv->bv_offset;

	/* Handle overlapping page <-> blocks */
	while (bv_len) {
		cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len);

		/* Avoid reading for writes in case bio vector's page overwrites block completely. */
		if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
			ba = dm_bufio_read(ec->bufio, block, &b);
		else
			ba = dm_bufio_new(ec->bufio, block, &b);

		if (unlikely(IS_ERR(ba))) {
			/*
			 * Carry on with next buffer, if any, to issue all possible
			 * data but return error.
			 */
			r = PTR_ERR(ba);
		} else {
			/* Copy data to/from bio to buffer if read/new was successful above. */
			ba += buf_off;
			if (rw == READ) {
				memcpy(pa, ba, cur_len);
				flush_dcache_page(bv->bv_page);
			} else {
				flush_dcache_page(bv->bv_page);
				memcpy(ba, pa, cur_len);
				dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len);
			}

			dm_bufio_release(b);
		}

		pa += cur_len;
		bv_len -= cur_len;
		buf_off = 0;
		block++;
	}

	return r;
}
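
/*
 * Read-modify-write in practice (illustrative, u_bs = 8): a 512 byte write
 * to sector 1 maps to buf_off = 512 within block 0, so the full 4KiB block
 * is read via dm_bufio_read(), bytes 512..1023 are overwritten from the
 * page, and only that range is marked dirty. A write covering a whole
 * block with buf_off == 0 skips the read and uses dm_bufio_new() instead.
 */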

/* READ/WRITE: iterate the bio's vectors, copying between (partial) pages and bufio blocks. */
static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio)
{
	int r = 0, rr;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_bvec(bv, bio, iter) {
		rr = __ebs_rw_bvec(ec, rw, &bv, &iter);
		if (rr)
			r = rr;
	}

	return r;
}

/* 'Discard' blocks, i.e. release them from the bufio cache. */
static int __ebs_forget_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t blocks, sector = bio->bi_iter.bi_sector;

	blocks = __nr_blocks(ec, bio);
	for (; blocks--; sector += ec->u_bs)
		dm_bufio_forget(ec->bufio, __sector_to_block(ec, sector));

	return 0;
}

/* Worker function to process incoming bios. */
static void __ebs_process_bios(struct work_struct *ws)
{
	int r;
	bool write = false;
	sector_t block1, block2;
	struct ebs_c *ec = container_of(ws, struct ebs_c, ws);
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&ec->lock);
	bios = ec->bios_in;
	bio_list_init(&ec->bios_in);
	spin_unlock_irq(&ec->lock);

	/* Prefetch all read and any mis-aligned write buffers */
	bio_list_for_each(bio, &bios) {
		block1 = __sector_to_block(ec, bio->bi_iter.bi_sector);
		if (bio_op(bio) == REQ_OP_READ)
			dm_bufio_prefetch(ec->bufio, block1, __nr_blocks(ec, bio));
		else if (bio_op(bio) == REQ_OP_WRITE && !(bio->bi_opf & REQ_PREFLUSH)) {
			block2 = __sector_to_block(ec, bio_end_sector(bio));
			if (__block_mod(bio->bi_iter.bi_sector, ec->u_bs))
				dm_bufio_prefetch(ec->bufio, block1, 1);
			if (__block_mod(bio_end_sector(bio), ec->u_bs) && block2 != block1)
				dm_bufio_prefetch(ec->bufio, block2, 1);
		}
	}

	bio_list_for_each(bio, &bios) {
		r = -EIO;
		if (bio_op(bio) == REQ_OP_READ)
			r = __ebs_rw_bio(ec, READ, bio);
		else if (bio_op(bio) == REQ_OP_WRITE) {
			write = true;
			r = __ebs_rw_bio(ec, WRITE, bio);
		} else if (bio_op(bio) == REQ_OP_DISCARD) {
			/* FIXME: (optionally) call dm_bufio_discard_buffers() once upstream. */
			r = __ebs_forget_bio(ec, bio);
		}

		if (r < 0)
			bio->bi_status = errno_to_blk_status(r);
	}

	/*
	 * We write dirty buffers after processing I/O on them
	 * but before we endio thus addressing REQ_FUA/REQ_SYNC.
	 */
	r = write ? dm_bufio_write_dirty_buffers(ec->bufio) : 0;

	while ((bio = bio_list_pop(&bios))) {
		/* Fail writes if flushing dirty buffers failed; endio any other request. */
		if (unlikely(r && bio_op(bio) == REQ_OP_WRITE))
			bio_io_error(bio);
		else
			bio_endio(bio);
	}
}

/*
 * Construct an emulated block size mapping: <dev_path> <offset> <ebs> [<ubs>]
 *
 * <dev_path>: path of the underlying device
 * <offset>: offset in 512 byte sectors into <dev_path>
 * <ebs>: emulated block size in units of 512 bytes exposed to the upper layer
 * [<ubs>]: underlying block size in units of 512 bytes imposed on the lower layer;
 * 	    optional, if not supplied, retrieve logical block size from underlying device
 */
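/*
 * Illustrative usage (device path and size are placeholders, not taken
 * from this file):
 *
 *   # Emulate 512 byte logical sectors on a 4KiB /dev/sda:
 *   # dmsetup create ebs-dev --table \
 *   #   "0 `blockdev --getsz /dev/sda` ebs /dev/sda 0 1 8"
 *
 * Here <offset> = 0, <ebs> = 1 (512 bytes) and <ubs> = 8 (4KiB); omitting
 * the trailing 8 makes the target query the device's logical block size.
 */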
static int ebs_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned short tmp1;
	unsigned long long tmp;
	char dummy;
	struct ebs_c *ec;

	if (argc < 3 || argc > 4) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	ec = ti->private = kzalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec) {
		ti->error = "Cannot allocate ebs context";
		return -ENOMEM;
	}

	r = -EINVAL;
	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 ||
	    tmp != (sector_t)tmp ||
	    (sector_t)tmp >= ti->len) {
		ti->error = "Invalid device offset sector";
		goto bad;
	}
	ec->start = tmp;

	if (sscanf(argv[2], "%hu%c", &tmp1, &dummy) != 1 ||
	    !__ebs_check_bs(tmp1) ||
	    to_bytes(tmp1) > PAGE_SIZE) {
		ti->error = "Invalid emulated block size";
		goto bad;
	}
	ec->e_bs = tmp1;

	if (argc > 3) {
		if (sscanf(argv[3], "%hu%c", &tmp1, &dummy) != 1 || !__ebs_check_bs(tmp1)) {
			ti->error = "Invalid underlying block size";
			goto bad;
		}
		ec->u_bs = tmp1;
		ec->u_bs_set = true;
	} else
		ec->u_bs_set = false;

	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ec->dev);
	if (r) {
		ti->error = "Device lookup failed";
		ec->dev = NULL;
		goto bad;
	}

	r = -EINVAL;
	if (!ec->u_bs_set) {
		ec->u_bs = to_sector(bdev_logical_block_size(ec->dev->bdev));
		if (!__ebs_check_bs(ec->u_bs)) {
			ti->error = "Invalid retrieved underlying block size";
			goto bad;
		}
	}

	if (!ec->u_bs_set && ec->e_bs == ec->u_bs)
		DMINFO("Emulation superfluous: emulated equal to underlying block size");

	if (__block_mod(ec->start, ec->u_bs)) {
		ti->error = "Device offset must be multiple of underlying block size";
		goto bad;
	}

	ec->bufio = dm_bufio_client_create(ec->dev->bdev, to_bytes(ec->u_bs), 1, 0, NULL, NULL);
	if (IS_ERR(ec->bufio)) {
		ti->error = "Cannot create dm bufio client";
		r = PTR_ERR(ec->bufio);
		ec->bufio = NULL;
		goto bad;
	}

	ec->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!ec->wq) {
		ti->error = "Cannot create dm-" DM_MSG_PREFIX " workqueue";
		r = -ENOMEM;
		goto bad;
	}

	ec->block_shift = __ffs(ec->u_bs);
	INIT_WORK(&ec->ws, &__ebs_process_bios);
	bio_list_init(&ec->bios_in);
	spin_lock_init(&ec->lock);

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_secure_erase_bios = 0;
	ti->num_write_same_bios = 0;
	ti->num_write_zeroes_bios = 0;
	return 0;
bad:
	ebs_dtr(ti);
	return r;
}

static void ebs_dtr(struct dm_target *ti)
{
	struct ebs_c *ec = ti->private;

	if (ec->wq)
		destroy_workqueue(ec->wq);
	if (ec->bufio)
		dm_bufio_client_destroy(ec->bufio);
	if (ec->dev)
		dm_put_device(ti, ec->dev);
	kfree(ec);
}

static int ebs_map(struct dm_target *ti, struct bio *bio)
{
	struct ebs_c *ec = ti->private;

	bio_set_dev(bio, ec->dev->bdev);
	bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	/* REQ_OP_FLUSH is an operation, not a flag; compare it, don't bit-test it. */
	if (unlikely(bio_op(bio) == REQ_OP_FLUSH))
		return DM_MAPIO_REMAPPED;
	/*
	 * Only queue for bufio processing in case of partial or overlapping buffers
	 * -or-
	 * emulation with ebs == ubs aiming for tests of dm-bufio overhead.
	 */
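	/*
	 * Routing example (illustrative, e_bs = 1, u_bs = 8): a 512 byte
	 * read at sector 3 is misaligned and goes to the worker; a 4KiB
	 * read starting at sector 8 is block aligned and remapped directly.
	 */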
	if (likely(__block_mod(bio->bi_iter.bi_sector, ec->u_bs) ||
		   __block_mod(bio_end_sector(bio), ec->u_bs) ||
		   ec->e_bs == ec->u_bs)) {
		spin_lock_irq(&ec->lock);
		bio_list_add(&ec->bios_in, bio);
		spin_unlock_irq(&ec->lock);

		queue_work(ec->wq, &ec->ws);

		return DM_MAPIO_SUBMITTED;
	}

	/* Forget any buffer content relative to this direct backing device I/O. */
	__ebs_forget_bio(ec, bio);

	return DM_MAPIO_REMAPPED;
}

static void ebs_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	struct ebs_c *ec = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		*result = '\0';
		break;
	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u",
			 ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs);
		break;
	}
}

static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct ebs_c *ec = ti->private;
	struct dm_dev *dev = ec->dev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	*bdev = dev->bdev;
	return !!(ec->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT);
}

static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct ebs_c *ec = ti->private;

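	/*
	 * Resulting limits (illustrative, e_bs = 1, u_bs = 8):
	 * logical_block_size = 512, physical_block_size = 4096,
	 * alignment_offset = 4096, io_min = 512.
	 */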
	limits->logical_block_size = to_bytes(ec->e_bs);
	limits->physical_block_size = to_bytes(ec->u_bs);
	limits->alignment_offset = limits->physical_block_size;
	blk_limits_io_min(limits, limits->logical_block_size);
}

static int ebs_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct ebs_c *ec = ti->private;

	return fn(ti, ec->dev, ec->start, ti->len, data);
}

static struct target_type ebs_target = {
	.name		 = "ebs",
	.version	 = {1, 0, 0},
	.features	 = DM_TARGET_PASSES_INTEGRITY,
	.module		 = THIS_MODULE,
	.ctr		 = ebs_ctr,
	.dtr		 = ebs_dtr,
	.map		 = ebs_map,
	.status		 = ebs_status,
	.io_hints	 = ebs_io_hints,
	.prepare_ioctl	 = ebs_prepare_ioctl,
	.iterate_devices = ebs_iterate_devices,
};

static int __init dm_ebs_init(void)
{
	int r = dm_register_target(&ebs_target);

	if (r < 0)
		DMERR("register failed %d", r);

	return r;
}

static void dm_ebs_exit(void)
{
	dm_unregister_target(&ebs_target);
}

module_init(dm_ebs_init);
module_exit(dm_ebs_exit);

MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " emulated block size target");
MODULE_LICENSE("GPL");