/*
 * Copyright (C) 2020 Red Hat GmbH
 *
 * This file is released under the GPL.
 *
 * Device-mapper target to emulate smaller logical block
 * size on backing devices exposing (natively) larger ones.
 *
 * E.g. 512 byte sector emulation on 4K native disks.
 */

#include "dm.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/dm-bufio.h>

#define DM_MSG_PREFIX "ebs"

static void ebs_dtr(struct dm_target *ti);

/* Emulated block size context. */
struct ebs_c {
	struct dm_dev *dev;		/* Underlying device to emulate block size on. */
	struct dm_bufio_client *bufio;	/* Use dm-bufio for read and read-modify-write processing. */
	struct workqueue_struct *wq;	/* Workqueue for ^ processing of bios. */
	struct work_struct ws;		/* Work item used for ^. */
	struct bio_list bios_in;	/* Worker bios input list. */
	spinlock_t lock;		/* Guard bios input list above. */
	sector_t start;			/* <start> table line argument, see ebs_ctr below. */
	unsigned int e_bs;		/* Emulated block size in sectors exposed to upper layer. */
	unsigned int u_bs;		/* Underlying block size in sectors retrieved from/set on lower layer device. */
	unsigned char block_shift;	/* bitshift sectors -> blocks used in dm-bufio API. */
	bool u_bs_set:1;		/* Flag to indicate underlying block size is set on table line. */
};

static inline sector_t __sector_to_block(struct ebs_c *ec, sector_t sector)
{
	return sector >> ec->block_shift;
}

static inline sector_t __block_mod(sector_t sector, unsigned int bs)
{
	return sector & (bs - 1);
}

/* Return number of blocks for a bio, accounting for misalignment of start and end sectors. */
static inline unsigned int __nr_blocks(struct ebs_c *ec, struct bio *bio)
{
	sector_t end_sector = __block_mod(bio->bi_iter.bi_sector, ec->u_bs) + bio_sectors(bio);

	return __sector_to_block(ec, end_sector) + (__block_mod(end_sector, ec->u_bs) ? 1 : 0);
}
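
/*
 * Illustrative worked example (not part of the driver): with a 4KiB
 * underlying block size, u_bs = 8 sectors and block_shift = 3. A bio
 * starting at sector 11 with bio_sectors(bio) == 2 gives
 * __block_mod(11, 8) = 3 and end_sector = 3 + 2 = 5, so __nr_blocks()
 * returns (5 >> 3) + 1 = 1, i.e. a single underlying block is touched.
 */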

static inline bool __ebs_check_bs(unsigned int bs)
{
	return bs && is_power_of_2(bs);
}

/*
 * READ/WRITE:
 *
 * copy blocks between bufio blocks and bio vector's (partial/overlapping) pages.
 */
static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter)
{
	int r = 0;
	unsigned char *ba, *pa;
	unsigned int cur_len;
	unsigned int bv_len = bv->bv_len;
	unsigned int buf_off = to_bytes(__block_mod(iter->bi_sector, ec->u_bs));
	sector_t block = __sector_to_block(ec, iter->bi_sector);
	struct dm_buffer *b;

	if (unlikely(!bv->bv_page || !bv_len))
		return -EIO;

	pa = page_address(bv->bv_page) + bv->bv_offset;

	/* Handle overlapping page <-> blocks */
	while (bv_len) {
		cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len);

		/* Avoid reading for writes in case bio vector's page overwrites block completely. */
		if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio))
			ba = dm_bufio_read(ec->bufio, block, &b);
		else
			ba = dm_bufio_new(ec->bufio, block, &b);

		if (unlikely(IS_ERR(ba))) {
			/*
			 * Carry on with next buffer, if any, to issue all possible
			 * data but return error.
			 */
			r = PTR_ERR(ba);
		} else {
			/* Copy data between the bio's page and the buffer if read/new was successful above. */
			ba += buf_off;
			if (rw == READ) {
				memcpy(pa, ba, cur_len);
				flush_dcache_page(bv->bv_page);
			} else {
				flush_dcache_page(bv->bv_page);
				memcpy(ba, pa, cur_len);
				dm_bufio_mark_partial_buffer_dirty(b, buf_off, buf_off + cur_len);
			}

			dm_bufio_release(b);
		}

		pa += cur_len;
		bv_len -= cur_len;
		buf_off = 0;
		block++;
	}

	return r;
}

/* READ/WRITE: iterate bio vectors, copying between (partial) pages and bufio blocks. */
static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio)
{
	int r = 0, rr;
	struct bio_vec bv;
	struct bvec_iter iter;

	bio_for_each_bvec(bv, bio, iter) {
		rr = __ebs_rw_bvec(ec, rw, &bv, &iter);
		if (rr)
			r = rr;
	}

	return r;
}

/* 'Discard' blocks, i.e. release them from the bufio cache. */
static int __ebs_forget_bio(struct ebs_c *ec, struct bio *bio)
{
	sector_t blocks, sector = bio->bi_iter.bi_sector;

	blocks = __nr_blocks(ec, bio);
	for (; blocks--; sector += ec->u_bs)
		dm_bufio_forget(ec->bufio, __sector_to_block(ec, sector));

	return 0;
}

/* Worker function to process incoming bios. */
static void __ebs_process_bios(struct work_struct *ws)
{
	int r;
	bool write = false;
	sector_t block1, block2;
	struct ebs_c *ec = container_of(ws, struct ebs_c, ws);
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);

	spin_lock_irq(&ec->lock);
	bios = ec->bios_in;
	bio_list_init(&ec->bios_in);
	spin_unlock_irq(&ec->lock);

	/* Prefetch all read and any mis-aligned write buffers */
	bio_list_for_each(bio, &bios) {
		block1 = __sector_to_block(ec, bio->bi_iter.bi_sector);
		if (bio_op(bio) == REQ_OP_READ)
			dm_bufio_prefetch(ec->bufio, block1, __nr_blocks(ec, bio));
		else if (bio_op(bio) == REQ_OP_WRITE && !(bio->bi_opf & REQ_PREFLUSH)) {
			block2 = __sector_to_block(ec, bio_end_sector(bio));
			if (__block_mod(bio->bi_iter.bi_sector, ec->u_bs))
				dm_bufio_prefetch(ec->bufio, block1, 1);
			if (__block_mod(bio_end_sector(bio), ec->u_bs) && block2 != block1)
				dm_bufio_prefetch(ec->bufio, block2, 1);
		}
	}

	bio_list_for_each(bio, &bios) {
		r = -EIO;
		if (bio_op(bio) == REQ_OP_READ)
			r = __ebs_rw_bio(ec, READ, bio);
		else if (bio_op(bio) == REQ_OP_WRITE) {
			write = true;
			r = __ebs_rw_bio(ec, WRITE, bio);
		} else if (bio_op(bio) == REQ_OP_DISCARD) {
			/* FIXME: (optionally) call dm_bufio_discard_buffers() once upstream. */
			r = __ebs_forget_bio(ec, bio);
		}

		if (r < 0)
			bio->bi_status = errno_to_blk_status(r);
	}

	/*
	 * We write dirty buffers after processing I/O on them
	 * but before we endio thus addressing REQ_FUA/REQ_SYNC.
	 */
	r = write ? dm_bufio_write_dirty_buffers(ec->bufio) : 0;

	while ((bio = bio_list_pop(&bios))) {
		/* Any other request is endioed. */
		if (unlikely(r && bio_op(bio) == REQ_OP_WRITE))
			bio_io_error(bio);
		else
			bio_endio(bio);
	}
}
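
/*
 * Illustrative prefetch example (not part of the driver): with u_bs = 8,
 * a write of sectors 4..11 is misaligned at both ends, so only block 0
 * (sectors 0-7) and block 1 (sectors 8-15) are prefetched for the
 * read-modify-write; a write of sectors 8..15 covers block 1 entirely
 * and needs no prefetch at all.
 */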

/*
 * Construct an emulated block size mapping: <dev_path> <offset> <ebs> [<ubs>]
 *
 * <dev_path>: path of the underlying device
 * <offset>: offset in 512 byte sectors into <dev_path>
 * <ebs>: emulated block size in units of 512 bytes exposed to the upper layer
 * [<ubs>]: underlying block size in units of 512 bytes imposed on the lower layer;
 *	    optional, if not supplied, retrieve logical block size from underlying device
 */
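
/*
 * Example table line (illustrative; the device path and mapping name are
 * placeholders): emulate 512 byte logical blocks on a 4KiB-native device,
 * i.e. <ebs> = 1 and <ubs> = 8 in 512 byte sector units:
 *
 *   dmsetup create ebs-dev --table "0 $(blockdev --getsz /dev/sdX) ebs /dev/sdX 0 1 8"
 */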
static int ebs_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	unsigned short tmp1;
	unsigned long long tmp;
	char dummy;
	struct ebs_c *ec;

	if (argc < 3 || argc > 4) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	ec = ti->private = kzalloc(sizeof(*ec), GFP_KERNEL);
	if (!ec) {
		ti->error = "Cannot allocate ebs context";
		return -ENOMEM;
	}

	r = -EINVAL;
	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 ||
	    tmp != (sector_t)tmp ||
	    (sector_t)tmp >= ti->len) {
		ti->error = "Invalid device offset sector";
		goto bad;
	}
	ec->start = tmp;

	if (sscanf(argv[2], "%hu%c", &tmp1, &dummy) != 1 ||
	    !__ebs_check_bs(tmp1) ||
	    to_bytes(tmp1) > PAGE_SIZE) {
		ti->error = "Invalid emulated block size";
		goto bad;
	}
	ec->e_bs = tmp1;

	if (argc > 3) {
		if (sscanf(argv[3], "%hu%c", &tmp1, &dummy) != 1 || !__ebs_check_bs(tmp1)) {
			ti->error = "Invalid underlying block size";
			goto bad;
		}
		ec->u_bs = tmp1;
		ec->u_bs_set = true;
	} else
		ec->u_bs_set = false;

	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ec->dev);
	if (r) {
		ti->error = "Device lookup failed";
		ec->dev = NULL;
		goto bad;
	}

	r = -EINVAL;
	if (!ec->u_bs_set) {
		ec->u_bs = to_sector(bdev_logical_block_size(ec->dev->bdev));
		if (!__ebs_check_bs(ec->u_bs)) {
			ti->error = "Invalid retrieved underlying block size";
			goto bad;
		}
	}

	if (!ec->u_bs_set && ec->e_bs == ec->u_bs)
		DMINFO("Emulation superfluous: emulated equal to underlying block size");

	if (__block_mod(ec->start, ec->u_bs)) {
		ti->error = "Device offset must be multiple of underlying block size";
		goto bad;
	}

	ec->bufio = dm_bufio_client_create(ec->dev->bdev, to_bytes(ec->u_bs), 1, 0, NULL, NULL);
	if (IS_ERR(ec->bufio)) {
		ti->error = "Cannot create dm bufio client";
		r = PTR_ERR(ec->bufio);
		ec->bufio = NULL;
		goto bad;
	}

	ec->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!ec->wq) {
		ti->error = "Cannot create dm-" DM_MSG_PREFIX " workqueue";
		r = -ENOMEM;
		goto bad;
	}

	ec->block_shift = __ffs(ec->u_bs);
	INIT_WORK(&ec->ws, &__ebs_process_bios);
	bio_list_init(&ec->bios_in);
	spin_lock_init(&ec->lock);

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_secure_erase_bios = 0;
	ti->num_write_same_bios = 0;
	ti->num_write_zeroes_bios = 0;
	return 0;
bad:
	ebs_dtr(ti);
	return r;
}

static void ebs_dtr(struct dm_target *ti)
{
	struct ebs_c *ec = ti->private;

	if (ec->wq)
		destroy_workqueue(ec->wq);
	if (ec->bufio)
		dm_bufio_client_destroy(ec->bufio);
	if (ec->dev)
		dm_put_device(ti, ec->dev);
	kfree(ec);
}

static int ebs_map(struct dm_target *ti, struct bio *bio)
{
	struct ebs_c *ec = ti->private;

	bio_set_dev(bio, ec->dev->bdev);
	bio->bi_iter.bi_sector = ec->start + dm_target_offset(ti, bio->bi_iter.bi_sector);

	if (unlikely(bio->bi_opf & REQ_OP_FLUSH))
		return DM_MAPIO_REMAPPED;
	/*
	 * Only queue for bufio processing in case of partial or overlapping buffers
	 * -or-
	 * emulation with ebs == ubs aiming for tests of dm-bufio overhead.
	 */
	if (likely(__block_mod(bio->bi_iter.bi_sector, ec->u_bs) ||
		   __block_mod(bio_end_sector(bio), ec->u_bs) ||
		   ec->e_bs == ec->u_bs)) {
		spin_lock_irq(&ec->lock);
		bio_list_add(&ec->bios_in, bio);
		spin_unlock_irq(&ec->lock);

		queue_work(ec->wq, &ec->ws);

		return DM_MAPIO_SUBMITTED;
	}

	/* Forget any buffer content relative to this direct backing device I/O. */
	__ebs_forget_bio(ec, bio);

	return DM_MAPIO_REMAPPED;
}

static void ebs_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	struct ebs_c *ec = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		*result = '\0';
		break;
	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, ec->u_bs_set ? "%s %llu %u %u" : "%s %llu %u",
			 ec->dev->name, (unsigned long long) ec->start, ec->e_bs, ec->u_bs);
		break;
	}
}

static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct ebs_c *ec = ti->private;
	struct dm_dev *dev = ec->dev;

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	*bdev = dev->bdev;
	return !!(ec->start || ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT);
}

static void ebs_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct ebs_c *ec = ti->private;

	limits->logical_block_size = to_bytes(ec->e_bs);
	limits->physical_block_size = to_bytes(ec->u_bs);
	limits->alignment_offset = limits->physical_block_size;
	blk_limits_io_min(limits, limits->logical_block_size);
}
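
/*
 * Illustrative result (not part of the driver): for the 512-on-4KiB
 * example above (<ebs> = 1, <ubs> = 8), the upper layer sees
 * logical_block_size = 512 and physical_block_size = 4096;
 * blk_limits_io_min() then raises io_min to at least the physical
 * block size, i.e. 4096.
 */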

static int ebs_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct ebs_c *ec = ti->private;

	return fn(ti, ec->dev, ec->start, ti->len, data);
}

static struct target_type ebs_target = {
	.name		 = "ebs",
	.version	 = {1, 0, 0},
	.features	 = DM_TARGET_PASSES_INTEGRITY,
	.module		 = THIS_MODULE,
	.ctr		 = ebs_ctr,
	.dtr		 = ebs_dtr,
	.map		 = ebs_map,
	.status		 = ebs_status,
	.io_hints	 = ebs_io_hints,
	.prepare_ioctl	 = ebs_prepare_ioctl,
	.iterate_devices = ebs_iterate_devices,
};

static int __init dm_ebs_init(void)
{
	int r = dm_register_target(&ebs_target);

	if (r < 0)
		DMERR("register failed %d", r);

	return r;
}

static void dm_ebs_exit(void)
{
	dm_unregister_target(&ebs_target);
}

module_init(dm_ebs_init);
module_exit(dm_ebs_exit);

MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>");
MODULE_DESCRIPTION(DM_NAME " emulated block size target");
MODULE_LICENSE("GPL");