/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/device-mapper.h>

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "io"

#define DM_IO_MAX_REGIONS	BITS_PER_LONG

struct dm_io_client {
	mempool_t *pool;
	struct bio_set *bios;
};

/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 */
struct io {
	unsigned long error_bits;
	atomic_t count;
	struct task_struct *sleeper;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
} __attribute__((aligned(DM_IO_MAX_REGIONS)));

static struct kmem_cache *_dm_io_cache;

/*
 * Create a client with mempool and bioset.
 */
struct dm_io_client *dm_io_client_create(void)
{
	struct dm_io_client *client;
	unsigned min_ios = dm_get_reserved_bio_based_ios();

	client = kmalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return ERR_PTR(-ENOMEM);

	client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache);
	if (!client->pool)
		goto bad;

	client->bios = bioset_create(min_ios, 0);
	if (!client->bios)
		goto bad;

	return client;

   bad:
	if (client->pool)
		mempool_destroy(client->pool);
	kfree(client);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(dm_io_client_create);

void dm_io_client_destroy(struct dm_io_client *client)
{
	mempool_destroy(client->pool);
	bioset_free(client->bios);
	kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);

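/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a typical caller creates one client at construction time, reuses it for
 * many requests and destroys it on teardown, e.g.:
 *
 *	struct dm_io_client *io_client;
 *
 *	io_client = dm_io_client_create();
 *	if (IS_ERR(io_client))
 *		return PTR_ERR(io_client);
 *	...
 *	dm_io_client_destroy(io_client);
 */
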
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * To avoid a memory allocation to store just 5 or 6 bits, we
 * ensure the 'struct io' pointer is aligned so enough low bits are
 * always zero and then combine it with the region number directly in
 * bi_private.
 *---------------------------------------------------------------*/
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
				       unsigned region)
{
	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
		DMCRIT("Unaligned struct io pointer %p", io);
		BUG();
	}

	bio->bi_private = (void *)((unsigned long)io | region);
}

static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
					    unsigned *region)
{
	unsigned long val = (unsigned long)bio->bi_private;

	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
	*region = val & (DM_IO_MAX_REGIONS - 1);
}

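/*
 * Worked example (editorial note, not from the original source): on a
 * 64-bit build DM_IO_MAX_REGIONS is 64, so 'struct io' is aligned to 64
 * bytes and the low 6 bits of its address are guaranteed to be zero.
 * With io == 0x...9c0 and region == 5, store_io_and_region_in_bio()
 * sets bi_private to 0x...9c5; retrieve_io_and_region_from_bio() then
 * recovers the pointer with 'val & -(unsigned long)DM_IO_MAX_REGIONS'
 * (i.e. masking off the low 6 bits) and the region with 'val & 63'.
 */
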
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error_bits);

	if (atomic_dec_and_test(&io->count)) {
		if (io->vma_invalidate_size)
			invalidate_kernel_vmap_range(io->vma_invalidate_address,
						     io->vma_invalidate_size);

		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			unsigned long r = io->error_bits;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io->client->pool);
			fn(r, context);
		}
	}
}

static void endio(struct bio *bio, int error)
{
	struct io *io;
	unsigned region;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	retrieve_io_and_region_from_bio(bio, &io, &region);

	bio_put(bio);

	dec_count(io, region, error);
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;

	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
};

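/*
 * Editorial sketch (not from the original source): callers of a dpages
 * object never touch context_u/context_ptr directly.  They repeat the
 * pattern used by do_region() below:
 *
 *	dp->get_page(dp, &page, &len, &offset);
 *	... consume up to 'len' bytes of 'page' starting at 'offset' ...
 *	dp->next_page(dp);
 *
 * The *_dp_init() helpers that follow simply fill in the two callbacks
 * and the context fields for each possible source of pages.
 */
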
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}

/*
 * Functions for getting the pages from a bvec.
 */
static void bvec_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	*p = bvec->bv_page;
	*len = bvec->bv_len;
	*offset = bvec->bv_offset;
}

static void bvec_next_page(struct dpages *dp)
{
	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
	dp->context_ptr = bvec + 1;
}

static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
{
	dp->get_page = bvec_get_page;
	dp->next_page = bvec_next_page;
	dp->context_ptr = bvec;
}

/*
 * Functions for getting the pages from a VMA.
 */
static void vm_get_page(struct dpages *dp,
			struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}

/*
 * Functions for getting the pages from kernel memory.
 */
static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
			unsigned *offset)
{
	*p = virt_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void km_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void km_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = km_get_page;
	dp->next_page = km_next_page;
	dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
	dp->context_ptr = data;
}

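/*
 * Worked example (editorial note, not from the original source):
 * vm_dp_init() and km_dp_init() accept buffers that are not page aligned.
 * With 4 KiB pages and data == 0x...1200, context_u becomes 0x200, so the
 * first get_page() returns offset 0x200 and len 0xe00; next_page() then
 * advances context_ptr by 0xe00 to the following page boundary and clears
 * context_u, after which every chunk is a whole page.
 */
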
/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int rw, unsigned region, struct dm_io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;
	struct request_queue *q = bdev_get_queue(where->bdev);
	unsigned short logical_block_size = queue_logical_block_size(q);
	sector_t num_sectors;

	/*
	 * where->count may be zero if rw holds a flush and we need to
	 * send a zero-sized flush.
	 */
	do {
		/*
		 * Allocate a suitably sized bio.
		 */
		if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
			num_bvecs = 1;
		else
			num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));

		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		store_io_and_region_in_bio(bio, io, region);

		if (rw & REQ_DISCARD) {
			num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
			remaining -= num_sectors;
		} else if (rw & REQ_WRITE_SAME) {
			/*
			 * WRITE SAME only uses a single page.
			 */
			dp->get_page(dp, &page, &len, &offset);
			bio_add_page(bio, page, logical_block_size, offset);
			num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;

			offset = 0;
			remaining -= num_sectors;
			dp->next_page(dp);
		} else while (remaining) {
			/*
			 * Try and add as many pages as possible.
			 */
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	} while (remaining);
}

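/*
 * Editorial note (not from the original source): for a plain read or write
 * the do/while loop above may emit several bios for one region.  Each pass
 * allocates a bio sized for at most bio_get_nr_vecs() pages, fills it from
 * the dpages iterator until bio_add_page() refuses another page, and
 * submits it; 'remaining' tracks the sectors still to cover, so the next
 * bio starts at where->sector + (where->count - remaining).  Every bio
 * bumps io->count, which endio()/dec_count() later drop.
 */
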
static void dispatch_io(int rw, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	BUG_ON(num_regions > DM_IO_MAX_REGIONS);

	if (sync)
		rw |= REQ_SYNC;

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count || (rw & REQ_FLUSH))
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	/*
	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
	 * align it on our own.
	 * volatile prevents the optimizer from removing or reusing
	 * "io_" field from the stack frame (allowed in ANSI C).
	 */
	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = current;
	io->client = client;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 1);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io->count))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (error_bits)
		*error_bits = io->error_bits;

	return io->error_bits ? -EIO : 0;
}

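/*
 * Editorial note (not from the original source): the on-stack 'io_' buffer
 * is sizeof(struct io) + alignment - 1 bytes so that PTR_ALIGN() can round
 * its address up to the next __alignof__(struct io) boundary; e.g. a buffer
 * starting at 0x...f028 with 64-byte alignment yields io == 0x...f040,
 * which satisfies the check in store_io_and_region_in_bio().  Using the
 * stack instead of the mempool is safe here because sync_io() does not
 * return until dec_count() has dropped io->count to zero and woken the
 * sleeping task.
 */
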
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->client = client;
	io->callback = fn;
	io->context = context;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}

static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
		   unsigned long size)
{
	/* Set up dpages based on memory type */

	dp->vma_invalidate_address = NULL;
	dp->vma_invalidate_size = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BVEC:
		bvec_dp_init(dp, io_req->mem.ptr.bvec);
		break;

	case DM_IO_VMA:
		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
		if ((io_req->bi_rw & RW_MASK) == READ) {
			dp->vma_invalidate_address = io_req->mem.ptr.vma;
			dp->vma_invalidate_size = size;
		}
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * New collapsed (a)synchronous interface.
 *
 * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
 * the queue with blk_unplug() some time later or set REQ_SYNC in
 * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
 * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
 */
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
	  struct dm_io_region *where, unsigned long *sync_error_bits)
{
	int r;
	struct dpages dp;

	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
	if (r)
		return r;

	if (!io_req->notify.fn)
		return sync_io(io_req->client, num_regions, where,
			       io_req->bi_rw, &dp, sync_error_bits);

	return async_io(io_req->client, num_regions, where, io_req->bi_rw,
			&dp, io_req->notify.fn, io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);

int __init dm_io_init(void)
{
	_dm_io_cache = KMEM_CACHE(io, 0);
	if (!_dm_io_cache)
		return -ENOMEM;

	return 0;
}

void dm_io_exit(void)
{
	kmem_cache_destroy(_dm_io_cache);
	_dm_io_cache = NULL;
}

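/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): a synchronous read of 8 sectors into a kmalloc'd buffer, reusing
 * a previously created dm_io_client.  'bdev', 'sector', 'data' and
 * 'io_client' are assumed to be supplied by the caller.
 *
 *	struct dm_io_region region = {
 *		.bdev   = bdev,
 *		.sector = sector,
 *		.count  = 8,
 *	};
 *	struct dm_io_request req = {
 *		.bi_rw        = READ,
 *		.mem.type     = DM_IO_KMEM,
 *		.mem.ptr.addr = data,
 *		.client       = io_client,
 *	};
 *	unsigned long error_bits;
 *	int r = dm_io(&req, 1, &region, &error_bits);
 *
 * Leaving notify.fn zeroed selects the synchronous path; setting notify.fn
 * and notify.context instead makes dm_io() return immediately and deliver
 * the accumulated error bits to the callback once every bio for every
 * region has completed.
 */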