/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/device-mapper.h>

#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "io"

/*
 * The region number is packed into the low bits of bio->bi_private
 * next to the 'struct io' pointer (see store_io_and_region_in_bio()),
 * so the number of regions per request is bounded by the word size.
 */
#define DM_IO_MAX_REGIONS	BITS_PER_LONG

/*
 * Per-client resources for issuing io:
 * a mempool of 'struct io' tracking objects and a bioset from
 * which the per-region bios are allocated.
 */
struct dm_io_client {
	mempool_t *pool;	/* 'struct io' objects (allocated in sync_io()/async_io()) */
	struct bio_set *bios;	/* source of the bios built in do_region() */
};

/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
32f1e53987SMikulas Patocka */ 331da177e4SLinus Torvalds struct io { 34e01fd7eeSAlasdair G Kergon unsigned long error_bits; 351da177e4SLinus Torvalds atomic_t count; 36891ce207SHeinz Mauelshagen struct dm_io_client *client; 371da177e4SLinus Torvalds io_notify_fn callback; 381da177e4SLinus Torvalds void *context; 39bb91bc7bSMikulas Patocka void *vma_invalidate_address; 40bb91bc7bSMikulas Patocka unsigned long vma_invalidate_size; 41f1e53987SMikulas Patocka } __attribute__((aligned(DM_IO_MAX_REGIONS))); 421da177e4SLinus Torvalds 43952b3557SMikulas Patocka static struct kmem_cache *_dm_io_cache; 44952b3557SMikulas Patocka 451da177e4SLinus Torvalds /* 46c8b03afeSHeinz Mauelshagen * Create a client with mempool and bioset. 47c8b03afeSHeinz Mauelshagen */ 48bda8efecSMikulas Patocka struct dm_io_client *dm_io_client_create(void) 49c8b03afeSHeinz Mauelshagen { 50c8b03afeSHeinz Mauelshagen struct dm_io_client *client; 51e8603136SMike Snitzer unsigned min_ios = dm_get_reserved_bio_based_ios(); 52c8b03afeSHeinz Mauelshagen 53c8b03afeSHeinz Mauelshagen client = kmalloc(sizeof(*client), GFP_KERNEL); 54c8b03afeSHeinz Mauelshagen if (!client) 55c8b03afeSHeinz Mauelshagen return ERR_PTR(-ENOMEM); 56c8b03afeSHeinz Mauelshagen 57e8603136SMike Snitzer client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache); 58c8b03afeSHeinz Mauelshagen if (!client->pool) 59c8b03afeSHeinz Mauelshagen goto bad; 60c8b03afeSHeinz Mauelshagen 61e8603136SMike Snitzer client->bios = bioset_create(min_ios, 0); 62c8b03afeSHeinz Mauelshagen if (!client->bios) 63c8b03afeSHeinz Mauelshagen goto bad; 64c8b03afeSHeinz Mauelshagen 65c8b03afeSHeinz Mauelshagen return client; 66c8b03afeSHeinz Mauelshagen 67c8b03afeSHeinz Mauelshagen bad: 68c8b03afeSHeinz Mauelshagen if (client->pool) 69c8b03afeSHeinz Mauelshagen mempool_destroy(client->pool); 70c8b03afeSHeinz Mauelshagen kfree(client); 71c8b03afeSHeinz Mauelshagen return ERR_PTR(-ENOMEM); 72c8b03afeSHeinz Mauelshagen } 73c8b03afeSHeinz Mauelshagen 
EXPORT_SYMBOL(dm_io_client_create);

/*
 * Tear down a client created by dm_io_client_create().
 * The caller must ensure no io is in flight on this client.
 */
void dm_io_client_destroy(struct dm_io_client *client)
{
	mempool_destroy(client->pool);
	bioset_free(client->bios);
	kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);

/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * To avoid a memory allocation to store just 5 or 6 bits, we
 * ensure the 'struct io' pointer is aligned so enough low bits are
 * always zero and then combine it with the region number directly in
 * bi_private.
 *---------------------------------------------------------------*/

/*
 * Pack the io pointer and region number into bio->bi_private.
 * The aligned(DM_IO_MAX_REGIONS) attribute on 'struct io' guarantees
 * the low bits of the pointer are free; BUG if that ever breaks.
 */
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
				       unsigned region)
{
	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
		DMCRIT("Unaligned struct io pointer %p", io);
		BUG();
	}

	bio->bi_private = (void *)((unsigned long)io | region);
}

/*
 * Inverse of store_io_and_region_in_bio(): split bi_private back into
 * the io pointer (high bits) and the region number (low bits).
 */
static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
				       unsigned *region)
{
	unsigned long val = (unsigned long)bio->bi_private;

	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
	*region = val & (DM_IO_MAX_REGIONS - 1);
}

/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/

/*
 * Final completion: invalidate any vmalloc'd read buffer, return the
 * io object to its client's pool and invoke the caller's callback.
 * All per-io state is copied out first because mempool_free() makes
 * the io object invalid.
 */
static void complete_io(struct io *io)
{
	unsigned long error_bits = io->error_bits;
	io_notify_fn fn = io->callback;
	void *context = io->context;

	if (io->vma_invalidate_size)
		invalidate_kernel_vmap_range(io->vma_invalidate_address,
					     io->vma_invalidate_size);

	mempool_free(io, io->client->pool);
	fn(error_bits, context);
}

/*
 * Record a per-region error (if any) and drop one reference on the io;
 * the last dropper runs complete_io().
 */
static void dec_count(struct io *io, unsigned int region, int error)
{
	if (error)
		set_bit(region, &io->error_bits);

	if (atomic_dec_and_test(&io->count))
		complete_io(io);
}

/*
 * bi_end_io for every bio issued by do_region().  Failed reads are
 * zero-filled so the caller never sees stale buffer contents.
 */
static void endio(struct bio *bio, int error)
{
	struct io *io;
	unsigned region;

	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object,
	 * so unpack bi_private before releasing the bio.
	 */
	retrieve_io_and_region_from_bio(bio, &io, &region);

	bio_put(bio);

	dec_count(io, region, error);
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	/* return current page plus the usable length/offset within it */
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	/* advance to the next page */
	void (*next_page)(struct dpages *dp);

	unsigned context_u;	/* per-flavour scalar state (usually an offset) */
	void *context_ptr;	/* per-flavour cursor (page list, bvec, vaddr...) */

	void *vma_invalidate_address;	/* set by dp_init() for vmalloc'd reads */
	unsigned long vma_invalidate_size;
};

/*
 * Functions for getting the pages from a list.
1731da177e4SLinus Torvalds */ 1741da177e4SLinus Torvalds static void list_get_page(struct dpages *dp, 1751da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 1761da177e4SLinus Torvalds { 1771da177e4SLinus Torvalds unsigned o = dp->context_u; 1781da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1791da177e4SLinus Torvalds 1801da177e4SLinus Torvalds *p = pl->page; 1811da177e4SLinus Torvalds *len = PAGE_SIZE - o; 1821da177e4SLinus Torvalds *offset = o; 1831da177e4SLinus Torvalds } 1841da177e4SLinus Torvalds 1851da177e4SLinus Torvalds static void list_next_page(struct dpages *dp) 1861da177e4SLinus Torvalds { 1871da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1881da177e4SLinus Torvalds dp->context_ptr = pl->next; 1891da177e4SLinus Torvalds dp->context_u = 0; 1901da177e4SLinus Torvalds } 1911da177e4SLinus Torvalds 1921da177e4SLinus Torvalds static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) 1931da177e4SLinus Torvalds { 1941da177e4SLinus Torvalds dp->get_page = list_get_page; 1951da177e4SLinus Torvalds dp->next_page = list_next_page; 1961da177e4SLinus Torvalds dp->context_u = offset; 1971da177e4SLinus Torvalds dp->context_ptr = pl; 1981da177e4SLinus Torvalds } 1991da177e4SLinus Torvalds 2001da177e4SLinus Torvalds /* 2011da177e4SLinus Torvalds * Functions for getting the pages from a bvec. 
2021da177e4SLinus Torvalds */ 203d73f9907SMikulas Patocka static void bio_get_page(struct dpages *dp, struct page **p, 204d73f9907SMikulas Patocka unsigned long *len, unsigned *offset) 2051da177e4SLinus Torvalds { 206d73f9907SMikulas Patocka struct bio_vec *bvec = dp->context_ptr; 207d73f9907SMikulas Patocka *p = bvec->bv_page; 208d73f9907SMikulas Patocka *len = bvec->bv_len - dp->context_u; 209d73f9907SMikulas Patocka *offset = bvec->bv_offset + dp->context_u; 2101da177e4SLinus Torvalds } 2111da177e4SLinus Torvalds 212003b5c57SKent Overstreet static void bio_next_page(struct dpages *dp) 2131da177e4SLinus Torvalds { 214d73f9907SMikulas Patocka struct bio_vec *bvec = dp->context_ptr; 215d73f9907SMikulas Patocka dp->context_ptr = bvec + 1; 216d73f9907SMikulas Patocka dp->context_u = 0; 2171da177e4SLinus Torvalds } 2181da177e4SLinus Torvalds 219003b5c57SKent Overstreet static void bio_dp_init(struct dpages *dp, struct bio *bio) 2201da177e4SLinus Torvalds { 221003b5c57SKent Overstreet dp->get_page = bio_get_page; 222003b5c57SKent Overstreet dp->next_page = bio_next_page; 223d73f9907SMikulas Patocka dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); 224d73f9907SMikulas Patocka dp->context_u = bio->bi_iter.bi_bvec_done; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 227c8b03afeSHeinz Mauelshagen /* 228c8b03afeSHeinz Mauelshagen * Functions for getting the pages from a VMA. 
229c8b03afeSHeinz Mauelshagen */ 2301da177e4SLinus Torvalds static void vm_get_page(struct dpages *dp, 2311da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 2321da177e4SLinus Torvalds { 2331da177e4SLinus Torvalds *p = vmalloc_to_page(dp->context_ptr); 2341da177e4SLinus Torvalds *offset = dp->context_u; 2351da177e4SLinus Torvalds *len = PAGE_SIZE - dp->context_u; 2361da177e4SLinus Torvalds } 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds static void vm_next_page(struct dpages *dp) 2391da177e4SLinus Torvalds { 2401da177e4SLinus Torvalds dp->context_ptr += PAGE_SIZE - dp->context_u; 2411da177e4SLinus Torvalds dp->context_u = 0; 2421da177e4SLinus Torvalds } 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds static void vm_dp_init(struct dpages *dp, void *data) 2451da177e4SLinus Torvalds { 2461da177e4SLinus Torvalds dp->get_page = vm_get_page; 2471da177e4SLinus Torvalds dp->next_page = vm_next_page; 2481da177e4SLinus Torvalds dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 2491da177e4SLinus Torvalds dp->context_ptr = data; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 252c8b03afeSHeinz Mauelshagen /* 253c8b03afeSHeinz Mauelshagen * Functions for getting the pages from kernel memory. 
254c8b03afeSHeinz Mauelshagen */ 255c8b03afeSHeinz Mauelshagen static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, 256c8b03afeSHeinz Mauelshagen unsigned *offset) 257c8b03afeSHeinz Mauelshagen { 258c8b03afeSHeinz Mauelshagen *p = virt_to_page(dp->context_ptr); 259c8b03afeSHeinz Mauelshagen *offset = dp->context_u; 260c8b03afeSHeinz Mauelshagen *len = PAGE_SIZE - dp->context_u; 261c8b03afeSHeinz Mauelshagen } 262c8b03afeSHeinz Mauelshagen 263c8b03afeSHeinz Mauelshagen static void km_next_page(struct dpages *dp) 264c8b03afeSHeinz Mauelshagen { 265c8b03afeSHeinz Mauelshagen dp->context_ptr += PAGE_SIZE - dp->context_u; 266c8b03afeSHeinz Mauelshagen dp->context_u = 0; 267c8b03afeSHeinz Mauelshagen } 268c8b03afeSHeinz Mauelshagen 269c8b03afeSHeinz Mauelshagen static void km_dp_init(struct dpages *dp, void *data) 270c8b03afeSHeinz Mauelshagen { 271c8b03afeSHeinz Mauelshagen dp->get_page = km_get_page; 272c8b03afeSHeinz Mauelshagen dp->next_page = km_next_page; 273c8b03afeSHeinz Mauelshagen dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 274c8b03afeSHeinz Mauelshagen dp->context_ptr = data; 275c8b03afeSHeinz Mauelshagen } 276c8b03afeSHeinz Mauelshagen 2771da177e4SLinus Torvalds /*----------------------------------------------------------------- 2781da177e4SLinus Torvalds * IO routines that accept a list of pages. 
 *---------------------------------------------------------------*/

/*
 * Issue the io for a single region: build as many bios as needed to
 * cover where->count sectors, feeding pages from 'dp'.  Each bio takes
 * a reference on 'io' (dropped in endio()).  Discard and write-same
 * bios carry no data pages and are sized by the queue limits instead.
 */
static void do_region(int rw, unsigned region, struct dm_io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;
	struct request_queue *q = bdev_get_queue(where->bdev);
	unsigned short logical_block_size = queue_logical_block_size(q);
	sector_t num_sectors;

	/* Reject unsupported discard requests */
	if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) {
		dec_count(io, region, -EOPNOTSUPP);
		return;
	}

	/*
	 * where->count may be zero if rw holds a flush and we need to
	 * send a zero-sized flush.
	 */
	do {
		/*
		 * Allocate a suitably sized-bio.
		 */
		if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
			num_bvecs = 1;
		else
			num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev),
					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));

		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		store_io_and_region_in_bio(bio, io, region);

		if (rw & REQ_DISCARD) {
			/* No payload: just cap the size at the queue's discard limit. */
			num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
			remaining -= num_sectors;
		} else if (rw & REQ_WRITE_SAME) {
			/*
			 * WRITE SAME only uses a single page.
			 */
			dp->get_page(dp, &page, &len, &offset);
			bio_add_page(bio, page, logical_block_size, offset);
			num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;

			offset = 0;
			remaining -= num_sectors;
			dp->next_page(dp);
		} else while (remaining) {
			/*
			 * Try and add as many pages as possible.
			 */
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;	/* bio full: submit it, loop to allocate another */

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(rw, bio);
	} while (remaining);
}

/*
 * Fan an io request out over all regions.  The io object starts with
 * one extra reference (taken by the caller) so it cannot complete
 * before every region has been dispatched; that reference is dropped
 * at the end.
 */
static void dispatch_io(int rw, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	BUG_ON(num_regions > DM_IO_MAX_REGIONS);

	if (sync)
		rw |= REQ_SYNC;

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count || (rw & REQ_FLUSH))
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

/* On-stack completion context for sync_io(). */
struct sync_io {
	unsigned long error_bits;
	struct completion wait;
};

/* io_notify_fn used by sync_io(): record errors and wake the waiter. */
static void sync_io_complete(unsigned long error, void *context)
{
	struct sync_io *sio = context;

	sio->error_bits = error;
	complete(&sio->wait);
}

/*
 * Synchronous entry point: dispatch the io and sleep until every bio
 * has completed.  Multi-region requests must be writes (a read would
 * scribble the same buffer once per region).  Returns -EIO if any
 * region failed; per-region failure bits go to *error_bits if set.
 */
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	struct io *io;
	struct sync_io sio;

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	init_completion(&sio.wait);

	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = sync_io_complete;
	io->context = &sio;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 1);

	wait_for_completion_io(&sio.wait);

	if (error_bits)
		*error_bits = sio.error_bits;

	return sio.error_bits ? -EIO : 0;
}

/*
 * Asynchronous entry point: dispatch the io and return immediately;
 * fn(error_bits, context) runs when the last bio completes.  The same
 * multi-region-must-be-write rule as sync_io() applies.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = fn;
	io->context = context;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}

/*
 * Translate the caller's memory descriptor (io_req->mem) into a dpages
 * cursor.  For vmalloc'd buffers the cache is flushed here, and reads
 * arrange for invalidation on completion.  Returns -EINVAL for an
 * unknown memory type.
 */
static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
		   unsigned long size)
{
	/* Set up dpages based on memory type */

	dp->vma_invalidate_address = NULL;
	dp->vma_invalidate_size = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BIO:
		bio_dp_init(dp, io_req->mem.ptr.bio);
		break;

	case DM_IO_VMA:
		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
		if ((io_req->bi_rw & RW_MASK) == READ) {
			dp->vma_invalidate_address = io_req->mem.ptr.vma;
			dp->vma_invalidate_size = size;
		}
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * New collapsed (a)synchronous interface.
 *
 * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
 * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
49897e7cdf1SJoe Thornber * If you fail to do one of these, the IO will be submitted to the disk after 49997e7cdf1SJoe Thornber * q->unplug_delay, which defaults to 3ms in blk-settings.c. 500c8b03afeSHeinz Mauelshagen */ 501c8b03afeSHeinz Mauelshagen int dm_io(struct dm_io_request *io_req, unsigned num_regions, 50222a1ceb1SHeinz Mauelshagen struct dm_io_region *where, unsigned long *sync_error_bits) 503c8b03afeSHeinz Mauelshagen { 504c8b03afeSHeinz Mauelshagen int r; 505c8b03afeSHeinz Mauelshagen struct dpages dp; 506c8b03afeSHeinz Mauelshagen 507bb91bc7bSMikulas Patocka r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); 508c8b03afeSHeinz Mauelshagen if (r) 509c8b03afeSHeinz Mauelshagen return r; 510c8b03afeSHeinz Mauelshagen 511c8b03afeSHeinz Mauelshagen if (!io_req->notify.fn) 512c8b03afeSHeinz Mauelshagen return sync_io(io_req->client, num_regions, where, 513c8b03afeSHeinz Mauelshagen io_req->bi_rw, &dp, sync_error_bits); 514c8b03afeSHeinz Mauelshagen 515c8b03afeSHeinz Mauelshagen return async_io(io_req->client, num_regions, where, io_req->bi_rw, 516c8b03afeSHeinz Mauelshagen &dp, io_req->notify.fn, io_req->notify.context); 517c8b03afeSHeinz Mauelshagen } 518c8b03afeSHeinz Mauelshagen EXPORT_SYMBOL(dm_io); 519952b3557SMikulas Patocka 520952b3557SMikulas Patocka int __init dm_io_init(void) 521952b3557SMikulas Patocka { 522952b3557SMikulas Patocka _dm_io_cache = KMEM_CACHE(io, 0); 523952b3557SMikulas Patocka if (!_dm_io_cache) 524952b3557SMikulas Patocka return -ENOMEM; 525952b3557SMikulas Patocka 526952b3557SMikulas Patocka return 0; 527952b3557SMikulas Patocka } 528952b3557SMikulas Patocka 529952b3557SMikulas Patocka void dm_io_exit(void) 530952b3557SMikulas Patocka { 531952b3557SMikulas Patocka kmem_cache_destroy(_dm_io_cache); 532952b3557SMikulas Patocka _dm_io_cache = NULL; 533952b3557SMikulas Patocka } 534