11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Copyright (C) 2003 Sistina Software 3891ce207SHeinz Mauelshagen * Copyright (C) 2006 Red Hat GmbH 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * This file is released under the GPL. 61da177e4SLinus Torvalds */ 71da177e4SLinus Torvalds 8952b3557SMikulas Patocka #include "dm.h" 9952b3557SMikulas Patocka 10586e80e6SMikulas Patocka #include <linux/device-mapper.h> 111da177e4SLinus Torvalds 121da177e4SLinus Torvalds #include <linux/bio.h> 1310f1d5d1SJoe Thornber #include <linux/completion.h> 141da177e4SLinus Torvalds #include <linux/mempool.h> 151da177e4SLinus Torvalds #include <linux/module.h> 161da177e4SLinus Torvalds #include <linux/sched.h> 171da177e4SLinus Torvalds #include <linux/slab.h> 18a765e20eSAlasdair G Kergon #include <linux/dm-io.h> 191da177e4SLinus Torvalds 20f1e53987SMikulas Patocka #define DM_MSG_PREFIX "io" 21f1e53987SMikulas Patocka 22f1e53987SMikulas Patocka #define DM_IO_MAX_REGIONS BITS_PER_LONG 23f1e53987SMikulas Patocka 24891ce207SHeinz Mauelshagen struct dm_io_client { 25891ce207SHeinz Mauelshagen mempool_t *pool; 26891ce207SHeinz Mauelshagen struct bio_set *bios; 27891ce207SHeinz Mauelshagen }; 28891ce207SHeinz Mauelshagen 29f1e53987SMikulas Patocka /* 30f1e53987SMikulas Patocka * Aligning 'struct io' reduces the number of bits required to store 31f1e53987SMikulas Patocka * its address. Refer to store_io_and_region_in_bio() below. 32f1e53987SMikulas Patocka */ 331da177e4SLinus Torvalds struct io { 34e01fd7eeSAlasdair G Kergon unsigned long error_bits; 351da177e4SLinus Torvalds atomic_t count; 36891ce207SHeinz Mauelshagen struct dm_io_client *client; 371da177e4SLinus Torvalds io_notify_fn callback; 381da177e4SLinus Torvalds void *context; 39bb91bc7bSMikulas Patocka void *vma_invalidate_address; 40bb91bc7bSMikulas Patocka unsigned long vma_invalidate_size; 41f1e53987SMikulas Patocka } __attribute__((aligned(DM_IO_MAX_REGIONS))); 421da177e4SLinus Torvalds 43952b3557SMikulas Patocka static struct kmem_cache *_dm_io_cache; 44952b3557SMikulas Patocka 451da177e4SLinus Torvalds /* 46c8b03afeSHeinz Mauelshagen * Create a client with mempool and bioset. 47c8b03afeSHeinz Mauelshagen */ 48bda8efecSMikulas Patocka struct dm_io_client *dm_io_client_create(void) 49c8b03afeSHeinz Mauelshagen { 50c8b03afeSHeinz Mauelshagen struct dm_io_client *client; 51e8603136SMike Snitzer unsigned min_ios = dm_get_reserved_bio_based_ios(); 52c8b03afeSHeinz Mauelshagen 53c8b03afeSHeinz Mauelshagen client = kmalloc(sizeof(*client), GFP_KERNEL); 54c8b03afeSHeinz Mauelshagen if (!client) 55c8b03afeSHeinz Mauelshagen return ERR_PTR(-ENOMEM); 56c8b03afeSHeinz Mauelshagen 57e8603136SMike Snitzer client->pool = mempool_create_slab_pool(min_ios, _dm_io_cache); 58c8b03afeSHeinz Mauelshagen if (!client->pool) 59c8b03afeSHeinz Mauelshagen goto bad; 60c8b03afeSHeinz Mauelshagen 61e8603136SMike Snitzer client->bios = bioset_create(min_ios, 0); 62c8b03afeSHeinz Mauelshagen if (!client->bios) 63c8b03afeSHeinz Mauelshagen goto bad; 64c8b03afeSHeinz Mauelshagen 65c8b03afeSHeinz Mauelshagen return client; 66c8b03afeSHeinz Mauelshagen 67c8b03afeSHeinz Mauelshagen bad: 68c8b03afeSHeinz Mauelshagen if (client->pool) 69c8b03afeSHeinz Mauelshagen mempool_destroy(client->pool); 70c8b03afeSHeinz Mauelshagen kfree(client); 71c8b03afeSHeinz Mauelshagen return ERR_PTR(-ENOMEM); 72c8b03afeSHeinz Mauelshagen } 73c8b03afeSHeinz Mauelshagen EXPORT_SYMBOL(dm_io_client_create); 74c8b03afeSHeinz Mauelshagen 75c8b03afeSHeinz Mauelshagen void dm_io_client_destroy(struct dm_io_client *client) 76c8b03afeSHeinz Mauelshagen { 77c8b03afeSHeinz Mauelshagen mempool_destroy(client->pool); 78c8b03afeSHeinz Mauelshagen bioset_free(client->bios); 79c8b03afeSHeinz Mauelshagen kfree(client); 80c8b03afeSHeinz Mauelshagen } 81c8b03afeSHeinz Mauelshagen EXPORT_SYMBOL(dm_io_client_destroy); 82c8b03afeSHeinz Mauelshagen 831da177e4SLinus Torvalds /*----------------------------------------------------------------- 841da177e4SLinus Torvalds * We need to keep track of which region a bio is doing io for. 85f1e53987SMikulas Patocka * To avoid a memory allocation to store just 5 or 6 bits, we 86f1e53987SMikulas Patocka * ensure the 'struct io' pointer is aligned so enough low bits are 87f1e53987SMikulas Patocka * always zero and then combine it with the region number directly in 88f1e53987SMikulas Patocka * bi_private. 891da177e4SLinus Torvalds *---------------------------------------------------------------*/ 90f1e53987SMikulas Patocka static void store_io_and_region_in_bio(struct bio *bio, struct io *io, 91f1e53987SMikulas Patocka unsigned region) 921da177e4SLinus Torvalds { 93f1e53987SMikulas Patocka if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) { 94f1e53987SMikulas Patocka DMCRIT("Unaligned struct io pointer %p", io); 95f1e53987SMikulas Patocka BUG(); 961da177e4SLinus Torvalds } 971da177e4SLinus Torvalds 98f1e53987SMikulas Patocka bio->bi_private = (void *)((unsigned long)io | region); 99f1e53987SMikulas Patocka } 100f1e53987SMikulas Patocka 101f1e53987SMikulas Patocka static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, 102f1e53987SMikulas Patocka unsigned *region) 1031da177e4SLinus Torvalds { 104f1e53987SMikulas Patocka unsigned long val = (unsigned long)bio->bi_private; 105f1e53987SMikulas Patocka 106f1e53987SMikulas Patocka *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS); 107f1e53987SMikulas Patocka *region = val & (DM_IO_MAX_REGIONS - 1); 1081da177e4SLinus Torvalds } 1091da177e4SLinus Torvalds 1101da177e4SLinus Torvalds /*----------------------------------------------------------------- 1111da177e4SLinus Torvalds * We need an io object to keep track of the number of bios that 1121da177e4SLinus Torvalds * have been dispatched for a particular io. 1131da177e4SLinus Torvalds *---------------------------------------------------------------*/ 11497e7cdf1SJoe Thornber static void complete_io(struct io *io) 11597e7cdf1SJoe Thornber { 11697e7cdf1SJoe Thornber unsigned long error_bits = io->error_bits; 11797e7cdf1SJoe Thornber io_notify_fn fn = io->callback; 11897e7cdf1SJoe Thornber void *context = io->context; 11997e7cdf1SJoe Thornber 12097e7cdf1SJoe Thornber if (io->vma_invalidate_size) 12197e7cdf1SJoe Thornber invalidate_kernel_vmap_range(io->vma_invalidate_address, 12297e7cdf1SJoe Thornber io->vma_invalidate_size); 12397e7cdf1SJoe Thornber 12497e7cdf1SJoe Thornber mempool_free(io, io->client->pool); 12597e7cdf1SJoe Thornber fn(error_bits, context); 12697e7cdf1SJoe Thornber } 12797e7cdf1SJoe Thornber 1281da177e4SLinus Torvalds static void dec_count(struct io *io, unsigned int region, int error) 1291da177e4SLinus Torvalds { 130d87f4c14STejun Heo if (error) 131e01fd7eeSAlasdair G Kergon set_bit(region, &io->error_bits); 1321da177e4SLinus Torvalds 13397e7cdf1SJoe Thornber if (atomic_dec_and_test(&io->count)) 13497e7cdf1SJoe Thornber complete_io(io); 1351da177e4SLinus Torvalds } 1361da177e4SLinus Torvalds 1374246a0b6SChristoph Hellwig static void endio(struct bio *bio) 1381da177e4SLinus Torvalds { 139c897feb3SHeinz Mauelshagen struct io *io; 140c897feb3SHeinz Mauelshagen unsigned region; 141*9b81c842SSasha Levin int error; 1421da177e4SLinus Torvalds 1434246a0b6SChristoph Hellwig if (bio->bi_error && bio_data_dir(bio) == READ) 1441da177e4SLinus Torvalds zero_fill_bio(bio); 1451da177e4SLinus Torvalds 146c897feb3SHeinz Mauelshagen /* 147c897feb3SHeinz Mauelshagen * The bio destructor in bio_put() may use the io object. 148c897feb3SHeinz Mauelshagen */ 149f1e53987SMikulas Patocka retrieve_io_and_region_from_bio(bio, &io, ®ion); 150c897feb3SHeinz Mauelshagen 151*9b81c842SSasha Levin error = bio->bi_error; 1521da177e4SLinus Torvalds bio_put(bio); 1531da177e4SLinus Torvalds 154*9b81c842SSasha Levin dec_count(io, region, error); 1551da177e4SLinus Torvalds } 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds /*----------------------------------------------------------------- 1581da177e4SLinus Torvalds * These little objects provide an abstraction for getting a new 1591da177e4SLinus Torvalds * destination page for io. 1601da177e4SLinus Torvalds *---------------------------------------------------------------*/ 1611da177e4SLinus Torvalds struct dpages { 1621da177e4SLinus Torvalds void (*get_page)(struct dpages *dp, 1631da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset); 1641da177e4SLinus Torvalds void (*next_page)(struct dpages *dp); 1651da177e4SLinus Torvalds 1661da177e4SLinus Torvalds unsigned context_u; 1671da177e4SLinus Torvalds void *context_ptr; 168bb91bc7bSMikulas Patocka 169bb91bc7bSMikulas Patocka void *vma_invalidate_address; 170bb91bc7bSMikulas Patocka unsigned long vma_invalidate_size; 1711da177e4SLinus Torvalds }; 1721da177e4SLinus Torvalds 1731da177e4SLinus Torvalds /* 1741da177e4SLinus Torvalds * Functions for getting the pages from a list. 1751da177e4SLinus Torvalds */ 1761da177e4SLinus Torvalds static void list_get_page(struct dpages *dp, 1771da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 1781da177e4SLinus Torvalds { 1791da177e4SLinus Torvalds unsigned o = dp->context_u; 1801da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds *p = pl->page; 1831da177e4SLinus Torvalds *len = PAGE_SIZE - o; 1841da177e4SLinus Torvalds *offset = o; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 1871da177e4SLinus Torvalds static void list_next_page(struct dpages *dp) 1881da177e4SLinus Torvalds { 1891da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1901da177e4SLinus Torvalds dp->context_ptr = pl->next; 1911da177e4SLinus Torvalds dp->context_u = 0; 1921da177e4SLinus Torvalds } 1931da177e4SLinus Torvalds 1941da177e4SLinus Torvalds static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) 1951da177e4SLinus Torvalds { 1961da177e4SLinus Torvalds dp->get_page = list_get_page; 1971da177e4SLinus Torvalds dp->next_page = list_next_page; 1981da177e4SLinus Torvalds dp->context_u = offset; 1991da177e4SLinus Torvalds dp->context_ptr = pl; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds 2021da177e4SLinus Torvalds /* 2031da177e4SLinus Torvalds * Functions for getting the pages from a bvec. 2041da177e4SLinus Torvalds */ 205d73f9907SMikulas Patocka static void bio_get_page(struct dpages *dp, struct page **p, 206d73f9907SMikulas Patocka unsigned long *len, unsigned *offset) 2071da177e4SLinus Torvalds { 208d73f9907SMikulas Patocka struct bio_vec *bvec = dp->context_ptr; 209d73f9907SMikulas Patocka *p = bvec->bv_page; 210d73f9907SMikulas Patocka *len = bvec->bv_len - dp->context_u; 211d73f9907SMikulas Patocka *offset = bvec->bv_offset + dp->context_u; 2121da177e4SLinus Torvalds } 2131da177e4SLinus Torvalds 214003b5c57SKent Overstreet static void bio_next_page(struct dpages *dp) 2151da177e4SLinus Torvalds { 216d73f9907SMikulas Patocka struct bio_vec *bvec = dp->context_ptr; 217d73f9907SMikulas Patocka dp->context_ptr = bvec + 1; 218d73f9907SMikulas Patocka dp->context_u = 0; 2191da177e4SLinus Torvalds } 2201da177e4SLinus Torvalds 221003b5c57SKent Overstreet static void bio_dp_init(struct dpages *dp, struct bio *bio) 2221da177e4SLinus Torvalds { 223003b5c57SKent Overstreet dp->get_page = bio_get_page; 224003b5c57SKent Overstreet dp->next_page = bio_next_page; 225d73f9907SMikulas Patocka dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); 226d73f9907SMikulas Patocka dp->context_u = bio->bi_iter.bi_bvec_done; 2271da177e4SLinus Torvalds } 2281da177e4SLinus Torvalds 229c8b03afeSHeinz Mauelshagen /* 230c8b03afeSHeinz Mauelshagen * Functions for getting the pages from a VMA. 231c8b03afeSHeinz Mauelshagen */ 2321da177e4SLinus Torvalds static void vm_get_page(struct dpages *dp, 2331da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 2341da177e4SLinus Torvalds { 2351da177e4SLinus Torvalds *p = vmalloc_to_page(dp->context_ptr); 2361da177e4SLinus Torvalds *offset = dp->context_u; 2371da177e4SLinus Torvalds *len = PAGE_SIZE - dp->context_u; 2381da177e4SLinus Torvalds } 2391da177e4SLinus Torvalds 2401da177e4SLinus Torvalds static void vm_next_page(struct dpages *dp) 2411da177e4SLinus Torvalds { 2421da177e4SLinus Torvalds dp->context_ptr += PAGE_SIZE - dp->context_u; 2431da177e4SLinus Torvalds dp->context_u = 0; 2441da177e4SLinus Torvalds } 2451da177e4SLinus Torvalds 2461da177e4SLinus Torvalds static void vm_dp_init(struct dpages *dp, void *data) 2471da177e4SLinus Torvalds { 2481da177e4SLinus Torvalds dp->get_page = vm_get_page; 2491da177e4SLinus Torvalds dp->next_page = vm_next_page; 2501da177e4SLinus Torvalds dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 2511da177e4SLinus Torvalds dp->context_ptr = data; 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 254c8b03afeSHeinz Mauelshagen /* 255c8b03afeSHeinz Mauelshagen * Functions for getting the pages from kernel memory. 256c8b03afeSHeinz Mauelshagen */ 257c8b03afeSHeinz Mauelshagen static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, 258c8b03afeSHeinz Mauelshagen unsigned *offset) 259c8b03afeSHeinz Mauelshagen { 260c8b03afeSHeinz Mauelshagen *p = virt_to_page(dp->context_ptr); 261c8b03afeSHeinz Mauelshagen *offset = dp->context_u; 262c8b03afeSHeinz Mauelshagen *len = PAGE_SIZE - dp->context_u; 263c8b03afeSHeinz Mauelshagen } 264c8b03afeSHeinz Mauelshagen 265c8b03afeSHeinz Mauelshagen static void km_next_page(struct dpages *dp) 266c8b03afeSHeinz Mauelshagen { 267c8b03afeSHeinz Mauelshagen dp->context_ptr += PAGE_SIZE - dp->context_u; 268c8b03afeSHeinz Mauelshagen dp->context_u = 0; 269c8b03afeSHeinz Mauelshagen } 270c8b03afeSHeinz Mauelshagen 271c8b03afeSHeinz Mauelshagen static void km_dp_init(struct dpages *dp, void *data) 272c8b03afeSHeinz Mauelshagen { 273c8b03afeSHeinz Mauelshagen dp->get_page = km_get_page; 274c8b03afeSHeinz Mauelshagen dp->next_page = km_next_page; 275c8b03afeSHeinz Mauelshagen dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 276c8b03afeSHeinz Mauelshagen dp->context_ptr = data; 277c8b03afeSHeinz Mauelshagen } 278c8b03afeSHeinz Mauelshagen 2791da177e4SLinus Torvalds /*----------------------------------------------------------------- 2801da177e4SLinus Torvalds * IO routines that accept a list of pages. 2811da177e4SLinus Torvalds *---------------------------------------------------------------*/ 28222a1ceb1SHeinz Mauelshagen static void do_region(int rw, unsigned region, struct dm_io_region *where, 2831da177e4SLinus Torvalds struct dpages *dp, struct io *io) 2841da177e4SLinus Torvalds { 2851da177e4SLinus Torvalds struct bio *bio; 2861da177e4SLinus Torvalds struct page *page; 2871da177e4SLinus Torvalds unsigned long len; 2881da177e4SLinus Torvalds unsigned offset; 2891da177e4SLinus Torvalds unsigned num_bvecs; 2901da177e4SLinus Torvalds sector_t remaining = where->count; 2910c535e0dSMilan Broz struct request_queue *q = bdev_get_queue(where->bdev); 29270d6c400SMike Snitzer unsigned short logical_block_size = queue_logical_block_size(q); 29370d6c400SMike Snitzer sector_t num_sectors; 294e5db2980SDarrick J. Wong unsigned int uninitialized_var(special_cmd_max_sectors); 2951da177e4SLinus Torvalds 296e5db2980SDarrick J. Wong /* 297e5db2980SDarrick J. Wong * Reject unsupported discard and write same requests. 298e5db2980SDarrick J. Wong */ 299e5db2980SDarrick J. Wong if (rw & REQ_DISCARD) 300e5db2980SDarrick J. Wong special_cmd_max_sectors = q->limits.max_discard_sectors; 301e5db2980SDarrick J. Wong else if (rw & REQ_WRITE_SAME) 302e5db2980SDarrick J. Wong special_cmd_max_sectors = q->limits.max_write_same_sectors; 303e5db2980SDarrick J. Wong if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) { 30437527b86SDarrick J. Wong dec_count(io, region, -EOPNOTSUPP); 30537527b86SDarrick J. Wong return; 30637527b86SDarrick J. Wong } 30737527b86SDarrick J. Wong 30812fc0f49SMikulas Patocka /* 309d87f4c14STejun Heo * where->count may be zero if rw holds a flush and we need to 310d87f4c14STejun Heo * send a zero-sized flush. 31112fc0f49SMikulas Patocka */ 31212fc0f49SMikulas Patocka do { 3131da177e4SLinus Torvalds /* 314f1e53987SMikulas Patocka * Allocate a suitably sized-bio. 3151da177e4SLinus Torvalds */ 31670d6c400SMike Snitzer if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) 3170c535e0dSMilan Broz num_bvecs = 1; 3180c535e0dSMilan Broz else 3190c535e0dSMilan Broz num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), 3200c535e0dSMilan Broz dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); 3210c535e0dSMilan Broz 322bf17ce3aSMilan Broz bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); 3234f024f37SKent Overstreet bio->bi_iter.bi_sector = where->sector + (where->count - remaining); 3241da177e4SLinus Torvalds bio->bi_bdev = where->bdev; 3251da177e4SLinus Torvalds bio->bi_end_io = endio; 326f1e53987SMikulas Patocka store_io_and_region_in_bio(bio, io, region); 3271da177e4SLinus Torvalds 3280c535e0dSMilan Broz if (rw & REQ_DISCARD) { 329e5db2980SDarrick J. Wong num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); 3304f024f37SKent Overstreet bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; 33170d6c400SMike Snitzer remaining -= num_sectors; 33270d6c400SMike Snitzer } else if (rw & REQ_WRITE_SAME) { 33370d6c400SMike Snitzer /* 33470d6c400SMike Snitzer * WRITE SAME only uses a single page. 33570d6c400SMike Snitzer */ 33670d6c400SMike Snitzer dp->get_page(dp, &page, &len, &offset); 33770d6c400SMike Snitzer bio_add_page(bio, page, logical_block_size, offset); 338e5db2980SDarrick J. Wong num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); 3394f024f37SKent Overstreet bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; 34070d6c400SMike Snitzer 34170d6c400SMike Snitzer offset = 0; 34270d6c400SMike Snitzer remaining -= num_sectors; 34370d6c400SMike Snitzer dp->next_page(dp); 3440c535e0dSMilan Broz } else while (remaining) { 3451da177e4SLinus Torvalds /* 3461da177e4SLinus Torvalds * Try and add as many pages as possible. 3471da177e4SLinus Torvalds */ 3481da177e4SLinus Torvalds dp->get_page(dp, &page, &len, &offset); 3491da177e4SLinus Torvalds len = min(len, to_bytes(remaining)); 3501da177e4SLinus Torvalds if (!bio_add_page(bio, page, len, offset)) 3511da177e4SLinus Torvalds break; 3521da177e4SLinus Torvalds 3531da177e4SLinus Torvalds offset = 0; 3541da177e4SLinus Torvalds remaining -= to_sector(len); 3551da177e4SLinus Torvalds dp->next_page(dp); 3561da177e4SLinus Torvalds } 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds atomic_inc(&io->count); 3591da177e4SLinus Torvalds submit_bio(rw, bio); 36012fc0f49SMikulas Patocka } while (remaining); 3611da177e4SLinus Torvalds } 3621da177e4SLinus Torvalds 3631da177e4SLinus Torvalds static void dispatch_io(int rw, unsigned int num_regions, 36422a1ceb1SHeinz Mauelshagen struct dm_io_region *where, struct dpages *dp, 3651da177e4SLinus Torvalds struct io *io, int sync) 3661da177e4SLinus Torvalds { 3671da177e4SLinus Torvalds int i; 3681da177e4SLinus Torvalds struct dpages old_pages = *dp; 3691da177e4SLinus Torvalds 370f1e53987SMikulas Patocka BUG_ON(num_regions > DM_IO_MAX_REGIONS); 371f1e53987SMikulas Patocka 3721da177e4SLinus Torvalds if (sync) 373721a9602SJens Axboe rw |= REQ_SYNC; 3741da177e4SLinus Torvalds 3751da177e4SLinus Torvalds /* 3761da177e4SLinus Torvalds * For multiple regions we need to be careful to rewind 3771da177e4SLinus Torvalds * the dp object for each call to do_region. 3781da177e4SLinus Torvalds */ 3791da177e4SLinus Torvalds for (i = 0; i < num_regions; i++) { 3801da177e4SLinus Torvalds *dp = old_pages; 381d87f4c14STejun Heo if (where[i].count || (rw & REQ_FLUSH)) 3821da177e4SLinus Torvalds do_region(rw, i, where + i, dp, io); 3831da177e4SLinus Torvalds } 3841da177e4SLinus Torvalds 3851da177e4SLinus Torvalds /* 386f00b16adSHeinz Mauelshagen * Drop the extra reference that we were holding to avoid 3871da177e4SLinus Torvalds * the io being completed too early. 3881da177e4SLinus Torvalds */ 3891da177e4SLinus Torvalds dec_count(io, 0, 0); 3901da177e4SLinus Torvalds } 3911da177e4SLinus Torvalds 39297e7cdf1SJoe Thornber struct sync_io { 39397e7cdf1SJoe Thornber unsigned long error_bits; 39497e7cdf1SJoe Thornber struct completion wait; 39597e7cdf1SJoe Thornber }; 39697e7cdf1SJoe Thornber 39797e7cdf1SJoe Thornber static void sync_io_complete(unsigned long error, void *context) 39897e7cdf1SJoe Thornber { 39997e7cdf1SJoe Thornber struct sync_io *sio = context; 40097e7cdf1SJoe Thornber 40197e7cdf1SJoe Thornber sio->error_bits = error; 40297e7cdf1SJoe Thornber complete(&sio->wait); 40397e7cdf1SJoe Thornber } 40497e7cdf1SJoe Thornber 405891ce207SHeinz Mauelshagen static int sync_io(struct dm_io_client *client, unsigned int num_regions, 40622a1ceb1SHeinz Mauelshagen struct dm_io_region *where, int rw, struct dpages *dp, 407891ce207SHeinz Mauelshagen unsigned long *error_bits) 4081da177e4SLinus Torvalds { 40997e7cdf1SJoe Thornber struct io *io; 41097e7cdf1SJoe Thornber struct sync_io sio; 4111da177e4SLinus Torvalds 4127ff14a36SMikulas Patocka if (num_regions > 1 && (rw & RW_MASK) != WRITE) { 4131da177e4SLinus Torvalds WARN_ON(1); 4141da177e4SLinus Torvalds return -EIO; 4151da177e4SLinus Torvalds } 4161da177e4SLinus Torvalds 41797e7cdf1SJoe Thornber init_completion(&sio.wait); 41897e7cdf1SJoe Thornber 41997e7cdf1SJoe Thornber io = mempool_alloc(client->pool, GFP_NOIO); 420f1e53987SMikulas Patocka io->error_bits = 0; 421f1e53987SMikulas Patocka atomic_set(&io->count, 1); /* see dispatch_io() */ 422f1e53987SMikulas Patocka io->client = client; 42397e7cdf1SJoe Thornber io->callback = sync_io_complete; 42497e7cdf1SJoe Thornber io->context = &sio; 4251da177e4SLinus Torvalds 426bb91bc7bSMikulas Patocka io->vma_invalidate_address = dp->vma_invalidate_address; 427bb91bc7bSMikulas Patocka io->vma_invalidate_size = dp->vma_invalidate_size; 428bb91bc7bSMikulas Patocka 429f1e53987SMikulas Patocka dispatch_io(rw, num_regions, where, dp, io, 1); 4301da177e4SLinus Torvalds 43197e7cdf1SJoe Thornber wait_for_completion_io(&sio.wait); 4321da177e4SLinus Torvalds 433891ce207SHeinz Mauelshagen if (error_bits) 43497e7cdf1SJoe Thornber *error_bits = sio.error_bits; 435891ce207SHeinz Mauelshagen 43697e7cdf1SJoe Thornber return sio.error_bits ? -EIO : 0; 4371da177e4SLinus Torvalds } 4381da177e4SLinus Torvalds 439891ce207SHeinz Mauelshagen static int async_io(struct dm_io_client *client, unsigned int num_regions, 44022a1ceb1SHeinz Mauelshagen struct dm_io_region *where, int rw, struct dpages *dp, 441891ce207SHeinz Mauelshagen io_notify_fn fn, void *context) 4421da177e4SLinus Torvalds { 4431da177e4SLinus Torvalds struct io *io; 4441da177e4SLinus Torvalds 4457ff14a36SMikulas Patocka if (num_regions > 1 && (rw & RW_MASK) != WRITE) { 4461da177e4SLinus Torvalds WARN_ON(1); 4471da177e4SLinus Torvalds fn(1, context); 4481da177e4SLinus Torvalds return -EIO; 4491da177e4SLinus Torvalds } 4501da177e4SLinus Torvalds 451bf17ce3aSMilan Broz io = mempool_alloc(client->pool, GFP_NOIO); 452e01fd7eeSAlasdair G Kergon io->error_bits = 0; 4531da177e4SLinus Torvalds atomic_set(&io->count, 1); /* see dispatch_io() */ 454891ce207SHeinz Mauelshagen io->client = client; 4551da177e4SLinus Torvalds io->callback = fn; 4561da177e4SLinus Torvalds io->context = context; 4571da177e4SLinus Torvalds 458bb91bc7bSMikulas Patocka io->vma_invalidate_address = dp->vma_invalidate_address; 459bb91bc7bSMikulas Patocka io->vma_invalidate_size = dp->vma_invalidate_size; 460bb91bc7bSMikulas Patocka 4611da177e4SLinus Torvalds dispatch_io(rw, num_regions, where, dp, io, 0); 4621da177e4SLinus Torvalds return 0; 4631da177e4SLinus Torvalds } 4641da177e4SLinus Torvalds 465bb91bc7bSMikulas Patocka static int dp_init(struct dm_io_request *io_req, struct dpages *dp, 466bb91bc7bSMikulas Patocka unsigned long size) 467c8b03afeSHeinz Mauelshagen { 468c8b03afeSHeinz Mauelshagen /* Set up dpages based on memory type */ 469bb91bc7bSMikulas Patocka 470bb91bc7bSMikulas Patocka dp->vma_invalidate_address = NULL; 471bb91bc7bSMikulas Patocka dp->vma_invalidate_size = 0; 472bb91bc7bSMikulas Patocka 473c8b03afeSHeinz Mauelshagen switch (io_req->mem.type) { 474c8b03afeSHeinz Mauelshagen case DM_IO_PAGE_LIST: 475c8b03afeSHeinz Mauelshagen list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset); 476c8b03afeSHeinz Mauelshagen break; 477c8b03afeSHeinz Mauelshagen 478003b5c57SKent Overstreet case DM_IO_BIO: 479003b5c57SKent Overstreet bio_dp_init(dp, io_req->mem.ptr.bio); 480c8b03afeSHeinz Mauelshagen break; 481c8b03afeSHeinz Mauelshagen 482c8b03afeSHeinz Mauelshagen case DM_IO_VMA: 483bb91bc7bSMikulas Patocka flush_kernel_vmap_range(io_req->mem.ptr.vma, size); 484bb91bc7bSMikulas Patocka if ((io_req->bi_rw & RW_MASK) == READ) { 485bb91bc7bSMikulas Patocka dp->vma_invalidate_address = io_req->mem.ptr.vma; 486bb91bc7bSMikulas Patocka dp->vma_invalidate_size = size; 487bb91bc7bSMikulas Patocka } 488c8b03afeSHeinz Mauelshagen vm_dp_init(dp, io_req->mem.ptr.vma); 489c8b03afeSHeinz Mauelshagen break; 490c8b03afeSHeinz Mauelshagen 491c8b03afeSHeinz Mauelshagen case DM_IO_KMEM: 492c8b03afeSHeinz Mauelshagen km_dp_init(dp, io_req->mem.ptr.addr); 493c8b03afeSHeinz Mauelshagen break; 494c8b03afeSHeinz Mauelshagen 495c8b03afeSHeinz Mauelshagen default: 496c8b03afeSHeinz Mauelshagen return -EINVAL; 497c8b03afeSHeinz Mauelshagen } 498c8b03afeSHeinz Mauelshagen 499c8b03afeSHeinz Mauelshagen return 0; 500c8b03afeSHeinz Mauelshagen } 501c8b03afeSHeinz Mauelshagen 502c8b03afeSHeinz Mauelshagen /* 5037ff14a36SMikulas Patocka * New collapsed (a)synchronous interface. 5047ff14a36SMikulas Patocka * 5057ff14a36SMikulas Patocka * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug 50697e7cdf1SJoe Thornber * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw. 50797e7cdf1SJoe Thornber * If you fail to do one of these, the IO will be submitted to the disk after 50897e7cdf1SJoe Thornber * q->unplug_delay, which defaults to 3ms in blk-settings.c. 509c8b03afeSHeinz Mauelshagen */ 510c8b03afeSHeinz Mauelshagen int dm_io(struct dm_io_request *io_req, unsigned num_regions, 51122a1ceb1SHeinz Mauelshagen struct dm_io_region *where, unsigned long *sync_error_bits) 512c8b03afeSHeinz Mauelshagen { 513c8b03afeSHeinz Mauelshagen int r; 514c8b03afeSHeinz Mauelshagen struct dpages dp; 515c8b03afeSHeinz Mauelshagen 516bb91bc7bSMikulas Patocka r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); 517c8b03afeSHeinz Mauelshagen if (r) 518c8b03afeSHeinz Mauelshagen return r; 519c8b03afeSHeinz Mauelshagen 520c8b03afeSHeinz Mauelshagen if (!io_req->notify.fn) 521c8b03afeSHeinz Mauelshagen return sync_io(io_req->client, num_regions, where, 522c8b03afeSHeinz Mauelshagen io_req->bi_rw, &dp, sync_error_bits); 523c8b03afeSHeinz Mauelshagen 524c8b03afeSHeinz Mauelshagen return async_io(io_req->client, num_regions, where, io_req->bi_rw, 525c8b03afeSHeinz Mauelshagen &dp, io_req->notify.fn, io_req->notify.context); 526c8b03afeSHeinz Mauelshagen } 527c8b03afeSHeinz Mauelshagen EXPORT_SYMBOL(dm_io); 528952b3557SMikulas Patocka 529952b3557SMikulas Patocka int __init dm_io_init(void) 530952b3557SMikulas Patocka { 531952b3557SMikulas Patocka _dm_io_cache = KMEM_CACHE(io, 0); 532952b3557SMikulas Patocka if (!_dm_io_cache) 533952b3557SMikulas Patocka return -ENOMEM; 534952b3557SMikulas Patocka 535952b3557SMikulas Patocka return 0; 536952b3557SMikulas Patocka } 537952b3557SMikulas Patocka 538952b3557SMikulas Patocka void dm_io_exit(void) 539952b3557SMikulas Patocka { 540952b3557SMikulas Patocka kmem_cache_destroy(_dm_io_cache); 541952b3557SMikulas Patocka _dm_io_cache = NULL; 542952b3557SMikulas Patocka } 543