/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/device-mapper.h>

#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "io"

/*
 * The region number is stored in the low bits of bio->bi_private (see
 * store_io_and_region_in_bio() below), so the maximum number of regions
 * per request is bounded by the alignment of 'struct io'.
 */
#define DM_IO_MAX_REGIONS	BITS_PER_LONG
#define MIN_IOS		16
#define MIN_BIOS	16

/*
 * A client holds the memory reserves used to service its requests:
 * a mempool of 'struct io' objects and a bio_set for bio allocation,
 * so that forward progress is guaranteed even under memory pressure.
 */
struct dm_io_client {
	mempool_t *pool;	/* backs 'struct io' allocations (async path) */
	struct bio_set *bios;	/* backs bio allocations in do_region() */
};

/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 */
struct io {
	unsigned long error_bits;	/* one bit per failed region */
	atomic_t count;			/* bios in flight + 1 (see dispatch_io) */
	struct task_struct *sleeper;	/* non-NULL for sync requests */
	struct dm_io_client *client;
	io_notify_fn callback;		/* async completion callback */
	void *context;			/* opaque argument for callback */
	void *vma_invalidate_address;	/* vmalloc range to invalidate on read */
	unsigned long vma_invalidate_size;
} __attribute__((aligned(DM_IO_MAX_REGIONS)));

static struct kmem_cache *_dm_io_cache;

/*
 * Create a client with mempool and bioset.
 *
 * Returns a new client or ERR_PTR(-ENOMEM); release with
 * dm_io_client_destroy().
 */
struct dm_io_client *dm_io_client_create(void)
{
	struct dm_io_client *client;

	client = kmalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return ERR_PTR(-ENOMEM);

	client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache);
	if (!client->pool)
		goto bad;

	client->bios = bioset_create(MIN_BIOS, 0);
	if (!client->bios)
		goto bad;

	return client;

   bad:
	/* pool is NULL if its own allocation was what failed */
	if (client->pool)
		mempool_destroy(client->pool);
	kfree(client);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(dm_io_client_create);

/*
 * Free a client's reserves.  The caller must ensure no requests issued
 * through this client are still in flight.
 */
void dm_io_client_destroy(struct dm_io_client *client)
{
	mempool_destroy(client->pool);
	bioset_free(client->bios);
	kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);

/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * To avoid a memory allocation to store just 5 or 6 bits, we
 * ensure the 'struct io' pointer is aligned so enough low bits are
 * always zero and then combine it with the region number directly in
 * bi_private.
 *---------------------------------------------------------------*/
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
				       unsigned region)
{
	/*
	 * The tagging scheme only works if the low log2(DM_IO_MAX_REGIONS)
	 * bits of the io pointer are zero; anything else is a hard bug.
	 */
	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
		DMCRIT("Unaligned struct io pointer %p", io);
		BUG();
	}

	bio->bi_private = (void *)((unsigned long)io | region);
}

static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
				       unsigned *region)
{
	unsigned long val = (unsigned long)bio->bi_private;

	/* Mask off the region bits to recover the aligned io pointer ... */
	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
	/* ... and keep only the low bits for the region number. */
	*region = val & (DM_IO_MAX_REGIONS - 1);
}

/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void dec_count(struct io *io, unsigned int region, int error)
{
	/* Record per-region failure; the caller sees a bitmask of regions. */
	if (error)
		set_bit(region, &io->error_bits);

	if (atomic_dec_and_test(&io->count)) {
		/* Last reference: complete the whole request. */
		if (io->vma_invalidate_size)
			invalidate_kernel_vmap_range(io->vma_invalidate_address,
						     io->vma_invalidate_size);

		/*
		 * NOTE(review): for the sync path 'io' lives on the
		 * sleeper's stack (see sync_io()); this wakeup must be the
		 * last touch of 'io' here — verify ordering against
		 * sync_io()'s exit before reworking this function.
		 */
		if (io->sleeper)
			wake_up_process(io->sleeper);

		else {
			/*
			 * Async path: copy everything out before freeing,
			 * since the callback may reuse or re-issue io.
			 */
			unsigned long r = io->error_bits;
			io_notify_fn fn = io->callback;
			void *context = io->context;

			mempool_free(io, io->client->pool);
			fn(r, context);
		}
	}
}

/* Per-bio completion: funnels every bio into its owning 'struct io'. */
static void endio(struct bio *bio, int error)
{
	struct io *io;
	unsigned region;

	/* Never expose stale data to the caller on a failed read. */
	if (error && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	retrieve_io_and_region_from_bio(bio, &io, &region);

	bio_put(bio);

	dec_count(io, region, error);
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	/* Return the current page, usable length and offset within it. */
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	/* Advance to the next page. */
	void (*next_page)(struct dpages *dp);

	unsigned context_u;	/* engine-specific: usually offset in page */
	void *context_ptr;	/* engine-specific: cursor into the source */

	/* Set by dp_init() for DM_IO_VMA reads; consumed in dec_count(). */
	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
};

/*
 * Functions for getting the pages from a list.
1751da177e4SLinus Torvalds */ 1761da177e4SLinus Torvalds static void list_get_page(struct dpages *dp, 1771da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 1781da177e4SLinus Torvalds { 1791da177e4SLinus Torvalds unsigned o = dp->context_u; 1801da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1811da177e4SLinus Torvalds 1821da177e4SLinus Torvalds *p = pl->page; 1831da177e4SLinus Torvalds *len = PAGE_SIZE - o; 1841da177e4SLinus Torvalds *offset = o; 1851da177e4SLinus Torvalds } 1861da177e4SLinus Torvalds 1871da177e4SLinus Torvalds static void list_next_page(struct dpages *dp) 1881da177e4SLinus Torvalds { 1891da177e4SLinus Torvalds struct page_list *pl = (struct page_list *) dp->context_ptr; 1901da177e4SLinus Torvalds dp->context_ptr = pl->next; 1911da177e4SLinus Torvalds dp->context_u = 0; 1921da177e4SLinus Torvalds } 1931da177e4SLinus Torvalds 1941da177e4SLinus Torvalds static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset) 1951da177e4SLinus Torvalds { 1961da177e4SLinus Torvalds dp->get_page = list_get_page; 1971da177e4SLinus Torvalds dp->next_page = list_next_page; 1981da177e4SLinus Torvalds dp->context_u = offset; 1991da177e4SLinus Torvalds dp->context_ptr = pl; 2001da177e4SLinus Torvalds } 2011da177e4SLinus Torvalds 2021da177e4SLinus Torvalds /* 2031da177e4SLinus Torvalds * Functions for getting the pages from a bvec. 
2041da177e4SLinus Torvalds */ 2051da177e4SLinus Torvalds static void bvec_get_page(struct dpages *dp, 2061da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 2071da177e4SLinus Torvalds { 2081da177e4SLinus Torvalds struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; 2091da177e4SLinus Torvalds *p = bvec->bv_page; 2101da177e4SLinus Torvalds *len = bvec->bv_len; 2111da177e4SLinus Torvalds *offset = bvec->bv_offset; 2121da177e4SLinus Torvalds } 2131da177e4SLinus Torvalds 2141da177e4SLinus Torvalds static void bvec_next_page(struct dpages *dp) 2151da177e4SLinus Torvalds { 2161da177e4SLinus Torvalds struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr; 2171da177e4SLinus Torvalds dp->context_ptr = bvec + 1; 2181da177e4SLinus Torvalds } 2191da177e4SLinus Torvalds 2201da177e4SLinus Torvalds static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) 2211da177e4SLinus Torvalds { 2221da177e4SLinus Torvalds dp->get_page = bvec_get_page; 2231da177e4SLinus Torvalds dp->next_page = bvec_next_page; 2241da177e4SLinus Torvalds dp->context_ptr = bvec; 2251da177e4SLinus Torvalds } 2261da177e4SLinus Torvalds 227c8b03afeSHeinz Mauelshagen /* 228c8b03afeSHeinz Mauelshagen * Functions for getting the pages from a VMA. 
229c8b03afeSHeinz Mauelshagen */ 2301da177e4SLinus Torvalds static void vm_get_page(struct dpages *dp, 2311da177e4SLinus Torvalds struct page **p, unsigned long *len, unsigned *offset) 2321da177e4SLinus Torvalds { 2331da177e4SLinus Torvalds *p = vmalloc_to_page(dp->context_ptr); 2341da177e4SLinus Torvalds *offset = dp->context_u; 2351da177e4SLinus Torvalds *len = PAGE_SIZE - dp->context_u; 2361da177e4SLinus Torvalds } 2371da177e4SLinus Torvalds 2381da177e4SLinus Torvalds static void vm_next_page(struct dpages *dp) 2391da177e4SLinus Torvalds { 2401da177e4SLinus Torvalds dp->context_ptr += PAGE_SIZE - dp->context_u; 2411da177e4SLinus Torvalds dp->context_u = 0; 2421da177e4SLinus Torvalds } 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds static void vm_dp_init(struct dpages *dp, void *data) 2451da177e4SLinus Torvalds { 2461da177e4SLinus Torvalds dp->get_page = vm_get_page; 2471da177e4SLinus Torvalds dp->next_page = vm_next_page; 2481da177e4SLinus Torvalds dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 2491da177e4SLinus Torvalds dp->context_ptr = data; 2501da177e4SLinus Torvalds } 2511da177e4SLinus Torvalds 2523676347aSPeter Osterlund static void dm_bio_destructor(struct bio *bio) 2533676347aSPeter Osterlund { 254f1e53987SMikulas Patocka unsigned region; 255f1e53987SMikulas Patocka struct io *io; 256f1e53987SMikulas Patocka 257f1e53987SMikulas Patocka retrieve_io_and_region_from_bio(bio, &io, ®ion); 258891ce207SHeinz Mauelshagen 259bf17ce3aSMilan Broz bio_free(bio, io->client->bios); 2603676347aSPeter Osterlund } 2613676347aSPeter Osterlund 262c8b03afeSHeinz Mauelshagen /* 263c8b03afeSHeinz Mauelshagen * Functions for getting the pages from kernel memory. 
264c8b03afeSHeinz Mauelshagen */ 265c8b03afeSHeinz Mauelshagen static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len, 266c8b03afeSHeinz Mauelshagen unsigned *offset) 267c8b03afeSHeinz Mauelshagen { 268c8b03afeSHeinz Mauelshagen *p = virt_to_page(dp->context_ptr); 269c8b03afeSHeinz Mauelshagen *offset = dp->context_u; 270c8b03afeSHeinz Mauelshagen *len = PAGE_SIZE - dp->context_u; 271c8b03afeSHeinz Mauelshagen } 272c8b03afeSHeinz Mauelshagen 273c8b03afeSHeinz Mauelshagen static void km_next_page(struct dpages *dp) 274c8b03afeSHeinz Mauelshagen { 275c8b03afeSHeinz Mauelshagen dp->context_ptr += PAGE_SIZE - dp->context_u; 276c8b03afeSHeinz Mauelshagen dp->context_u = 0; 277c8b03afeSHeinz Mauelshagen } 278c8b03afeSHeinz Mauelshagen 279c8b03afeSHeinz Mauelshagen static void km_dp_init(struct dpages *dp, void *data) 280c8b03afeSHeinz Mauelshagen { 281c8b03afeSHeinz Mauelshagen dp->get_page = km_get_page; 282c8b03afeSHeinz Mauelshagen dp->next_page = km_next_page; 283c8b03afeSHeinz Mauelshagen dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1); 284c8b03afeSHeinz Mauelshagen dp->context_ptr = data; 285c8b03afeSHeinz Mauelshagen } 286c8b03afeSHeinz Mauelshagen 2871da177e4SLinus Torvalds /*----------------------------------------------------------------- 2881da177e4SLinus Torvalds * IO routines that accept a list of pages. 
 *---------------------------------------------------------------*/
/*
 * Issue as many bios as needed to cover one region, pulling pages
 * from 'dp'.  Each bio takes a reference on 'io' before submission.
 */
static void do_region(int rw, unsigned region, struct dm_io_region *where,
		      struct dpages *dp, struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;

	/*
	 * where->count may be zero if rw holds a flush and we need to
	 * send a zero-sized flush.
	 */
	do {
		/*
		 * Allocate a suitably sized-bio: enough bvecs for the
		 * remaining sectors, capped by what the device accepts.
		 */
		num_bvecs = dm_sector_div_up(remaining,
					     (PAGE_SIZE >> SECTOR_SHIFT));
		num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
		bio->bi_sector = where->sector + (where->count - remaining);
		bio->bi_bdev = where->bdev;
		bio->bi_end_io = endio;
		bio->bi_destructor = dm_bio_destructor;
		store_io_and_region_in_bio(bio, io, region);

		/*
		 * Try and add as many pages as possible.  bio_add_page()
		 * failing just means this bio is full; the outer loop
		 * issues another one for what is left.
		 */
		while (remaining) {
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		/* One reference per submitted bio; dropped in endio(). */
		atomic_inc(&io->count);
		submit_bio(rw, bio);
	} while (remaining);
}

/*
 * Fan one request out across up to DM_IO_MAX_REGIONS regions, then drop
 * the initial reference so completion can fire once all bios finish.
 */
static void dispatch_io(int rw, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	BUG_ON(num_regions > DM_IO_MAX_REGIONS);

	if (sync)
		rw |= REQ_SYNC;

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		/* Empty regions are skipped unless this is a flush. */
		if (where[i].count || (rw & REQ_FLUSH))
			do_region(rw, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

/*
 * Issue a request and sleep until every bio has completed.
 * Returns 0 or -EIO; per-region failures are reported via *error_bits.
 */
static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int rw, struct dpages *dp,
		   unsigned long *error_bits)
{
	/*
	 * gcc <= 4.3 can't do the alignment for stack variables, so we must
	 * align it on our own.
	 * volatile prevents the optimizer from removing or reusing
	 * "io_" field from the stack frame (allowed in ANSI C).
	 */
	volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
	struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));

	/* Multi-region requests are only meaningful for writes. */
	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		return -EIO;
	}

	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = current;	/* dec_count() wakes us when count hits 0 */
	io->client = client;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 1);

	/*
	 * NOTE(review): 'io' is on this stack frame; dec_count() must not
	 * touch it after the wakeup that lets us pass this loop — confirm
	 * against dec_count() before restructuring either side.
	 */
	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&io->count))
			break;

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	if (error_bits)
		*error_bits = io->error_bits;

	return io->error_bits ? -EIO : 0;
}

/*
 * Issue a request and return immediately; 'fn' is invoked with the
 * error bitmask once every bio has completed.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;	/* NULL selects the callback path in dec_count() */
	io->client = client;
	io->callback = fn;
	io->context = context;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}

/*
 * Translate the caller's memory descriptor into a dpages cursor.
 * 'size' is the request size in bytes (used only for DM_IO_VMA cache
 * maintenance).  Returns 0 or -EINVAL for an unknown memory type.
 */
static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
		   unsigned long size)
{
	/* Set up dpages based on memory type */

	dp->vma_invalidate_address = NULL;
	dp->vma_invalidate_size = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BVEC:
		bvec_dp_init(dp, io_req->mem.ptr.bvec);
		break;

	case DM_IO_VMA:
		/*
		 * Flush the vmap alias before the device reads it; reads
		 * additionally need an invalidate after completion, which
		 * dec_count() performs using these two fields.
		 */
		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
		if ((io_req->bi_rw & RW_MASK) == READ) {
			dp->vma_invalidate_address = io_req->mem.ptr.vma;
			dp->vma_invalidate_size = size;
		}
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * New collapsed (a)synchronous interface.
 *
 * If the IO is asynchronous (i.e.
it has notify.fn), you must either unplug 4787b6d91daSChristoph Hellwig * the queue with blk_unplug() some time later or set REQ_SYNC in 4797b6d91daSChristoph Hellwig io_req->bi_rw. If you fail to do one of these, the IO will be submitted to 4807ff14a36SMikulas Patocka * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. 481c8b03afeSHeinz Mauelshagen */ 482c8b03afeSHeinz Mauelshagen int dm_io(struct dm_io_request *io_req, unsigned num_regions, 48322a1ceb1SHeinz Mauelshagen struct dm_io_region *where, unsigned long *sync_error_bits) 484c8b03afeSHeinz Mauelshagen { 485c8b03afeSHeinz Mauelshagen int r; 486c8b03afeSHeinz Mauelshagen struct dpages dp; 487c8b03afeSHeinz Mauelshagen 488*bb91bc7bSMikulas Patocka r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT); 489c8b03afeSHeinz Mauelshagen if (r) 490c8b03afeSHeinz Mauelshagen return r; 491c8b03afeSHeinz Mauelshagen 492c8b03afeSHeinz Mauelshagen if (!io_req->notify.fn) 493c8b03afeSHeinz Mauelshagen return sync_io(io_req->client, num_regions, where, 494c8b03afeSHeinz Mauelshagen io_req->bi_rw, &dp, sync_error_bits); 495c8b03afeSHeinz Mauelshagen 496c8b03afeSHeinz Mauelshagen return async_io(io_req->client, num_regions, where, io_req->bi_rw, 497c8b03afeSHeinz Mauelshagen &dp, io_req->notify.fn, io_req->notify.context); 498c8b03afeSHeinz Mauelshagen } 499c8b03afeSHeinz Mauelshagen EXPORT_SYMBOL(dm_io); 500952b3557SMikulas Patocka 501952b3557SMikulas Patocka int __init dm_io_init(void) 502952b3557SMikulas Patocka { 503952b3557SMikulas Patocka _dm_io_cache = KMEM_CACHE(io, 0); 504952b3557SMikulas Patocka if (!_dm_io_cache) 505952b3557SMikulas Patocka return -ENOMEM; 506952b3557SMikulas Patocka 507952b3557SMikulas Patocka return 0; 508952b3557SMikulas Patocka } 509952b3557SMikulas Patocka 510952b3557SMikulas Patocka void dm_io_exit(void) 511952b3557SMikulas Patocka { 512952b3557SMikulas Patocka kmem_cache_destroy(_dm_io_cache); 513952b3557SMikulas Patocka _dm_io_cache = NULL; 
514952b3557SMikulas Patocka } 515