/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"

#include <linux/device-mapper.h>

#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/dm-io.h>

#define DM_MSG_PREFIX "io"

#define DM_IO_MAX_REGIONS	BITS_PER_LONG

struct dm_io_client {
	mempool_t pool;
	struct bio_set bios;
};

/*
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 */
struct io {
	unsigned long error_bits;
	atomic_t count;
	struct dm_io_client *client;
	io_notify_fn callback;
	void *context;
	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
} __attribute__((aligned(DM_IO_MAX_REGIONS)));

static struct kmem_cache *_dm_io_cache;

/*
 * Create a client with mempool and bioset.
 */
struct dm_io_client *dm_io_client_create(void)
{
	struct dm_io_client *client;
	unsigned min_ios = dm_get_reserved_bio_based_ios();
	int ret;

	client = kmalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return ERR_PTR(-ENOMEM);

	ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
	if (ret)
		goto bad;

	ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
	if (ret)
		goto bad;

	return client;

   bad:
	mempool_exit(&client->pool);
	kfree(client);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(dm_io_client_create);

void dm_io_client_destroy(struct dm_io_client *client)
{
	mempool_exit(&client->pool);
	bioset_exit(&client->bios);
	kfree(client);
}
EXPORT_SYMBOL(dm_io_client_destroy);
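/*
 * Illustrative sketch only (not part of this file's interface): a target
 * that issues its own I/O typically creates one client when it is
 * constructed and destroys it when it is torn down.  The 'mt'/'io_client'
 * names below are hypothetical.
 *
 *	mt->io_client = dm_io_client_create();
 *	if (IS_ERR(mt->io_client))
 *		return PTR_ERR(mt->io_client);
 *	...
 *	dm_io_client_destroy(mt->io_client);
 */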
/*-----------------------------------------------------------------
 * We need to keep track of which region a bio is doing io for.
 * To avoid a memory allocation to store just 5 or 6 bits, we
 * ensure the 'struct io' pointer is aligned so enough low bits are
 * always zero and then combine it with the region number directly in
 * bi_private.
 *---------------------------------------------------------------*/
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
				       unsigned region)
{
	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
		DMCRIT("Unaligned struct io pointer %p", io);
		BUG();
	}

	bio->bi_private = (void *)((unsigned long)io | region);
}

static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
					    unsigned *region)
{
	unsigned long val = (unsigned long)bio->bi_private;

	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
	*region = val & (DM_IO_MAX_REGIONS - 1);
}
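/*
 * Worked illustration (assumes a 64-bit build, where DM_IO_MAX_REGIONS is
 * 64): a 64-byte-aligned 'struct io' pointer has its six low bits clear,
 * so a region number in 0..63 can be OR'd straight into bi_private:
 *
 *	bi_private = (unsigned long)io | region;
 *	io         = (struct io *)(bi_private & ~63UL);
 *	region     = bi_private & 63;
 *
 * The helpers above express the same masks via DM_IO_MAX_REGIONS.
 */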
/*-----------------------------------------------------------------
 * We need an io object to keep track of the number of bios that
 * have been dispatched for a particular io.
 *---------------------------------------------------------------*/
static void complete_io(struct io *io)
{
	unsigned long error_bits = io->error_bits;
	io_notify_fn fn = io->callback;
	void *context = io->context;

	if (io->vma_invalidate_size)
		invalidate_kernel_vmap_range(io->vma_invalidate_address,
					     io->vma_invalidate_size);

	mempool_free(io, &io->client->pool);
	fn(error_bits, context);
}

static void dec_count(struct io *io, unsigned int region, blk_status_t error)
{
	if (error)
		set_bit(region, &io->error_bits);

	if (atomic_dec_and_test(&io->count))
		complete_io(io);
}

static void endio(struct bio *bio)
{
	struct io *io;
	unsigned region;
	blk_status_t error;

	if (bio->bi_status && bio_data_dir(bio) == READ)
		zero_fill_bio(bio);

	/*
	 * The bio destructor in bio_put() may use the io object.
	 */
	retrieve_io_and_region_from_bio(bio, &io, &region);

	error = bio->bi_status;
	bio_put(bio);

	dec_count(io, region, error);
}

/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	void (*next_page)(struct dpages *dp);

	union {
		unsigned context_u;
		struct bvec_iter context_bi;
	};
	void *context_ptr;

	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
};
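/*
 * Consumption pattern (a sketch of how do_region() below drives any of the
 * providers that follow; a new provider only has to supply the
 * get_page/next_page pair):
 *
 *	while (remaining) {
 *		dp->get_page(dp, &page, &len, &offset);
 *		len = min(len, to_bytes(remaining));
 *		if (!bio_add_page(bio, page, len, offset))
 *			break;
 *		remaining -= to_sector(len);
 *		dp->next_page(dp);
 *	}
 */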
/*
 * Functions for getting the pages from a list.
 */
static void list_get_page(struct dpages *dp,
			  struct page **p, unsigned long *len, unsigned *offset)
{
	unsigned o = dp->context_u;
	struct page_list *pl = (struct page_list *) dp->context_ptr;

	*p = pl->page;
	*len = PAGE_SIZE - o;
	*offset = o;
}

static void list_next_page(struct dpages *dp)
{
	struct page_list *pl = (struct page_list *) dp->context_ptr;
	dp->context_ptr = pl->next;
	dp->context_u = 0;
}

static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
{
	dp->get_page = list_get_page;
	dp->next_page = list_next_page;
	dp->context_u = offset;
	dp->context_ptr = pl;
}

/*
 * Functions for getting the pages from a bvec.
 */
static void bio_get_page(struct dpages *dp, struct page **p,
			 unsigned long *len, unsigned *offset)
{
	struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
					     dp->context_bi);

	*p = bvec.bv_page;
	*len = bvec.bv_len;
	*offset = bvec.bv_offset;

	/* avoid figuring it out again in bio_next_page() */
	dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
}

static void bio_next_page(struct dpages *dp)
{
	unsigned int len = (unsigned int)dp->context_bi.bi_sector;

	bvec_iter_advance((struct bio_vec *)dp->context_ptr,
			  &dp->context_bi, len);
}

static void bio_dp_init(struct dpages *dp, struct bio *bio)
{
	dp->get_page = bio_get_page;
	dp->next_page = bio_next_page;

	/*
	 * We just use bvec iterator to retrieve pages, so it is ok to
	 * access the bvec table directly here
	 */
	dp->context_ptr = bio->bi_io_vec;
	dp->context_bi = bio->bi_iter;
}

/*
 * Functions for getting the pages from a VMA.
 */
static void vm_get_page(struct dpages *dp,
			struct page **p, unsigned long *len, unsigned *offset)
{
	*p = vmalloc_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void vm_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void vm_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = vm_get_page;
	dp->next_page = vm_next_page;
	dp->context_u = offset_in_page(data);
	dp->context_ptr = data;
}

/*
 * Functions for getting the pages from kernel memory.
 */
static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
			unsigned *offset)
{
	*p = virt_to_page(dp->context_ptr);
	*offset = dp->context_u;
	*len = PAGE_SIZE - dp->context_u;
}

static void km_next_page(struct dpages *dp)
{
	dp->context_ptr += PAGE_SIZE - dp->context_u;
	dp->context_u = 0;
}

static void km_dp_init(struct dpages *dp, void *data)
{
	dp->get_page = km_get_page;
	dp->next_page = km_next_page;
	dp->context_u = offset_in_page(data);
	dp->context_ptr = data;
}

/*-----------------------------------------------------------------
 * IO routines that accept a list of pages.
 *---------------------------------------------------------------*/
static void do_region(int op, int op_flags, unsigned region,
		      struct dm_io_region *where, struct dpages *dp,
		      struct io *io)
{
	struct bio *bio;
	struct page *page;
	unsigned long len;
	unsigned offset;
	unsigned num_bvecs;
	sector_t remaining = where->count;
	struct request_queue *q = bdev_get_queue(where->bdev);
	unsigned short logical_block_size = queue_logical_block_size(q);
	sector_t num_sectors;
	unsigned int uninitialized_var(special_cmd_max_sectors);

	/*
	 * Reject unsupported discard and write same requests.
	 */
	if (op == REQ_OP_DISCARD)
		special_cmd_max_sectors = q->limits.max_discard_sectors;
	else if (op == REQ_OP_WRITE_ZEROES)
		special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
	else if (op == REQ_OP_WRITE_SAME)
		special_cmd_max_sectors = q->limits.max_write_same_sectors;
	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
	     op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
		atomic_inc(&io->count);
		dec_count(io, region, BLK_STS_NOTSUPP);
		return;
	}

	/*
	 * where->count may be zero if op holds a flush and we need to
	 * send a zero-sized flush.
	 */
	do {
		/*
		 * Allocate a suitably sized bio.
		 */
		switch (op) {
		case REQ_OP_DISCARD:
		case REQ_OP_WRITE_ZEROES:
			num_bvecs = 0;
			break;
		case REQ_OP_WRITE_SAME:
			num_bvecs = 1;
			break;
		default:
			num_bvecs = min_t(int, BIO_MAX_PAGES,
					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
		}

		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
		bio_set_dev(bio, where->bdev);
		bio->bi_end_io = endio;
		bio_set_op_attrs(bio, op, op_flags);
		store_io_and_region_in_bio(bio, io, region);

		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
			remaining -= num_sectors;
		} else if (op == REQ_OP_WRITE_SAME) {
			/*
			 * WRITE SAME only uses a single page.
			 */
			dp->get_page(dp, &page, &len, &offset);
			bio_add_page(bio, page, logical_block_size, offset);
			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;

			offset = 0;
			remaining -= num_sectors;
			dp->next_page(dp);
		} else while (remaining) {
			/*
			 * Try and add as many pages as possible.
			 */
			dp->get_page(dp, &page, &len, &offset);
			len = min(len, to_bytes(remaining));
			if (!bio_add_page(bio, page, len, offset))
				break;

			offset = 0;
			remaining -= to_sector(len);
			dp->next_page(dp);
		}

		atomic_inc(&io->count);
		submit_bio(bio);
	} while (remaining);
}

static void dispatch_io(int op, int op_flags, unsigned int num_regions,
			struct dm_io_region *where, struct dpages *dp,
			struct io *io, int sync)
{
	int i;
	struct dpages old_pages = *dp;

	BUG_ON(num_regions > DM_IO_MAX_REGIONS);

	if (sync)
		op_flags |= REQ_SYNC;

	/*
	 * For multiple regions we need to be careful to rewind
	 * the dp object for each call to do_region.
	 */
	for (i = 0; i < num_regions; i++) {
		*dp = old_pages;
		if (where[i].count || (op_flags & REQ_PREFLUSH))
			do_region(op, op_flags, i, where + i, dp, io);
	}

	/*
	 * Drop the extra reference that we were holding to avoid
	 * the io being completed too early.
	 */
	dec_count(io, 0, 0);
}

struct sync_io {
	unsigned long error_bits;
	struct completion wait;
};

static void sync_io_complete(unsigned long error, void *context)
{
	struct sync_io *sio = context;

	sio->error_bits = error;
	complete(&sio->wait);
}

static int sync_io(struct dm_io_client *client, unsigned int num_regions,
		   struct dm_io_region *where, int op, int op_flags,
		   struct dpages *dp, unsigned long *error_bits)
{
	struct io *io;
	struct sync_io sio;

	if (num_regions > 1 && !op_is_write(op)) {
		WARN_ON(1);
		return -EIO;
	}

	init_completion(&sio.wait);

	io = mempool_alloc(&client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = sync_io_complete;
	io->context = &sio;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(op, op_flags, num_regions, where, dp, io, 1);

	wait_for_completion_io(&sio.wait);

	if (error_bits)
		*error_bits = sio.error_bits;

	return sio.error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int op, int op_flags,
		    struct dpages *dp, io_notify_fn fn, void *context)
{
	struct io *io;

	if (num_regions > 1 && !op_is_write(op)) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	io = mempool_alloc(&client->pool, GFP_NOIO);
	io->error_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->client = client;
	io->callback = fn;
	io->context = context;

	io->vma_invalidate_address = dp->vma_invalidate_address;
	io->vma_invalidate_size = dp->vma_invalidate_size;

	dispatch_io(op, op_flags, num_regions, where, dp, io, 0);
	return 0;
}

static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
		   unsigned long size)
{
	/* Set up dpages based on memory type */

	dp->vma_invalidate_address = NULL;
	dp->vma_invalidate_size = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BIO:
		bio_dp_init(dp, io_req->mem.ptr.bio);
		break;

	case DM_IO_VMA:
		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
		if (io_req->bi_op == REQ_OP_READ) {
			dp->vma_invalidate_address = io_req->mem.ptr.vma;
			dp->vma_invalidate_size = size;
		}
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * New collapsed (a)synchronous interface.
 *
 * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
 * the queue with blk_unplug() some time later or set REQ_SYNC in
 * io_req->bi_opf. If you fail to do one of these, the IO will be submitted to
 * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
 */
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
	  struct dm_io_region *where, unsigned long *sync_error_bits)
{
	int r;
	struct dpages dp;

	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
	if (r)
		return r;

	if (!io_req->notify.fn)
		return sync_io(io_req->client, num_regions, where,
			       io_req->bi_op, io_req->bi_op_flags, &dp,
			       sync_error_bits);

	return async_io(io_req->client, num_regions, where, io_req->bi_op,
			io_req->bi_op_flags, &dp, io_req->notify.fn,
			io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
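/*
 * Example of driving dm_io() from a hypothetical caller (a sketch only;
 * 'client', 'bdev', 'buffer', 'start', 'nr_sectors' and 'err' are assumed
 * to exist in the caller).  Here a single region is read synchronously
 * into a kernel-memory buffer:
 *
 *	struct dm_io_request io_req = {
 *		.bi_op = REQ_OP_READ,
 *		.bi_op_flags = 0,
 *		.mem.type = DM_IO_KMEM,
 *		.mem.ptr.addr = buffer,
 *		.notify.fn = NULL,	(NULL notify.fn means synchronous)
 *		.client = client,
 *	};
 *	struct dm_io_region dest = {
 *		.bdev = bdev,
 *		.sector = start,
 *		.count = nr_sectors,
 *	};
 *
 *	r = dm_io(&io_req, 1, &dest, &err);
 *
 * Supplying a notify.fn instead makes the same call asynchronous, with the
 * per-region error bits handed to that callback.
 */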
int __init dm_io_init(void)
{
	_dm_io_cache = KMEM_CACHE(io, 0);
	if (!_dm_io_cache)
		return -ENOMEM;

	return 0;
}

void dm_io_exit(void)
{
	kmem_cache_destroy(_dm_io_cache);
	_dm_io_cache = NULL;
}