/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;

	/*
	 * Must bump bi_remaining to allow bio to complete with
	 * restored bi_end_io.
	 */
	atomic_inc(&bio->bi_remaining);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * the one-past-the-end value.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set
	 */
	dm_cblock_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_oblock_t discard_nr_blocks;
	unsigned long *discard_bitset;

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;
	struct dm_cache_migration *next_migration;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		mempool_free(p->mg, cache->migration_pool);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/
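/*
 * Cells in the bio prison are keyed by origin block, so all bios to the
 * same oblock are held while that block is being migrated.
 */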
static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block = from_oblock(oblock);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
		if (!from_cblock(cache->nr_dirty))
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/
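/*
 * sectors_per_block_shift is negative when the block size is not a power
 * of two (see cache_create()), in which case we fall back to division.
 */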
static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static void set_discard(struct cache *cache, dm_oblock_t b)
{
	unsigned long flags;

	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_oblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_oblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_oblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_oblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_oblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}
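/*
 * Maps a bio's sector onto the cache device: the cache block supplies the
 * block offset and the low bits of the original sector give the offset
 * within that block (shift/mask when the block size is a power of two,
 * division otherwise).
 */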
static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock);
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock);
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}
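/*
 * Used as the hooked endio for writes issued by remap_to_origin_then_cache()
 * below: once the origin write completes, the original bio is restored and
 * resubmitted to the cache device.
 */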
static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (err) {
		bio_endio(bio, err);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void free_migration(struct dm_cache_migration *mg)
{
	mempool_free(mg, mg->cache->migration_pool);
}

static void inc_nr_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_migrations);
}

static void dec_nr_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_migrations);

	/*
	 * Wake the worker in case we're suspending the target.
	 */
	wake_up(&cache->migration_wait);
}
static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cleanup_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	free_migration(mg);
	dec_nr_migrations(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	cleanup_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		cell_defer(cache, mg->old_ocell, false);
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cleanup_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			cleanup_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			cleanup_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}
static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			cleanup_migration(mg);
		}

	} else {
		if (mg->requeue_holder)
			cell_defer(cache, mg->new_ocell, true);
		else {
			bio_endio(mg->new_ocell->holder, 0);
			cell_defer(cache, mg->new_ocell, false);
		}
		clear_dirty(cache, mg->new_oblock, mg->cblock);
		cleanup_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy_real(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;
	sector_t cblock = from_cblock(mg->cblock);

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = cblock * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("issuing migration failed");
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio, int err)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (err)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}
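/*
 * If the bio being promoted writes the whole block we can skip the copy
 * from the origin and simply write the bio straight to its new home on
 * the cache device (see issue_copy() below).
 */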
static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
	generic_make_request(bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (!avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy_real(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
		dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}
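/*
 * A migration may not start copying until all io that was in flight when
 * it was created has completed; the all_io deferred set tracks this, and
 * check_for_quiesced_migrations() above moves the migration on once the
 * last such bio finishes.
 */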
static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}
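/*
 * Flushes arrive as two bios (num_flush_bios == 2): request number 0 is
 * sent to the origin device, request number 1 to the cache device.
 */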
static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, eg, the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
 */
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
						  cache->sectors_per_block);
	dm_block_t end_block = bio_end_sector(bio);
	dm_block_t b;

	end_block = block_div(end_block, cache->sectors_per_block);

	for (b = start_block; b < end_block; b++)
		set_discard(cache, to_oblock(b));

	bio_endio(bio, 0);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void issue_cache_bio(struct cache *cache, struct bio *bio,
			    struct per_bio_data *pb,
			    dm_oblock_t oblock, dm_cblock_t cblock)
{
	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
	remap_to_cache_dirty(cache, bio, oblock, cblock);
	issue(cache, bio);
}
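/*
 * The main per-bio path for the worker thread: detain the bio against its
 * origin block, ask the policy where the block lives, then either remap
 * the bio or kick off the appropriate migration.
 */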
static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	bool discarded_block = is_discarded_oblock(cache, block);
	bool passthrough = passthrough_mode(&cache->features);
	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_clear_discard(cache, bio, block);
				issue(cache, bio);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				issue(cache, bio);
			} else
				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
		remap_to_origin_clear_discard(cache, bio, block);
		issue(cache, bio);
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		cell_prealloc = prealloc_get_cell(structs);
		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
			       (cell_free_fn) prealloc_put_cell,
			       structs, &old_ocell);
		if (r > 0) {
			/*
			 * We have to be careful to avoid lock inversion of
			 * the cells.  So we back off, and wait for the
			 * old_ocell to become free.
			 */
			policy_force_mapping(cache->policy, block,
					     lookup_result.old_oblock);
			atomic_inc(&cache->stats.cache_cell_clash);
			break;
		}
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);

		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    old_ocell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}
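/*
 * Returns true if a commit is overdue.  The first test also catches the
 * case where jiffies has wrapped since the last commit.
 */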
static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}

static int commit_if_needed(struct cache *cache)
{
	int r = 0;

	if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
	    dm_cache_changed_this_transaction(cache->cmd)) {
		atomic_inc(&cache->stats.commit_count);
		cache->commit_requested = false;
		r = dm_cache_commit(cache->cmd, false);
		cache->last_commit_jiffies = jiffies;
	}

	return r;
}

static void process_deferred_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, bio);
		else
			process_bio(cache, &structs, bio);
	}

	prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}

static void process_deferred_writethrough_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}

static void writeback_some_dirty_blocks(struct cache *cache)
{
	int r = 0;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (prealloc_data_structs(cache, &structs))
			break;

		r = policy_writeback_work(cache->policy, &oblock, &cblock);
		if (r)
			break;

		r = get_cell(cache, oblock, &structs, &old_ocell);
		if (r) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	prealloc_free_structs(cache, &structs);
}
1559 *--------------------------------------------------------------*/ 1560 1561 static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) 1562 { 1563 int r = 0; 1564 uint64_t begin = from_cblock(req->cblocks->begin); 1565 uint64_t end = from_cblock(req->cblocks->end); 1566 1567 while (begin != end) { 1568 r = policy_remove_cblock(cache->policy, to_cblock(begin)); 1569 if (!r) { 1570 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); 1571 if (r) 1572 break; 1573 1574 } else if (r == -ENODATA) { 1575 /* harmless, already unmapped */ 1576 r = 0; 1577 1578 } else { 1579 DMERR("policy_remove_cblock failed"); 1580 break; 1581 } 1582 1583 begin++; 1584 } 1585 1586 cache->commit_requested = true; 1587 1588 req->err = r; 1589 atomic_set(&req->complete, 1); 1590 1591 wake_up(&req->result_wait); 1592 } 1593 1594 static void process_invalidation_requests(struct cache *cache) 1595 { 1596 struct list_head list; 1597 struct invalidation_request *req, *tmp; 1598 1599 INIT_LIST_HEAD(&list); 1600 spin_lock(&cache->invalidation_lock); 1601 list_splice_init(&cache->invalidation_requests, &list); 1602 spin_unlock(&cache->invalidation_lock); 1603 1604 list_for_each_entry_safe (req, tmp, &list, list) 1605 process_invalidation_request(cache, req); 1606 } 1607 1608 /*---------------------------------------------------------------- 1609 * Main worker loop 1610 *--------------------------------------------------------------*/ 1611 static bool is_quiescing(struct cache *cache) 1612 { 1613 return atomic_read(&cache->quiescing); 1614 } 1615 1616 static void ack_quiescing(struct cache *cache) 1617 { 1618 if (is_quiescing(cache)) { 1619 atomic_inc(&cache->quiescing_ack); 1620 wake_up(&cache->quiescing_wait); 1621 } 1622 } 1623 1624 static void wait_for_quiescing_ack(struct cache *cache) 1625 { 1626 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); 1627 } 1628 1629 static void start_quiescing(struct cache *cache) 1630 { 1631 atomic_inc(&cache->quiescing); 1632 wait_for_quiescing_ack(cache); 1633 } 1634 1635 static void stop_quiescing(struct cache *cache) 1636 { 1637 atomic_set(&cache->quiescing, 0); 1638 atomic_set(&cache->quiescing_ack, 0); 1639 } 1640 1641 static void wait_for_migrations(struct cache *cache) 1642 { 1643 wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); 1644 } 1645 1646 static void stop_worker(struct cache *cache) 1647 { 1648 cancel_delayed_work(&cache->waker); 1649 flush_workqueue(cache->wq); 1650 } 1651 1652 static void requeue_deferred_io(struct cache *cache) 1653 { 1654 struct bio *bio; 1655 struct bio_list bios; 1656 1657 bio_list_init(&bios); 1658 bio_list_merge(&bios, &cache->deferred_bios); 1659 bio_list_init(&cache->deferred_bios); 1660 1661 while ((bio = bio_list_pop(&bios))) 1662 bio_endio(bio, DM_ENDIO_REQUEUE); 1663 } 1664 1665 static int more_work(struct cache *cache) 1666 { 1667 if (is_quiescing(cache)) 1668 return !list_empty(&cache->quiesced_migrations) || 1669 !list_empty(&cache->completed_migrations) || 1670 !list_empty(&cache->need_commit_migrations); 1671 else 1672 return !bio_list_empty(&cache->deferred_bios) || 1673 !bio_list_empty(&cache->deferred_flush_bios) || 1674 !bio_list_empty(&cache->deferred_writethrough_bios) || 1675 !list_empty(&cache->quiesced_migrations) || 1676 !list_empty(&cache->completed_migrations) || 1677 !list_empty(&cache->need_commit_migrations) || 1678 cache->invalidate; 1679 } 1680 1681 static void do_worker(struct work_struct *ws) 1682 { 1683 struct cache *cache = 
static void do_worker(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, worker);

	do {
		if (!is_quiescing(cache)) {
			writeback_some_dirty_blocks(cache);
			process_deferred_writethrough_bios(cache);
			process_deferred_bios(cache);
			process_invalidation_requests(cache);
		}

		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
		process_migrations(cache, &cache->completed_migrations, complete_migration);

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);

			/*
			 * FIXME: rollback metadata or just go into a
			 * failure mode and error everything
			 */
		} else {
			process_deferred_flush_bios(cache, true);
			process_migrations(cache, &cache->need_commit_migrations,
					   migration_success_post_commit);
		}

		ack_quiescing(cache);

	} while (more_work(cache));
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
	policy_tick(cache->policy);
	wake_worker(cache);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

/*----------------------------------------------------------------*/

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
		is_congested(cache->cache_dev, bdi_bits);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	if (cache->next_migration)
		mempool_free(cache->next_migration, cache->migration_pool);

	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

	if (cache->all_io_ds)
		dm_deferred_set_destroy(cache->all_io_ds);

	if (cache->prison)
		dm_bio_prison_destroy(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args ; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

/*----------------------------------------------------------------*/
/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */
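/*
 * As an illustrative example (device names and sizes below are arbitrary),
 * a writeback cache with 512-sector (256KB) blocks using the default
 * policy might be created with a table line such as:
 *
 *   0 41943040 cache /dev/mapper/metadata /dev/mapper/ssd /dev/sdb 512 \
 *	1 writeback default 0
 */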
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long block_size;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (block_size > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = block_size;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->io_mode = CM_IO_WRITEBACK;
}
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static struct dm_arg _args[] = {
		{0, 1, "Invalid number of cache feature arguments"},
	};

	int r;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);

	r = dm_read_arg_group(_args, as, &argc, error);
	if (r)
		return -EINVAL;

	while (argc--) {
		arg = dm_shift_arg(as);

		if (!strcasecmp(arg, "writeback"))
			cf->io_mode = CM_IO_WRITEBACK;

		else if (!strcasecmp(arg, "writethrough"))
			cf->io_mode = CM_IO_WRITETHROUGH;

		else if (!strcasecmp(arg, "passthrough"))
			cf->io_mode = CM_IO_PASSTHROUGH;

		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
		}
	}

	return 0;
}

static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
			char **error)
{
	static struct dm_arg _args[] = {
		{0, 1024, "Invalid number of policy arguments"},
	};

	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	ca->policy_name = dm_shift_arg(as);

	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
	if (r)
		return -EINVAL;

	ca->policy_argv = (const char **)as->argv;
	dm_consume_args(as, ca->policy_argc);

	return 0;
}

static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
			    char **error)
{
	int r;
	struct dm_arg_set as;

	as.argc = argc;
	as.argv = argv;

	r = parse_metadata_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_cache_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_origin_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_block_size(ca, &as, error);
	if (r)
		return r;

	r = parse_features(ca, &as, error);
	if (r)
		return r;

	r = parse_policy(ca, &as, error);
	if (r)
		return r;

	return 0;
}

/*----------------------------------------------------------------*/

static struct kmem_cache *migration_cache;

#define NOT_CORE_OPTION 1

static int process_config_option(struct cache *cache, const char *key, const char *value)
{
	unsigned long tmp;

	if (!strcasecmp(key, "migration_threshold")) {
		if (kstrtoul(value, 10, &tmp))
			return -EINVAL;

		cache->migration_threshold = tmp;
		return 0;
	}

	return NOT_CORE_OPTION;
}

static int set_config_value(struct cache *cache, const char *key, const char *value)
{
	int r = process_config_option(cache, key, value);

	if (r == NOT_CORE_OPTION)
		r = policy_set_config_value(cache->policy, key, value);

	if (r)
		DMWARN("bad config value for %s: %s", key, value);

	return r;
}

static int set_config_values(struct cache *cache, int argc, const char **argv)
{
	int r = 0;

	if (argc & 1) {
		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
		return -EINVAL;
	}

	while (argc) {
		r = set_config_value(cache, argv[0], argv[1]);
		if (r)
			break;

		argc -= 2;
		argv += 2;
	}

	return r;
}

static int create_cache_policy(struct cache *cache, struct cache_args *ca,
			       char **error)
{
	struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
							   cache->cache_size,
							   cache->origin_sectors,
							   cache->sectors_per_block);
	if (IS_ERR(p)) {
		*error = "Error creating cache's policy";
		return PTR_ERR(p);
	}
	cache->policy = p;

	return 0;
}
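/*
 * The migration threshold is expressed in sectors: see
 * spare_migration_bandwidth(), which compares it against the volume of
 * data currently being migrated.
 */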
2143 { 2144 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 2145 cache->cache_size, 2146 cache->origin_sectors, 2147 cache->sectors_per_block); 2148 if (IS_ERR(p)) { 2149 *error = "Error creating cache's policy"; 2150 return PTR_ERR(p); 2151 } 2152 cache->policy = p; 2153 2154 return 0; 2155 } 2156 2157 #define DEFAULT_MIGRATION_THRESHOLD 2048 2158 2159 static int cache_create(struct cache_args *ca, struct cache **result) 2160 { 2161 int r = 0; 2162 char **error = &ca->ti->error; 2163 struct cache *cache; 2164 struct dm_target *ti = ca->ti; 2165 dm_block_t origin_blocks; 2166 struct dm_cache_metadata *cmd; 2167 bool may_format = ca->features.mode == CM_WRITE; 2168 2169 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 2170 if (!cache) 2171 return -ENOMEM; 2172 2173 cache->ti = ca->ti; 2174 ti->private = cache; 2175 ti->num_flush_bios = 2; 2176 ti->flush_supported = true; 2177 2178 ti->num_discard_bios = 1; 2179 ti->discards_supported = true; 2180 ti->discard_zeroes_data_unsupported = true; 2181 /* Discard bios must be split on a block boundary */ 2182 ti->split_discard_bios = true; 2183 2184 cache->features = ca->features; 2185 ti->per_bio_data_size = get_per_bio_data_size(cache); 2186 2187 cache->callbacks.congested_fn = cache_is_congested; 2188 dm_table_add_target_callbacks(ti->table, &cache->callbacks); 2189 2190 cache->metadata_dev = ca->metadata_dev; 2191 cache->origin_dev = ca->origin_dev; 2192 cache->cache_dev = ca->cache_dev; 2193 2194 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 2195 2196 /* FIXME: factor out this whole section */ 2197 origin_blocks = cache->origin_sectors = ca->origin_sectors; 2198 origin_blocks = block_div(origin_blocks, ca->block_size); 2199 cache->origin_blocks = to_oblock(origin_blocks); 2200 2201 cache->sectors_per_block = ca->block_size; 2202 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 2203 r = -EINVAL; 2204 goto bad; 2205 } 2206 2207 if (ca->block_size & (ca->block_size - 1)) { 2208 dm_block_t cache_size = ca->cache_sectors; 2209 2210 cache->sectors_per_block_shift = -1; 2211 cache_size = block_div(cache_size, ca->block_size); 2212 cache->cache_size = to_cblock(cache_size); 2213 } else { 2214 cache->sectors_per_block_shift = __ffs(ca->block_size); 2215 cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); 2216 } 2217 2218 r = create_cache_policy(cache, ca, error); 2219 if (r) 2220 goto bad; 2221 2222 cache->policy_nr_args = ca->policy_argc; 2223 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 2224 2225 r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 2226 if (r) { 2227 *error = "Error setting cache policy's config values"; 2228 goto bad; 2229 } 2230 2231 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 2232 ca->block_size, may_format, 2233 dm_cache_policy_get_hint_size(cache->policy)); 2234 if (IS_ERR(cmd)) { 2235 *error = "Error creating metadata object"; 2236 r = PTR_ERR(cmd); 2237 goto bad; 2238 } 2239 cache->cmd = cmd; 2240 2241 if (passthrough_mode(&cache->features)) { 2242 bool all_clean; 2243 2244 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 2245 if (r) { 2246 *error = "dm_cache_metadata_all_clean() failed"; 2247 goto bad; 2248 } 2249 2250 if (!all_clean) { 2251 *error = "Cannot enter passthrough mode unless all blocks are clean"; 2252 r = -EINVAL; 2253 goto bad; 2254 } 2255 } 2256 2257 spin_lock_init(&cache->lock); 2258 bio_list_init(&cache->deferred_bios); 2259 bio_list_init(&cache->deferred_flush_bios); 2260 
bio_list_init(&cache->deferred_writethrough_bios); 2261 INIT_LIST_HEAD(&cache->quiesced_migrations); 2262 INIT_LIST_HEAD(&cache->completed_migrations); 2263 INIT_LIST_HEAD(&cache->need_commit_migrations); 2264 atomic_set(&cache->nr_migrations, 0); 2265 init_waitqueue_head(&cache->migration_wait); 2266 2267 init_waitqueue_head(&cache->quiescing_wait); 2268 atomic_set(&cache->quiescing, 0); 2269 atomic_set(&cache->quiescing_ack, 0); 2270 2271 r = -ENOMEM; 2272 cache->nr_dirty = 0; 2273 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2274 if (!cache->dirty_bitset) { 2275 *error = "could not allocate dirty bitset"; 2276 goto bad; 2277 } 2278 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2279 2280 cache->discard_nr_blocks = cache->origin_blocks; 2281 cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); 2282 if (!cache->discard_bitset) { 2283 *error = "could not allocate discard bitset"; 2284 goto bad; 2285 } 2286 clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); 2287 2288 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 2289 if (IS_ERR(cache->copier)) { 2290 *error = "could not create kcopyd client"; 2291 r = PTR_ERR(cache->copier); 2292 goto bad; 2293 } 2294 2295 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 2296 if (!cache->wq) { 2297 *error = "could not create workqueue for metadata object"; 2298 goto bad; 2299 } 2300 INIT_WORK(&cache->worker, do_worker); 2301 INIT_DELAYED_WORK(&cache->waker, do_waker); 2302 cache->last_commit_jiffies = jiffies; 2303 2304 cache->prison = dm_bio_prison_create(PRISON_CELLS); 2305 if (!cache->prison) { 2306 *error = "could not create bio prison"; 2307 goto bad; 2308 } 2309 2310 cache->all_io_ds = dm_deferred_set_create(); 2311 if (!cache->all_io_ds) { 2312 *error = "could not create all_io deferred set"; 2313 goto bad; 2314 } 2315 2316 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, 2317 migration_cache); 2318 if (!cache->migration_pool) { 2319 *error = "Error creating cache's migration mempool"; 2320 goto bad; 2321 } 2322 2323 cache->next_migration = NULL; 2324 2325 cache->need_tick_bio = true; 2326 cache->sized = false; 2327 cache->invalidate = false; 2328 cache->commit_requested = false; 2329 cache->loaded_mappings = false; 2330 cache->loaded_discards = false; 2331 2332 load_stats(cache); 2333 2334 atomic_set(&cache->stats.demotion, 0); 2335 atomic_set(&cache->stats.promotion, 0); 2336 atomic_set(&cache->stats.copies_avoided, 0); 2337 atomic_set(&cache->stats.cache_cell_clash, 0); 2338 atomic_set(&cache->stats.commit_count, 0); 2339 atomic_set(&cache->stats.discard_count, 0); 2340 2341 spin_lock_init(&cache->invalidation_lock); 2342 INIT_LIST_HEAD(&cache->invalidation_requests); 2343 2344 *result = cache; 2345 return 0; 2346 2347 bad: 2348 destroy(cache); 2349 return r; 2350 } 2351 2352 static int copy_ctr_args(struct cache *cache, int argc, const char **argv) 2353 { 2354 unsigned i; 2355 const char **copy; 2356 2357 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 2358 if (!copy) 2359 return -ENOMEM; 2360 for (i = 0; i < argc; i++) { 2361 copy[i] = kstrdup(argv[i], GFP_KERNEL); 2362 if (!copy[i]) { 2363 while (i--) 2364 kfree(copy[i]); 2365 kfree(copy); 2366 return -ENOMEM; 2367 } 2368 } 2369 2370 cache->nr_ctr_args = argc; 2371 cache->ctr_args = copy; 2372 2373 return 0; 2374 } 2375 2376 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) 2377 { 2378 int r = -EINVAL; 2379 struct 
cache_args *ca; 2380 struct cache *cache = NULL; 2381 2382 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2383 if (!ca) { 2384 ti->error = "Error allocating memory for cache"; 2385 return -ENOMEM; 2386 } 2387 ca->ti = ti; 2388 2389 r = parse_cache_args(ca, argc, argv, &ti->error); 2390 if (r) 2391 goto out; 2392 2393 r = cache_create(ca, &cache); 2394 if (r) 2395 goto out; 2396 2397 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 2398 if (r) { 2399 destroy(cache); 2400 goto out; 2401 } 2402 2403 ti->private = cache; 2404 2405 out: 2406 destroy_cache_args(ca); 2407 return r; 2408 } 2409 2410 static int cache_map(struct dm_target *ti, struct bio *bio) 2411 { 2412 struct cache *cache = ti->private; 2413 2414 int r; 2415 dm_oblock_t block = get_bio_block(cache, bio); 2416 size_t pb_data_size = get_per_bio_data_size(cache); 2417 bool can_migrate = false; 2418 bool discarded_block; 2419 struct dm_bio_prison_cell *cell; 2420 struct policy_result lookup_result; 2421 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); 2422 2423 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { 2424 /* 2425 * This can only occur if the io goes to a partial block at 2426 * the end of the origin device. We don't cache these. 2427 * Just remap to the origin and carry on. 2428 */ 2429 remap_to_origin(cache, bio); 2430 return DM_MAPIO_REMAPPED; 2431 } 2432 2433 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { 2434 defer_bio(cache, bio); 2435 return DM_MAPIO_SUBMITTED; 2436 } 2437 2438 /* 2439 * Check to see if that block is currently migrating. 2440 */ 2441 cell = alloc_prison_cell(cache); 2442 if (!cell) { 2443 defer_bio(cache, bio); 2444 return DM_MAPIO_SUBMITTED; 2445 } 2446 2447 r = bio_detain(cache, block, bio, cell, 2448 (cell_free_fn) free_prison_cell, 2449 cache, &cell); 2450 if (r) { 2451 if (r < 0) 2452 defer_bio(cache, bio); 2453 2454 return DM_MAPIO_SUBMITTED; 2455 } 2456 2457 discarded_block = is_discarded_oblock(cache, block); 2458 2459 r = policy_map(cache->policy, block, false, can_migrate, discarded_block, 2460 bio, &lookup_result); 2461 if (r == -EWOULDBLOCK) { 2462 cell_defer(cache, cell, true); 2463 return DM_MAPIO_SUBMITTED; 2464 2465 } else if (r) { 2466 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); 2467 bio_io_error(bio); 2468 return DM_MAPIO_SUBMITTED; 2469 } 2470 2471 r = DM_MAPIO_REMAPPED; 2472 switch (lookup_result.op) { 2473 case POLICY_HIT: 2474 if (passthrough_mode(&cache->features)) { 2475 if (bio_data_dir(bio) == WRITE) { 2476 /* 2477 * We need to invalidate this block, so 2478 * defer for the worker thread. 
2479 */ 2480 cell_defer(cache, cell, true); 2481 r = DM_MAPIO_SUBMITTED; 2482 2483 } else { 2484 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2485 inc_miss_counter(cache, bio); 2486 remap_to_origin_clear_discard(cache, bio, block); 2487 2488 cell_defer(cache, cell, false); 2489 } 2490 2491 } else { 2492 inc_hit_counter(cache, bio); 2493 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2494 2495 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 2496 !is_dirty(cache, lookup_result.cblock)) 2497 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 2498 else 2499 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); 2500 2501 cell_defer(cache, cell, false); 2502 } 2503 break; 2504 2505 case POLICY_MISS: 2506 inc_miss_counter(cache, bio); 2507 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2508 2509 if (pb->req_nr != 0) { 2510 /* 2511 * This is a duplicate writethrough io that is no 2512 * longer needed because the block has been demoted. 2513 */ 2514 bio_endio(bio, 0); 2515 cell_defer(cache, cell, false); 2516 return DM_MAPIO_SUBMITTED; 2517 } else { 2518 remap_to_origin_clear_discard(cache, bio, block); 2519 cell_defer(cache, cell, false); 2520 } 2521 break; 2522 2523 default: 2524 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, 2525 (unsigned) lookup_result.op); 2526 bio_io_error(bio); 2527 r = DM_MAPIO_SUBMITTED; 2528 } 2529 2530 return r; 2531 } 2532 2533 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2534 { 2535 struct cache *cache = ti->private; 2536 unsigned long flags; 2537 size_t pb_data_size = get_per_bio_data_size(cache); 2538 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 2539 2540 if (pb->tick) { 2541 policy_tick(cache->policy); 2542 2543 spin_lock_irqsave(&cache->lock, flags); 2544 cache->need_tick_bio = true; 2545 spin_unlock_irqrestore(&cache->lock, flags); 2546 } 2547 2548 check_for_quiesced_migrations(cache, pb); 2549 2550 return 0; 2551 } 2552 2553 static int write_dirty_bitset(struct cache *cache) 2554 { 2555 unsigned i, r; 2556 2557 for (i = 0; i < from_cblock(cache->cache_size); i++) { 2558 r = dm_cache_set_dirty(cache->cmd, to_cblock(i), 2559 is_dirty(cache, to_cblock(i))); 2560 if (r) 2561 return r; 2562 } 2563 2564 return 0; 2565 } 2566 2567 static int write_discard_bitset(struct cache *cache) 2568 { 2569 unsigned i, r; 2570 2571 r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, 2572 cache->origin_blocks); 2573 if (r) { 2574 DMERR("could not resize on-disk discard bitset"); 2575 return r; 2576 } 2577 2578 for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { 2579 r = dm_cache_set_discard(cache->cmd, to_oblock(i), 2580 is_discarded(cache, to_oblock(i))); 2581 if (r) 2582 return r; 2583 } 2584 2585 return 0; 2586 } 2587 2588 /* 2589 * returns true on success 2590 */ 2591 static bool sync_metadata(struct cache *cache) 2592 { 2593 int r1, r2, r3, r4; 2594 2595 r1 = write_dirty_bitset(cache); 2596 if (r1) 2597 DMERR("could not write dirty bitset"); 2598 2599 r2 = write_discard_bitset(cache); 2600 if (r2) 2601 DMERR("could not write discard bitset"); 2602 2603 save_stats(cache); 2604 2605 r3 = dm_cache_write_hints(cache->cmd, cache->policy); 2606 if (r3) 2607 DMERR("could not write hints"); 2608 2609 /* 2610 * If writing the above metadata failed, we still commit, but don't 2611 * set the clean shutdown flag. This will effectively force every 2612 * dirty bit to be set on reload. 
2613 */ 2614 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); 2615 if (r4) 2616 DMERR("could not write cache metadata. Data loss may occur."); 2617 2618 return !r1 && !r2 && !r3 && !r4; 2619 } 2620 2621 static void cache_postsuspend(struct dm_target *ti) 2622 { 2623 struct cache *cache = ti->private; 2624 2625 start_quiescing(cache); 2626 wait_for_migrations(cache); 2627 stop_worker(cache); 2628 requeue_deferred_io(cache); 2629 stop_quiescing(cache); 2630 2631 (void) sync_metadata(cache); 2632 } 2633 2634 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2635 bool dirty, uint32_t hint, bool hint_valid) 2636 { 2637 int r; 2638 struct cache *cache = context; 2639 2640 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 2641 if (r) 2642 return r; 2643 2644 if (dirty) 2645 set_dirty(cache, oblock, cblock); 2646 else 2647 clear_dirty(cache, oblock, cblock); 2648 2649 return 0; 2650 } 2651 2652 static int load_discard(void *context, sector_t discard_block_size, 2653 dm_oblock_t oblock, bool discard) 2654 { 2655 struct cache *cache = context; 2656 2657 if (discard) 2658 set_discard(cache, oblock); 2659 else 2660 clear_discard(cache, oblock); 2661 2662 return 0; 2663 } 2664 2665 static dm_cblock_t get_cache_dev_size(struct cache *cache) 2666 { 2667 sector_t size = get_dev_size(cache->cache_dev); 2668 (void) sector_div(size, cache->sectors_per_block); 2669 return to_cblock(size); 2670 } 2671 2672 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 2673 { 2674 if (from_cblock(new_size) > from_cblock(cache->cache_size)) 2675 return true; 2676 2677 /* 2678 * We can't drop a dirty block when shrinking the cache, so check 2679 * every block that would be dropped, ie. [new_size, cache_size). 2680 */ 2681 while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 2682 if (is_dirty(cache, new_size)) { 2683 DMERR("unable to shrink cache; cache block %llu is dirty", 2684 (unsigned long long) from_cblock(new_size)); 2685 return false; 2686 } 2687 new_size = to_cblock(from_cblock(new_size) + 1); 2688 } 2689 2690 return true; 2691 } 2692 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 2693 { 2694 int r; 2695 2696 r = dm_cache_resize(cache->cmd, new_size); 2697 if (r) { 2698 DMERR("could not resize cache metadata"); 2699 return r; 2700 } 2701 2702 cache->cache_size = new_size; 2703 2704 return 0; 2705 } 2706 2707 static int cache_preresume(struct dm_target *ti) 2708 { 2709 int r = 0; 2710 struct cache *cache = ti->private; 2711 dm_cblock_t csize = get_cache_dev_size(cache); 2712 2713 /* 2714 * Check to see if the cache has resized.
2715 */ 2716 if (!cache->sized) { 2717 r = resize_cache_dev(cache, csize); 2718 if (r) 2719 return r; 2720 2721 cache->sized = true; 2722 2723 } else if (csize != cache->cache_size) { 2724 if (!can_resize(cache, csize)) 2725 return -EINVAL; 2726 2727 r = resize_cache_dev(cache, csize); 2728 if (r) 2729 return r; 2730 } 2731 2732 if (!cache->loaded_mappings) { 2733 r = dm_cache_load_mappings(cache->cmd, cache->policy, 2734 load_mapping, cache); 2735 if (r) { 2736 DMERR("could not load cache mappings"); 2737 return r; 2738 } 2739 2740 cache->loaded_mappings = true; 2741 } 2742 2743 if (!cache->loaded_discards) { 2744 r = dm_cache_load_discards(cache->cmd, load_discard, cache); 2745 if (r) { 2746 DMERR("could not load origin discards"); 2747 return r; 2748 } 2749 2750 cache->loaded_discards = true; 2751 } 2752 2753 return r; 2754 } 2755 2756 static void cache_resume(struct dm_target *ti) 2757 { 2758 struct cache *cache = ti->private; 2759 2760 cache->need_tick_bio = true; 2761 do_waker(&cache->waker.work); 2762 } 2763 2764 /* 2765 * Status format: 2766 * 2767 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 2768 * <cache block size> <#used cache blocks>/<#total cache blocks> 2769 * <#read hits> <#read misses> <#write hits> <#write misses> 2770 * <#demotions> <#promotions> <#dirty> 2771 * <#features> <features>* 2772 * <#core args> <core args> 2773 * <policy name> <#policy args> <policy args>* 2774 */ 2775 static void cache_status(struct dm_target *ti, status_type_t type, 2776 unsigned status_flags, char *result, unsigned maxlen) 2777 { 2778 int r = 0; 2779 unsigned i; 2780 ssize_t sz = 0; 2781 dm_block_t nr_free_blocks_metadata = 0; 2782 dm_block_t nr_blocks_metadata = 0; 2783 char buf[BDEVNAME_SIZE]; 2784 struct cache *cache = ti->private; 2785 dm_cblock_t residency; 2786 2787 switch (type) { 2788 case STATUSTYPE_INFO: 2789 /* Commit to ensure statistics aren't out-of-date */ 2790 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { 2791 r = dm_cache_commit(cache->cmd, false); 2792 if (r) 2793 DMERR("could not commit metadata for accurate status"); 2794 } 2795 2796 r = dm_cache_get_free_metadata_block_count(cache->cmd, 2797 &nr_free_blocks_metadata); 2798 if (r) { 2799 DMERR("could not get metadata free block count"); 2800 goto err; 2801 } 2802 2803 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 2804 if (r) { 2805 DMERR("could not get metadata device size"); 2806 goto err; 2807 } 2808 2809 residency = policy_residency(cache->policy); 2810 2811 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ", 2812 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 2813 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 2814 (unsigned long long)nr_blocks_metadata, 2815 cache->sectors_per_block, 2816 (unsigned long long) from_cblock(residency), 2817 (unsigned long long) from_cblock(cache->cache_size), 2818 (unsigned) atomic_read(&cache->stats.read_hit), 2819 (unsigned) atomic_read(&cache->stats.read_miss), 2820 (unsigned) atomic_read(&cache->stats.write_hit), 2821 (unsigned) atomic_read(&cache->stats.write_miss), 2822 (unsigned) atomic_read(&cache->stats.demotion), 2823 (unsigned) atomic_read(&cache->stats.promotion), 2824 (unsigned long long) from_cblock(cache->nr_dirty)); 2825 2826 if (writethrough_mode(&cache->features)) 2827 DMEMIT("1 writethrough "); 2828 2829 else if (passthrough_mode(&cache->features)) 2830 DMEMIT("1 passthrough "); 2831 2832 else if (writeback_mode(&cache->features)) 2833 DMEMIT("1 writeback 
"); 2834 2835 else { 2836 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); 2837 goto err; 2838 } 2839 2840 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 2841 2842 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 2843 if (sz < maxlen) { 2844 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); 2845 if (r) 2846 DMERR("policy_emit_config_values returned %d", r); 2847 } 2848 2849 break; 2850 2851 case STATUSTYPE_TABLE: 2852 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 2853 DMEMIT("%s ", buf); 2854 format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 2855 DMEMIT("%s ", buf); 2856 format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 2857 DMEMIT("%s", buf); 2858 2859 for (i = 0; i < cache->nr_ctr_args - 1; i++) 2860 DMEMIT(" %s", cache->ctr_args[i]); 2861 if (cache->nr_ctr_args) 2862 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 2863 } 2864 2865 return; 2866 2867 err: 2868 DMEMIT("Error"); 2869 } 2870 2871 /* 2872 * A cache block range can take two forms: 2873 * 2874 * i) A single cblock, eg. '3456' 2875 * ii) A begin and end cblock with dots between, eg. 123-234 2876 */ 2877 static int parse_cblock_range(struct cache *cache, const char *str, 2878 struct cblock_range *result) 2879 { 2880 char dummy; 2881 uint64_t b, e; 2882 int r; 2883 2884 /* 2885 * Try and parse form (ii) first. 2886 */ 2887 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 2888 if (r < 0) 2889 return r; 2890 2891 if (r == 2) { 2892 result->begin = to_cblock(b); 2893 result->end = to_cblock(e); 2894 return 0; 2895 } 2896 2897 /* 2898 * That didn't work, try form (i). 2899 */ 2900 r = sscanf(str, "%llu%c", &b, &dummy); 2901 if (r < 0) 2902 return r; 2903 2904 if (r == 1) { 2905 result->begin = to_cblock(b); 2906 result->end = to_cblock(from_cblock(result->begin) + 1u); 2907 return 0; 2908 } 2909 2910 DMERR("invalid cblock range '%s'", str); 2911 return -EINVAL; 2912 } 2913 2914 static int validate_cblock_range(struct cache *cache, struct cblock_range *range) 2915 { 2916 uint64_t b = from_cblock(range->begin); 2917 uint64_t e = from_cblock(range->end); 2918 uint64_t n = from_cblock(cache->cache_size); 2919 2920 if (b >= n) { 2921 DMERR("begin cblock out of range: %llu >= %llu", b, n); 2922 return -EINVAL; 2923 } 2924 2925 if (e > n) { 2926 DMERR("end cblock out of range: %llu > %llu", e, n); 2927 return -EINVAL; 2928 } 2929 2930 if (b >= e) { 2931 DMERR("invalid cblock range: %llu >= %llu", b, e); 2932 return -EINVAL; 2933 } 2934 2935 return 0; 2936 } 2937 2938 static int request_invalidation(struct cache *cache, struct cblock_range *range) 2939 { 2940 struct invalidation_request req; 2941 2942 INIT_LIST_HEAD(&req.list); 2943 req.cblocks = range; 2944 atomic_set(&req.complete, 0); 2945 req.err = 0; 2946 init_waitqueue_head(&req.result_wait); 2947 2948 spin_lock(&cache->invalidation_lock); 2949 list_add(&req.list, &cache->invalidation_requests); 2950 spin_unlock(&cache->invalidation_lock); 2951 wake_worker(cache); 2952 2953 wait_event(req.result_wait, atomic_read(&req.complete)); 2954 return req.err; 2955 } 2956 2957 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, 2958 const char **cblock_ranges) 2959 { 2960 int r = 0; 2961 unsigned i; 2962 struct cblock_range range; 2963 2964 if (!passthrough_mode(&cache->features)) { 2965 DMERR("cache has to be in passthrough mode for invalidation"); 2966 return -EPERM; 2967 } 2968 2969 for (i = 0; i < count; i++) { 2970 r = parse_cblock_range(cache, 
cblock_ranges[i], &range); 2971 if (r) 2972 break; 2973 2974 r = validate_cblock_range(cache, &range); 2975 if (r) 2976 break; 2977 2978 /* 2979 * Pass the begin and end cache blocks to the worker and wake it. 2980 */ 2981 r = request_invalidation(cache, &range); 2982 if (r) 2983 break; 2984 } 2985 2986 return r; 2987 } 2988 2989 /* 2990 * Supports 2991 * "<key> <value>" 2992 * and 2993 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*" 2994 * 2995 * The key migration_threshold is supported by the cache target core. 2996 */ 2997 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 2998 { 2999 struct cache *cache = ti->private; 3000 3001 if (!argc) 3002 return -EINVAL; 3003 3004 if (!strcasecmp(argv[0], "invalidate_cblocks")) 3005 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 3006 3007 if (argc != 2) 3008 return -EINVAL; 3009 3010 return set_config_value(cache, argv[0], argv[1]); 3011 } 3012 3013 static int cache_iterate_devices(struct dm_target *ti, 3014 iterate_devices_callout_fn fn, void *data) 3015 { 3016 int r = 0; 3017 struct cache *cache = ti->private; 3018 3019 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 3020 if (!r) 3021 r = fn(ti, cache->origin_dev, 0, ti->len, data); 3022 3023 return r; 3024 } 3025 3026 /* 3027 * We assume I/O is going to the origin (which is the volume 3028 * more likely to have restrictions e.g. by being striped). 3029 * (Looking up the exact location of the data would be expensive 3030 * and could always be out of date by the time the bio is submitted.) 3031 */ 3032 static int cache_bvec_merge(struct dm_target *ti, 3033 struct bvec_merge_data *bvm, 3034 struct bio_vec *biovec, int max_size) 3035 { 3036 struct cache *cache = ti->private; 3037 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); 3038 3039 if (!q->merge_bvec_fn) 3040 return max_size; 3041 3042 bvm->bi_bdev = cache->origin_dev->bdev; 3043 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 3044 } 3045 3046 static void set_discard_limits(struct cache *cache, struct queue_limits *limits) 3047 { 3048 /* 3049 * FIXME: these limits may be incompatible with the cache device 3050 */ 3051 limits->max_discard_sectors = cache->sectors_per_block; 3052 limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; 3053 } 3054 3055 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 3056 { 3057 struct cache *cache = ti->private; 3058 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 3059 3060 /* 3061 * If the system-determined stacked limits are compatible with the 3062 * cache's blocksize (io_opt is a factor) do not override them.
3063 */ 3064 if (io_opt_sectors < cache->sectors_per_block || 3065 do_div(io_opt_sectors, cache->sectors_per_block)) { 3066 blk_limits_io_min(limits, 0); 3067 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3068 } 3069 set_discard_limits(cache, limits); 3070 } 3071 3072 /*----------------------------------------------------------------*/ 3073 3074 static struct target_type cache_target = { 3075 .name = "cache", 3076 .version = {1, 4, 0}, 3077 .module = THIS_MODULE, 3078 .ctr = cache_ctr, 3079 .dtr = cache_dtr, 3080 .map = cache_map, 3081 .end_io = cache_end_io, 3082 .postsuspend = cache_postsuspend, 3083 .preresume = cache_preresume, 3084 .resume = cache_resume, 3085 .status = cache_status, 3086 .message = cache_message, 3087 .iterate_devices = cache_iterate_devices, 3088 .merge = cache_bvec_merge, 3089 .io_hints = cache_io_hints, 3090 }; 3091 3092 static int __init dm_cache_init(void) 3093 { 3094 int r; 3095 3096 r = dm_register_target(&cache_target); 3097 if (r) { 3098 DMERR("cache target registration failed: %d", r); 3099 return r; 3100 } 3101 3102 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3103 if (!migration_cache) { 3104 dm_unregister_target(&cache_target); 3105 return -ENOMEM; 3106 } 3107 3108 return 0; 3109 } 3110 3111 static void __exit dm_cache_exit(void) 3112 { 3113 dm_unregister_target(&cache_target); 3114 kmem_cache_destroy(migration_cache); 3115 } 3116 3117 module_init(dm_cache_init); 3118 module_exit(dm_cache_exit); 3119 3120 MODULE_DESCRIPTION(DM_NAME " cache target"); 3121 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 3122 MODULE_LICENSE("GPL"); 3123
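/*
 * Illustrative usage only -- a sketch, not part of the driver.  The argument
 * order follows parse_cache_args() above: metadata dev, cache dev, origin
 * dev, block size in 512-byte sectors, <#feature args> plus features, then
 * the policy name and its <key> <value> pairs.  The device paths, sizes and
 * the "mq" policy name are placeholders chosen for the example.
 *
 *   # 20 GiB origin, 256 KiB cache blocks (512 sectors), writeback mode:
 *   dmsetup create cached --table \
 *     "0 41943040 cache /dev/mapper/cache-md /dev/mapper/cache-ssd \
 *      /dev/mapper/origin 512 1 writeback mq 2 migration_threshold 2048"
 *
 * Runtime messages go through cache_message(): a bare <key> <value> pair is
 * tried against the core (migration_threshold) and then the policy, while
 * invalidate_cblocks takes single cblocks or begin-end ranges and is only
 * accepted in passthrough mode:
 *
 *   dmsetup message cached 0 migration_threshold 204800
 *   dmsetup message cached 0 invalidate_cblocks 2345 3456-4567
 */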
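/*
 * Likewise illustrative: a possible STATUSTYPE_INFO line from cache_status()
 * above (one line, wrapped here), with invented numbers mapped onto the
 * documented field order.  Block sizes are in 512-byte sectors.
 *
 *   # dmsetup status cached
 *   0 41943040 cache 8 72/4096 512 140800/163840 5204 412 9017 388 3 14 21
 *     1 writeback 2 migration_threshold 2048 mq ...
 *
 *   8                            metadata block size
 *   72/4096                      used/total metadata blocks
 *   512                          cache block size
 *   140800/163840                resident/total cache blocks
 *   5204 412                     read hits / read misses
 *   9017 388                     write hits / write misses
 *   3 14 21                      demotions / promotions / dirty blocks
 *   1 writeback                  feature count + features
 *   2 migration_threshold 2048   core args
 *   mq ...                       policy name, then its emitted config values
 */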