/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be >= 32KB
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

struct cache_features {
	enum cache_mode mode;
	bool write_through:1;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	struct dm_cache_metadata *cmd;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	atomic_t nr_migrations;
	wait_queue_head_t migration_wait;

	/*
	 * cache_size entries, dirty if set
	 */
	dm_cblock_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	sector_t discard_block_size; /* a power of 2 times sectors per block */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;
	struct dm_cache_migration *next_migration;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool quiescing:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	struct cache_stats stats;

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

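/*
 * A sketch of the intended usage pattern for the prealloc helpers below,
 * mirroring what process_deferred_bios() does further down.  The helper
 * names are the real ones; the surrounding flow is illustrative only:
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	if (prealloc_data_structs(cache, &structs))
 *		return;				// out of reserves, retry later
 *	process_bio(cache, &structs, bio);	// may consume the mg and cells
 *	prealloc_free_structs(cache, &structs);	// return whatever is left
 */
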
static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		mempool_free(p->mg, cache->migration_pool);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block = from_oblock(oblock);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
		if (!from_cblock(cache->nr_dirty))
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	sector_t discard_blocks = cache->discard_block_size;
	dm_block_t b = from_oblock(oblock);

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(discard_blocks, cache->sectors_per_block);
	else
		discard_blocks >>= cache->sectors_per_block_shift;

	(void) sector_div(b, discard_blocks);

	return to_dblock(b);
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/
static struct per_bio_data *get_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_sector;

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
				sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
				(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

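/*
 * A worked example of the remapping arithmetic above (block size chosen
 * arbitrarily for illustration): with sectors_per_block = 128 (64KiB) the
 * power-of-two path applies, so a bio at sector 1000 lives in oblock
 * 1000 >> 7 = 7 with offset 1000 & 127 = 104; remapping it to cblock 3
 * sends it to sector (3 << 7) | 104 = 488 on the cache device.
 */
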
static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void free_migration(struct dm_cache_migration *mg)
{
	mempool_free(mg, mg->cache->migration_pool);
}

static void inc_nr_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_migrations);
}

static void dec_nr_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_migrations);

	/*
	 * Wake the worker in case we're suspending the target.
	 */
	wake_up(&cache->migration_wait);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cleanup_migration(struct dm_cache_migration *mg)
{
	dec_nr_migrations(mg->cache);
	free_migration(mg);
}

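/*
 * Rough lifecycle of a migration, as implemented by the functions below:
 *
 *   1. promote()/demote_then_promote()/writeback() fill in a
 *	dm_cache_migration and call quiesce_migration(), which waits for
 *	in-flight io (via the all_io deferred set) before queueing it on
 *	quiesced_migrations.
 *   2. The worker issues the kcopyd copy (issue_copy()); copy_complete()
 *	moves the migration to completed_migrations.
 *   3. complete_migration() routes to migration_failure() or
 *	migration_success_pre_commit(), which updates the metadata and
 *	parks the migration on need_commit_migrations.
 *   4. After the next commit, migration_success_post_commit() releases
 *	cells and, for demote+promote pairs, requeues the promote half.
 */
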
static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, 1);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, 1);
	}

	cleanup_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		cell_defer(cache, mg->old_ocell, false);
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cleanup_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			cleanup_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			cleanup_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else
			cleanup_migration(mg);

	} else {
		cell_defer(cache, mg->new_ocell, true);
		clear_dirty(cache, mg->new_oblock, mg->cblock);
		cleanup_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy_real(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0)
		migration_failure(mg);
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else
		avoid = is_discarded_oblock(cache, mg->new_oblock);

	avoid ? avoid_copy(mg) : issue_copy_real(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
		dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio);

	BUG_ON(bio->bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, e.g. the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
 */
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
						  cache->discard_block_size);
	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
	dm_block_t b;

	(void) sector_div(end_block, cache->discard_block_size);

	for (b = start_block; b < end_block; b++)
		set_discard(cache, to_dblock(b));

	bio_endio(bio, 0);
}

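/*
 * Worked example of the rounding above (sizes are illustrative): with a
 * discard_block_size of 1024 sectors, a discard covering sectors
 * [3000, 9000) gives start_block = ceil(3000 / 1024) = 3 and
 * end_block = floor(9000 / 1024) = 8, so only discard blocks 3..7
 * (sectors 3072..8191), which lie entirely inside the bio, are marked.
 */
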
static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}

static bool is_writethrough_io(struct cache *cache, struct bio *bio,
			       dm_cblock_t cblock)
{
	return bio_data_dir(bio) == WRITE &&
		cache->features.write_through && !is_dirty(cache, cblock);
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	struct per_bio_data *pb = get_per_bio_data(bio);
	bool discarded_block = is_discarded_oblock(cache, block);
	bool can_migrate = discarded_block || spare_migration_bandwidth(cache);

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		inc_hit_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
			/*
			 * No need to mark anything dirty in write through mode.
			 */
			pb->req_nr == 0 ?
				remap_to_cache(cache, bio, lookup_result.cblock) :
				remap_to_origin_clear_discard(cache, bio, block);
		} else
			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);

		issue(cache, bio);
		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (pb->req_nr != 0) {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio, 0);
		} else {
			remap_to_origin_clear_discard(cache, bio, block);
			issue(cache, bio);
		}
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		cell_prealloc = prealloc_get_cell(structs);
		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
			       (cell_free_fn) prealloc_put_cell,
			       structs, &old_ocell);
		if (r > 0) {
			/*
			 * We have to be careful to avoid lock inversion of
			 * the cells.  So we back off, and wait for the
			 * old_ocell to become free.
			 */
			policy_force_mapping(cache->policy, block,
					     lookup_result.old_oblock);
			atomic_inc(&cache->stats.cache_cell_clash);
			break;
		}
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);

		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    old_ocell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}

static int commit_if_needed(struct cache *cache)
{
	if (dm_cache_changed_this_transaction(cache->cmd) &&
	    (cache->commit_requested || need_commit_due_to_time(cache))) {
		atomic_inc(&cache->stats.commit_count);
		cache->last_commit_jiffies = jiffies;
		cache->commit_requested = false;
		return dm_cache_commit(cache->cmd, false);
	}

	return 0;
}

static void process_deferred_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, bio);
		else
			process_bio(cache, &structs, bio);
	}

	prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}

static void writeback_some_dirty_blocks(struct cache *cache)
{
	int r = 0;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (prealloc_data_structs(cache, &structs))
			break;

		r = policy_writeback_work(cache->policy, &oblock, &cblock);
		if (r)
			break;

		r = get_cell(cache, oblock, &structs, &old_ocell);
		if (r) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	prealloc_free_structs(cache, &structs);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
static void start_quiescing(struct cache *cache)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	cache->quiescing = 1;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void stop_quiescing(struct cache *cache)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	cache->quiescing = 0;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_quiescing(struct cache *cache)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = cache->quiescing;
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static void wait_for_migrations(struct cache *cache)
{
	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
}

static void stop_worker(struct cache *cache)
{
	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
}

static void requeue_deferred_io(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios)))
		bio_endio(bio, DM_ENDIO_REQUEUE);
}

static int more_work(struct cache *cache)
{
	if (is_quiescing(cache))
		return !list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
	else
		return !bio_list_empty(&cache->deferred_bios) ||
			!bio_list_empty(&cache->deferred_flush_bios) ||
			!list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
}

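/*
 * A summary of one pass of the worker loop below: deferred bios are
 * processed (unless we're quiescing), copies are started for quiesced
 * migrations, completed copies update the metadata, some dirty blocks are
 * written back, and then a single commit (if anything requested one)
 * covers both the batched REQ_FLUSH/REQ_FUA bios and the migrations on
 * need_commit_migrations; only after that commit succeeds are the flush
 * bios submitted and the post-commit migration work finished.
 */
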
static void do_worker(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, worker);

	do {
		if (!is_quiescing(cache))
			process_deferred_bios(cache);

		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
		process_migrations(cache, &cache->completed_migrations, complete_migration);

		writeback_some_dirty_blocks(cache);

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);

			/*
			 * FIXME: rollback metadata or just go into a
			 * failure mode and error everything
			 */
		} else {
			process_deferred_flush_bios(cache, true);
			process_migrations(cache, &cache->need_commit_migrations,
					   migration_success_post_commit);
		}
	} while (more_work(cache));
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
	wake_worker(cache);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

/*----------------------------------------------------------------*/

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
		is_congested(cache->cache_dev, bdi_bits);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	if (cache->next_migration)
		mempool_free(cache->next_migration, cache->migration_pool);

	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

	if (cache->all_io_ds)
		dm_deferred_set_destroy(cache->all_io_ds);

	if (cache->prison)
		dm_bio_prison_destroy(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);

	if (cache->origin_dev)
		dm_put_device(cache->ti, cache->origin_dev);

	if (cache->cache_dev)
		dm_put_device(cache->ti, cache->cache_dev);

	if (cache->policy)
		dm_cache_policy_destroy(cache->policy);

	for (i = 0; i < cache->nr_ctr_args; i++)
		kfree(cache->ctr_args[i]);
	kfree(cache->ctr_args);

	kfree(cache);
}

static void cache_dtr(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	destroy(cache);
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

/*----------------------------------------------------------------*/

/*
 * Construct a cache device mapping.
 *
 * cache <metadata dev> <cache dev> <origin dev> <block size>
 *       <#feature args> [<feature arg>]*
 *       <policy> <#policy args> [<policy arg>]*
 *
 * metadata dev    : fast device holding the persistent metadata
 * cache dev       : fast device holding cached data blocks
 * origin dev      : slow device holding original data blocks
 * block size      : cache unit size in sectors
 *
 * #feature args   : number of feature arguments passed
 * feature args    : writethrough.  (The default is writeback.)
 *
 * policy          : the replacement policy to use
 * #policy args    : an even number of policy arguments corresponding
 *                   to key/value pairs passed to the policy
 * policy args     : key/value pairs passed to the policy
 *                   E.g. 'sequential_threshold 1024'
 *                   See cache-policies.txt for details.
 *
 * Optional feature arguments are:
 *   writethrough  : write through caching that prohibits cache block
 *                   content from being different from origin block content.
 *                   Without this argument, the default behaviour is to write
 *                   back cache block contents later for performance reasons,
 *                   so they may differ from the corresponding origin blocks.
 */

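/*
 * Example table line (purely illustrative; the device names, sizes and
 * policy name are not taken from this file):
 *
 *   0 41943040 cache /dev/mapper/meta /dev/mapper/fast /dev/mapper/slow \
 *	512 1 writethrough default 0
 *
 * i.e. a 20GiB mapping using 512-sector (256KiB) cache blocks, the
 * writethrough feature, and a policy called "default" with no arguments.
 */
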
struct cache_args {
	struct dm_target *ti;

	struct dm_dev *metadata_dev;

	struct dm_dev *cache_dev;
	sector_t cache_sectors;

	struct dm_dev *origin_dev;
	sector_t origin_sectors;

	uint32_t block_size;

	const char *policy_name;
	int policy_argc;
	const char **policy_argv;

	struct cache_features features;
};

static void destroy_cache_args(struct cache_args *ca)
{
	if (ca->metadata_dev)
		dm_put_device(ca->ti, ca->metadata_dev);

	if (ca->cache_dev)
		dm_put_device(ca->ti, ca->cache_dev);

	if (ca->origin_dev)
		dm_put_device(ca->ti, ca->origin_dev);

	kfree(ca);
}

static bool at_least_one_arg(struct dm_arg_set *as, char **error)
{
	if (!as->argc) {
		*error = "Insufficient args";
		return false;
	}

	return true;
}

static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
			      char **error)
{
	int r;
	sector_t metadata_dev_size;
	char b[BDEVNAME_SIZE];

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->metadata_dev);
	if (r) {
		*error = "Error opening metadata device";
		return r;
	}

	metadata_dev_size = get_dev_size(ca->metadata_dev);
	if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
		       bdevname(ca->metadata_dev->bdev, b),
		       DM_CACHE_METADATA_MAX_SECTORS_WARNING);

	return 0;
}

static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
			   char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->cache_dev);
	if (r) {
		*error = "Error opening cache device";
		return r;
	}
	ca->cache_sectors = get_dev_size(ca->cache_dev);

	return 0;
}

static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
			  &ca->origin_dev);
	if (r) {
		*error = "Error opening origin device";
		return r;
	}

	ca->origin_sectors = get_dev_size(ca->origin_dev);
	if (ca->ti->len > ca->origin_sectors) {
		*error = "Device size larger than cached device";
		return -EINVAL;
	}

	return 0;
}

static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
			    char **error)
{
	unsigned long tmp;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp ||
	    tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
	    tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
		*error = "Invalid data block size";
		return -EINVAL;
	}

	if (tmp > ca->cache_sectors) {
		*error = "Data block size is larger than the cache device";
		return -EINVAL;
	}

	ca->block_size = tmp;

	return 0;
}

static void init_features(struct cache_features *cf)
{
	cf->mode = CM_WRITE;
	cf->write_through = false;
}

static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
			  char **error)
{
	static struct dm_arg _args[] = {
		{0, 1, "Invalid number of cache feature arguments"},
	};

	int r;
	unsigned argc;
	const char *arg;
	struct cache_features *cf = &ca->features;

	init_features(cf);

	r = dm_read_arg_group(_args, as, &argc, error);
	if (r)
		return -EINVAL;

	while (argc--) {
		arg = dm_shift_arg(as);

		if (!strcasecmp(arg, "writeback"))
			cf->write_through = false;

		else if (!strcasecmp(arg, "writethrough"))
			cf->write_through = true;

		else {
			*error = "Unrecognised cache feature requested";
			return -EINVAL;
		}
	}

	return 0;
}

static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
			char **error)
{
	static struct dm_arg _args[] = {
		{0, 1024, "Invalid number of policy arguments"},
	};

	int r;

	if (!at_least_one_arg(as, error))
		return -EINVAL;

	ca->policy_name = dm_shift_arg(as);

	r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
	if (r)
		return -EINVAL;

	ca->policy_argv = (const char **)as->argv;
	dm_consume_args(as, ca->policy_argc);

	return 0;
}

static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
			    char **error)
{
	int r;
	struct dm_arg_set as;

	as.argc = argc;
	as.argv = argv;

	r = parse_metadata_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_cache_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_origin_dev(ca, &as, error);
	if (r)
		return r;

	r = parse_block_size(ca, &as, error);
	if (r)
		return r;

	r = parse_features(ca, &as, error);
	if (r)
		return r;

	r = parse_policy(ca, &as, error);
	if (r)
		return r;

	return 0;
}

/*----------------------------------------------------------------*/

static struct kmem_cache *migration_cache;

static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv)
{
	int r = 0;

	if (argc & 1) {
		DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
		return -EINVAL;
	}

	while (argc) {
		r = policy_set_config_value(p, argv[0], argv[1]);
		if (r) {
			DMWARN("policy_set_config_value failed: key = '%s', value = '%s'",
			       argv[0], argv[1]);
			return r;
		}

		argc -= 2;
		argv += 2;
	}

	return r;
}

static int create_cache_policy(struct cache *cache, struct cache_args *ca,
			       char **error)
{
	int r;

	cache->policy = dm_cache_policy_create(ca->policy_name,
					       cache->cache_size,
					       cache->origin_sectors,
					       cache->sectors_per_block);
	if (!cache->policy) {
		*error = "Error creating cache's policy";
		return -ENOMEM;
	}

	r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv);
	if (r)
		dm_cache_policy_destroy(cache->policy);

	return r;
}

/*
 * We want the discard block size to be a power of two, at least the size
 * of the cache block size, and have no more than 2^14 discard blocks
 * across the origin.
 */
#define MAX_DISCARD_BLOCKS (1 << 14)

static bool too_many_discard_blocks(sector_t discard_block_size,
				    sector_t origin_size)
{
	(void) sector_div(origin_size, discard_block_size);

	return origin_size > MAX_DISCARD_BLOCKS;
}

static sector_t calculate_discard_block_size(sector_t cache_block_size,
					     sector_t origin_size)
{
	sector_t discard_block_size;

	discard_block_size = roundup_pow_of_two(cache_block_size);

	if (origin_size)
		while (too_many_discard_blocks(discard_block_size, origin_size))
			discard_block_size *= 2;

	return discard_block_size;
}

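/*
 * For example (numbers are illustrative): with 512-sector (256KiB) cache
 * blocks and a 1TiB origin (2147483648 sectors), roundup_pow_of_two(512)
 * is 512, but that would give 4194304 discard blocks, so the size is
 * doubled until 2147483648 / discard_block_size <= 16384, giving a
 * discard_block_size of 131072 sectors (64MiB).
 */
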
#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100)

static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio);

static int cache_create(struct cache_args *ca, struct cache **result)
{
	int r = 0;
	char **error = &ca->ti->error;
	struct cache *cache;
	struct dm_target *ti = ca->ti;
	dm_block_t origin_blocks;
	struct dm_cache_metadata *cmd;
	bool may_format = ca->features.mode == CM_WRITE;

	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
	if (!cache)
		return -ENOMEM;

	cache->ti = ca->ti;
	ti->private = cache;
	ti->per_bio_data_size = sizeof(struct per_bio_data);
	ti->num_flush_bios = 2;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;
	ti->discards_supported = true;
	ti->discard_zeroes_data_unsupported = true;

	memcpy(&cache->features, &ca->features, sizeof(cache->features));

	if (cache->features.write_through)
		ti->num_write_bios = cache_num_write_bios;

	cache->callbacks.congested_fn = cache_is_congested;
	dm_table_add_target_callbacks(ti->table, &cache->callbacks);

	cache->metadata_dev = ca->metadata_dev;
	cache->origin_dev = ca->origin_dev;
	cache->cache_dev = ca->cache_dev;

	ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;

	/* FIXME: factor out this whole section */
	origin_blocks = cache->origin_sectors = ca->origin_sectors;
	(void) sector_div(origin_blocks, ca->block_size);
	cache->origin_blocks = to_oblock(origin_blocks);

	cache->sectors_per_block = ca->block_size;
	if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
		r = -EINVAL;
		goto bad;
	}

	if (ca->block_size & (ca->block_size - 1)) {
		dm_block_t cache_size = ca->cache_sectors;

		cache->sectors_per_block_shift = -1;
		(void) sector_div(cache_size, ca->block_size);
		cache->cache_size = to_cblock(cache_size);
	} else {
		cache->sectors_per_block_shift = __ffs(ca->block_size);
		cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
	}

	r = create_cache_policy(cache, ca, error);
	if (r)
		goto bad;
	cache->policy_nr_args = ca->policy_argc;

	cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
				     ca->block_size, may_format,
				     dm_cache_policy_get_hint_size(cache->policy));
	if (IS_ERR(cmd)) {
		*error = "Error creating metadata object";
		r = PTR_ERR(cmd);
		goto bad;
	}
	cache->cmd = cmd;

	spin_lock_init(&cache->lock);
	bio_list_init(&cache->deferred_bios);
	bio_list_init(&cache->deferred_flush_bios);
	INIT_LIST_HEAD(&cache->quiesced_migrations);
	INIT_LIST_HEAD(&cache->completed_migrations);
	INIT_LIST_HEAD(&cache->need_commit_migrations);
	cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
	atomic_set(&cache->nr_migrations, 0);
	init_waitqueue_head(&cache->migration_wait);

	cache->nr_dirty = 0;
	cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
	if (!cache->dirty_bitset) {
		*error = "could not allocate dirty bitset";
		goto bad;
	}
	clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));

	cache->discard_block_size =
		calculate_discard_block_size(cache->sectors_per_block,
					     cache->origin_sectors);
	cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
	cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
	if (!cache->discard_bitset) {
		*error = "could not allocate discard bitset";
		goto bad;
	}
	clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));

	cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(cache->copier)) {
		*error = "could not create kcopyd client";
		r = PTR_ERR(cache->copier);
		goto bad;
	}

	cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!cache->wq) {
		*error = "could not create workqueue for metadata object";
		goto bad;
	}
	INIT_WORK(&cache->worker, do_worker);
	INIT_DELAYED_WORK(&cache->waker, do_waker);
	cache->last_commit_jiffies = jiffies;

	cache->prison = dm_bio_prison_create(PRISON_CELLS);
	if (!cache->prison) {
		*error = "could not create bio prison";
		goto bad;
	}

	cache->all_io_ds = dm_deferred_set_create();
	if (!cache->all_io_ds) {
		*error = "could not create all_io deferred set";
		goto bad;
	}

	cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
							 migration_cache);
	if (!cache->migration_pool) {
		*error = "Error creating cache's migration mempool";
		goto bad;
	}

	cache->next_migration = NULL;

	cache->need_tick_bio = true;
	cache->sized = false;
	cache->quiescing = false;
	cache->commit_requested = false;
	cache->loaded_mappings = false;
	cache->loaded_discards = false;

	load_stats(cache);

	atomic_set(&cache->stats.demotion, 0);
	atomic_set(&cache->stats.promotion, 0);
	atomic_set(&cache->stats.copies_avoided, 0);
	atomic_set(&cache->stats.cache_cell_clash, 0);
	atomic_set(&cache->stats.commit_count, 0);
	atomic_set(&cache->stats.discard_count, 0);

	*result = cache;
	return 0;

bad:
	destroy(cache);
	return r;
}

static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
{
	unsigned i;
	const char **copy;

	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
	if (!copy)
		return -ENOMEM;
	for (i = 0; i < argc; i++) {
		copy[i] = kstrdup(argv[i], GFP_KERNEL);
		if (!copy[i]) {
			while (i--)
				kfree(copy[i]);
			kfree(copy);
			return -ENOMEM;
		}
	}

	cache->nr_ctr_args = argc;
	cache->ctr_args = copy;

	return 0;
}

static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct cache_args *ca;
	struct cache *cache = NULL;

	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
	if (!ca) {
		ti->error = "Error allocating memory for cache";
		return -ENOMEM;
	}
	ca->ti = ti;

	r = parse_cache_args(ca, argc, argv, &ti->error);
	if (r)
		goto out;

	r = cache_create(ca, &cache);
	if (r)
		goto out;

	r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
	if (r) {
		destroy(cache);
		goto out;
	}

	ti->private = cache;

out:
	destroy_cache_args(ca);
	return r;
}

static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio)
{
	int r;
	struct cache *cache = ti->private;
	dm_oblock_t block = get_bio_block(cache, bio);
	dm_cblock_t cblock;

	r = policy_lookup(cache->policy, block, &cblock);
	if (r < 0)
		return 2;	/* assume the worst */

	return (!r && !is_dirty(cache, cblock)) ? 2 : 1;
}

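/*
 * Note on the writethrough feature: when it is enabled,
 * cache_num_write_bios() above asks the core to clone a write twice
 * whenever the block is mapped and clean in the cache.  The copy with
 * req_nr 0 is remapped to the cache device and the copy with req_nr 1 to
 * the origin (see the POLICY_HIT handling below and in process_bio()), so
 * both devices stay in sync without marking the cache block dirty.  If
 * the block has meanwhile been demoted, the duplicate is simply completed
 * in the POLICY_MISS path.
 */
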
static int cache_map(struct dm_target *ti, struct bio *bio)
{
	struct cache *cache = ti->private;

	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	bool can_migrate = false;
	bool discarded_block;
	struct dm_bio_prison_cell *cell;
	struct policy_result lookup_result;
	struct per_bio_data *pb;

	if (from_oblock(block) > from_oblock(cache->origin_blocks)) {
		/*
		 * This can only occur if the io goes to a partial block at
		 * the end of the origin device.  We don't cache these.
		 * Just remap to the origin and carry on.
		 */
		remap_to_origin_clear_discard(cache, bio, block);
		return DM_MAPIO_REMAPPED;
	}

	pb = init_per_bio_data(bio);

	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell = alloc_prison_cell(cache);
	if (!cell) {
		defer_bio(cache, bio);
		return DM_MAPIO_SUBMITTED;
	}

	r = bio_detain(cache, block, bio, cell,
		       (cell_free_fn) free_prison_cell,
		       cache, &cell);
	if (r) {
		if (r < 0)
			defer_bio(cache, bio);

		return DM_MAPIO_SUBMITTED;
	}

	discarded_block = is_discarded_oblock(cache, block);

	r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
		       bio, &lookup_result);
	if (r == -EWOULDBLOCK) {
		cell_defer(cache, cell, true);
		return DM_MAPIO_SUBMITTED;

	} else if (r) {
		DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
		bio_io_error(bio);
		return DM_MAPIO_SUBMITTED;
	}

	switch (lookup_result.op) {
	case POLICY_HIT:
		inc_hit_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (is_writethrough_io(cache, bio, lookup_result.cblock)) {
			/*
			 * No need to mark anything dirty in write through mode.
			 */
			pb->req_nr == 0 ?
				remap_to_cache(cache, bio, lookup_result.cblock) :
				remap_to_origin_clear_discard(cache, bio, block);
			cell_defer(cache, cell, false);
		} else {
			remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
			cell_defer(cache, cell, false);
		}
		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);

		if (pb->req_nr != 0) {
			/*
			 * This is a duplicate writethrough io that is no
			 * longer needed because the block has been demoted.
			 */
			bio_endio(bio, 0);
			cell_defer(cache, cell, false);
			return DM_MAPIO_SUBMITTED;
		} else {
			remap_to_origin_clear_discard(cache, bio, block);
			cell_defer(cache, cell, false);
		}
		break;

	default:
		DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	struct cache *cache = ti->private;
	unsigned long flags;
	struct per_bio_data *pb = get_per_bio_data(bio);

	if (pb->tick) {
		policy_tick(cache->policy);

		spin_lock_irqsave(&cache->lock, flags);
		cache->need_tick_bio = true;
		spin_unlock_irqrestore(&cache->lock, flags);
	}

	check_for_quiesced_migrations(cache, pb);

	return 0;
}

static int write_dirty_bitset(struct cache *cache)
{
	unsigned i, r;

	for (i = 0; i < from_cblock(cache->cache_size); i++) {
		r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
				       is_dirty(cache, to_cblock(i)));
		if (r)
			return r;
	}

	return 0;
}

static int write_discard_bitset(struct cache *cache)
{
	unsigned i, r;

	r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
					   cache->discard_nr_blocks);
	if (r) {
		DMERR("could not resize on-disk discard bitset");
		return r;
	}

	for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
		r = dm_cache_set_discard(cache->cmd, to_dblock(i),
					 is_discarded(cache, to_dblock(i)));
		if (r)
			return r;
	}

	return 0;
}

static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
		     uint32_t hint)
{
	struct cache *cache = context;
	return dm_cache_save_hint(cache->cmd, cblock, hint);
}
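/*
 * A policy may ask for a small, opaque hint to be persisted alongside
 * each mapping (the hint size comes from dm_cache_policy_get_hint_size()
 * when the metadata object is opened).  write_hints() below walks every
 * mapping and saves the policy's hint for it; the hints are handed back
 * to the policy through load_mapping() the next time the mappings are
 * loaded.
 */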
static int write_hints(struct cache *cache)
{
	int r;

	r = dm_cache_begin_hints(cache->cmd, cache->policy);
	if (r) {
		DMERR("dm_cache_begin_hints failed");
		return r;
	}

	r = policy_walk_mappings(cache->policy, save_hint, cache);
	if (r)
		DMERR("policy_walk_mappings failed");

	return r;
}

/*
 * returns true on success
 */
static bool sync_metadata(struct cache *cache)
{
	int r1, r2, r3, r4;

	r1 = write_dirty_bitset(cache);
	if (r1)
		DMERR("could not write dirty bitset");

	r2 = write_discard_bitset(cache);
	if (r2)
		DMERR("could not write discard bitset");

	save_stats(cache);

	r3 = write_hints(cache);
	if (r3)
		DMERR("could not write hints");

	/*
	 * If writing the above metadata failed, we still commit, but don't
	 * set the clean shutdown flag.  This will effectively force every
	 * dirty bit to be set on reload.
	 */
	r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
	if (r4)
		DMERR("could not write cache metadata. Data loss may occur.");

	return !r1 && !r2 && !r3 && !r4;
}

static void cache_postsuspend(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	start_quiescing(cache);
	wait_for_migrations(cache);
	stop_worker(cache);
	requeue_deferred_io(cache);
	stop_quiescing(cache);

	(void) sync_metadata(cache);
}

static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
			bool dirty, uint32_t hint, bool hint_valid)
{
	int r;
	struct cache *cache = context;

	r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
	if (r)
		return r;

	if (dirty)
		set_dirty(cache, oblock, cblock);
	else
		clear_dirty(cache, oblock, cblock);

	return 0;
}

static int load_discard(void *context, sector_t discard_block_size,
			dm_dblock_t dblock, bool discard)
{
	struct cache *cache = context;

	/* FIXME: handle mis-matched block size */

	if (discard)
		set_discard(cache, dblock);
	else
		clear_discard(cache, dblock);

	return 0;
}

static int cache_preresume(struct dm_target *ti)
{
	int r = 0;
	struct cache *cache = ti->private;
	sector_t actual_cache_size = get_dev_size(cache->cache_dev);
	(void) sector_div(actual_cache_size, cache->sectors_per_block);

	/*
	 * Check to see if the cache has resized.
	 */
	if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) {
		cache->cache_size = to_cblock(actual_cache_size);

		r = dm_cache_resize(cache->cmd, cache->cache_size);
		if (r) {
			DMERR("could not resize cache metadata");
			return r;
		}

		cache->sized = true;
	}

	if (!cache->loaded_mappings) {
		r = dm_cache_load_mappings(cache->cmd,
					   dm_cache_policy_get_name(cache->policy),
					   load_mapping, cache);
		if (r) {
			DMERR("could not load cache mappings");
			return r;
		}

		cache->loaded_mappings = true;
	}

	if (!cache->loaded_discards) {
		r = dm_cache_load_discards(cache->cmd, load_discard, cache);
		if (r) {
			DMERR("could not load origin discards");
			return r;
		}

		cache->loaded_discards = true;
	}

	return r;
}

static void cache_resume(struct dm_target *ti)
{
	struct cache *cache = ti->private;

	cache->need_tick_bio = true;
	do_waker(&cache->waker.work);
}

/*
 * Status format:
 *
 * <#used metadata blocks>/<#total metadata blocks>
 * <#read hits> <#read misses> <#write hits> <#write misses>
 * <#demotions> <#promotions> <#blocks in cache> <#dirty>
 * <#features> <features>*
 * <#core args> <core args>
 * <#policy args> <policy args>*
 */
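/*
 * For example, STATUSTYPE_INFO output might look something like the
 * following (all figures illustrative):
 *
 *	104/4096 2871 342 3921 185 6 241 241 13 0 2 migration_threshold 2048
 *
 * followed by whatever <#policy args> <policy args>* the policy emits
 * for itself.
 */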
static void cache_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	int r = 0;
	unsigned i;
	ssize_t sz = 0;
	dm_block_t nr_free_blocks_metadata = 0;
	dm_block_t nr_blocks_metadata = 0;
	char buf[BDEVNAME_SIZE];
	struct cache *cache = ti->private;
	dm_cblock_t residency;

	switch (type) {
	case STATUSTYPE_INFO:
		/* Commit to ensure statistics aren't out-of-date */
		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
			r = dm_cache_commit(cache->cmd, false);
			if (r)
				DMERR("could not commit metadata for accurate status");
		}

		r = dm_cache_get_free_metadata_block_count(cache->cmd,
							   &nr_free_blocks_metadata);
		if (r) {
			DMERR("could not get metadata free block count");
			goto err;
		}

		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
		if (r) {
			DMERR("could not get metadata device size");
			goto err;
		}

		residency = policy_residency(cache->policy);

		DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ",
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       (unsigned) atomic_read(&cache->stats.read_hit),
		       (unsigned) atomic_read(&cache->stats.read_miss),
		       (unsigned) atomic_read(&cache->stats.write_hit),
		       (unsigned) atomic_read(&cache->stats.write_miss),
		       (unsigned) atomic_read(&cache->stats.demotion),
		       (unsigned) atomic_read(&cache->stats.promotion),
		       (unsigned long long) from_cblock(residency),
		       cache->nr_dirty);

		if (cache->features.write_through)
			DMEMIT("1 writethrough ");
		else
			DMEMIT("0 ");

		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
		if (sz < maxlen) {
			r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
			if (r)
				DMERR("policy_emit_config_values returned %d", r);
		}

		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT("%s", buf);

		for (i = 0; i < cache->nr_ctr_args - 1; i++)
			DMEMIT(" %s", cache->ctr_args[i]);
		if (cache->nr_ctr_args)
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
	}

	return;

err:
	DMEMIT("Error");
}

#define NOT_CORE_OPTION 1

static int process_config_option(struct cache *cache, char **argv)
{
	unsigned long tmp;

	if (!strcasecmp(argv[0], "migration_threshold")) {
		if (kstrtoul(argv[1], 10, &tmp))
			return -EINVAL;

		cache->migration_threshold = tmp;
		return 0;
	}

	return NOT_CORE_OPTION;
}

/*
 * Supports <key> <value>.
 *
 * The key migration_threshold is supported by the cache target core.
 */
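/*
 * For example (value illustrative):
 *
 *	dmsetup message <mapped device> 0 migration_threshold 2048
 *
 * Any key the core does not recognise is passed on to the policy via
 * policy_set_config_value().
 */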
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct cache *cache = ti->private;

	if (argc != 2)
		return -EINVAL;

	r = process_config_option(cache, argv);
	if (r == NOT_CORE_OPTION)
		return policy_set_config_value(cache->policy, argv[0], argv[1]);

	return r;
}

static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

/*
 * We assume I/O is going to the origin (which is the volume
 * more likely to have restrictions e.g. by being striped).
 * (Looking up the exact location of the data would be expensive
 * and could always be out of date by the time the bio is submitted.)
 */
static int cache_bvec_merge(struct dm_target *ti,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec, int max_size)
{
	struct cache *cache = ti->private;
	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);

	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = cache->origin_dev->bdev;
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	/*
	 * FIXME: these limits may be incompatible with the cache device
	 */
	limits->max_discard_sectors = cache->discard_block_size * 1024;
	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;

	blk_limits_io_min(limits, 0);
	blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	set_discard_limits(cache, limits);
}

/*----------------------------------------------------------------*/

static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.merge = cache_bvec_merge,
	.io_hints = cache_io_hints,
};

static int __init dm_cache_init(void)
{
	int r;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		return r;
	}

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache) {
		dm_unregister_target(&cache_target);
		return -ENOMEM;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");