/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;

	/*
	 * Must bump bi_remaining to allow bio to complete with
	 * restored bi_end_io.
	 */
	atomic_inc(&bio->bi_remaining);
}

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
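	 *
	 * (Illustrative note, a sketch of how this mode is implemented
	 *  below: remap_to_origin_then_cache() hooks the bio's endio with
	 *  writethrough_endio(), so a write to a clean cached block goes
	 *  to the origin first and is then re-issued to the cache block.)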
129 */ 130 CM_IO_WRITETHROUGH, 131 132 /* 133 * A degraded mode useful for various cache coherency situations 134 * (eg, rolling back snapshots). Reads and writes always go to the 135 * origin. If a write goes to a cached oblock, then the cache 136 * block is invalidated. 137 */ 138 CM_IO_PASSTHROUGH 139 }; 140 141 struct cache_features { 142 enum cache_metadata_mode mode; 143 enum cache_io_mode io_mode; 144 }; 145 146 struct cache_stats { 147 atomic_t read_hit; 148 atomic_t read_miss; 149 atomic_t write_hit; 150 atomic_t write_miss; 151 atomic_t demotion; 152 atomic_t promotion; 153 atomic_t copies_avoided; 154 atomic_t cache_cell_clash; 155 atomic_t commit_count; 156 atomic_t discard_count; 157 }; 158 159 /* 160 * Defines a range of cblocks, begin to (end - 1) are in the range. end is 161 * the one-past-the-end value. 162 */ 163 struct cblock_range { 164 dm_cblock_t begin; 165 dm_cblock_t end; 166 }; 167 168 struct invalidation_request { 169 struct list_head list; 170 struct cblock_range *cblocks; 171 172 atomic_t complete; 173 int err; 174 175 wait_queue_head_t result_wait; 176 }; 177 178 struct cache { 179 struct dm_target *ti; 180 struct dm_target_callbacks callbacks; 181 182 struct dm_cache_metadata *cmd; 183 184 /* 185 * Metadata is written to this device. 186 */ 187 struct dm_dev *metadata_dev; 188 189 /* 190 * The slower of the two data devices. Typically a spindle. 191 */ 192 struct dm_dev *origin_dev; 193 194 /* 195 * The faster of the two data devices. Typically an SSD. 196 */ 197 struct dm_dev *cache_dev; 198 199 /* 200 * Size of the origin device in _complete_ blocks and native sectors. 201 */ 202 dm_oblock_t origin_blocks; 203 sector_t origin_sectors; 204 205 /* 206 * Size of the cache device in blocks. 207 */ 208 dm_cblock_t cache_size; 209 210 /* 211 * Fields for converting from sectors to blocks. 212 */ 213 uint32_t sectors_per_block; 214 int sectors_per_block_shift; 215 216 spinlock_t lock; 217 struct bio_list deferred_bios; 218 struct bio_list deferred_flush_bios; 219 struct bio_list deferred_writethrough_bios; 220 struct list_head quiesced_migrations; 221 struct list_head completed_migrations; 222 struct list_head need_commit_migrations; 223 sector_t migration_threshold; 224 wait_queue_head_t migration_wait; 225 atomic_t nr_migrations; 226 227 wait_queue_head_t quiescing_wait; 228 atomic_t quiescing; 229 atomic_t quiescing_ack; 230 231 /* 232 * cache_size entries, dirty if set 233 */ 234 dm_cblock_t nr_dirty; 235 unsigned long *dirty_bitset; 236 237 /* 238 * origin_blocks entries, discarded if set. 239 */ 240 dm_dblock_t discard_nr_blocks; 241 unsigned long *discard_bitset; 242 uint32_t discard_block_size; /* a power of 2 times sectors per block */ 243 244 /* 245 * Rather than reconstructing the table line for the status we just 246 * save it and regurgitate. 247 */ 248 unsigned nr_ctr_args; 249 const char **ctr_args; 250 251 struct dm_kcopyd_client *copier; 252 struct workqueue_struct *wq; 253 struct work_struct worker; 254 255 struct delayed_work waker; 256 unsigned long last_commit_jiffies; 257 258 struct dm_bio_prison *prison; 259 struct dm_deferred_set *all_io_ds; 260 261 mempool_t *migration_pool; 262 struct dm_cache_migration *next_migration; 263 264 struct dm_cache_policy *policy; 265 unsigned policy_nr_args; 266 267 bool need_tick_bio:1; 268 bool sized:1; 269 bool invalidate:1; 270 bool commit_requested:1; 271 bool loaded_mappings:1; 272 bool loaded_discards:1; 273 274 /* 275 * Cache features such as write-through. 
276 */ 277 struct cache_features features; 278 279 struct cache_stats stats; 280 281 /* 282 * Invalidation fields. 283 */ 284 spinlock_t invalidation_lock; 285 struct list_head invalidation_requests; 286 }; 287 288 struct per_bio_data { 289 bool tick:1; 290 unsigned req_nr:2; 291 struct dm_deferred_entry *all_io_entry; 292 293 /* 294 * writethrough fields. These MUST remain at the end of this 295 * structure and the 'cache' member must be the first as it 296 * is used to determine the offset of the writethrough fields. 297 */ 298 struct cache *cache; 299 dm_cblock_t cblock; 300 struct dm_hook_info hook_info; 301 struct dm_bio_details bio_details; 302 }; 303 304 struct dm_cache_migration { 305 struct list_head list; 306 struct cache *cache; 307 308 unsigned long start_jiffies; 309 dm_oblock_t old_oblock; 310 dm_oblock_t new_oblock; 311 dm_cblock_t cblock; 312 313 bool err:1; 314 bool writeback:1; 315 bool demote:1; 316 bool promote:1; 317 bool requeue_holder:1; 318 bool invalidate:1; 319 320 struct dm_bio_prison_cell *old_ocell; 321 struct dm_bio_prison_cell *new_ocell; 322 }; 323 324 /* 325 * Processing a bio in the worker thread may require these memory 326 * allocations. We prealloc to avoid deadlocks (the same worker thread 327 * frees them back to the mempool). 328 */ 329 struct prealloc { 330 struct dm_cache_migration *mg; 331 struct dm_bio_prison_cell *cell1; 332 struct dm_bio_prison_cell *cell2; 333 }; 334 335 static void wake_worker(struct cache *cache) 336 { 337 queue_work(cache->wq, &cache->worker); 338 } 339 340 /*----------------------------------------------------------------*/ 341 342 static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) 343 { 344 /* FIXME: change to use a local slab. */ 345 return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); 346 } 347 348 static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) 349 { 350 dm_bio_prison_free_cell(cache->prison, cell); 351 } 352 353 static int prealloc_data_structs(struct cache *cache, struct prealloc *p) 354 { 355 if (!p->mg) { 356 p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); 357 if (!p->mg) 358 return -ENOMEM; 359 } 360 361 if (!p->cell1) { 362 p->cell1 = alloc_prison_cell(cache); 363 if (!p->cell1) 364 return -ENOMEM; 365 } 366 367 if (!p->cell2) { 368 p->cell2 = alloc_prison_cell(cache); 369 if (!p->cell2) 370 return -ENOMEM; 371 } 372 373 return 0; 374 } 375 376 static void prealloc_free_structs(struct cache *cache, struct prealloc *p) 377 { 378 if (p->cell2) 379 free_prison_cell(cache, p->cell2); 380 381 if (p->cell1) 382 free_prison_cell(cache, p->cell1); 383 384 if (p->mg) 385 mempool_free(p->mg, cache->migration_pool); 386 } 387 388 static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) 389 { 390 struct dm_cache_migration *mg = p->mg; 391 392 BUG_ON(!mg); 393 p->mg = NULL; 394 395 return mg; 396 } 397 398 /* 399 * You must have a cell within the prealloc struct to return. If not this 400 * function will BUG() rather than returning NULL. 401 */ 402 static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) 403 { 404 struct dm_bio_prison_cell *r = NULL; 405 406 if (p->cell1) { 407 r = p->cell1; 408 p->cell1 = NULL; 409 410 } else if (p->cell2) { 411 r = p->cell2; 412 p->cell2 = NULL; 413 } else 414 BUG(); 415 416 return r; 417 } 418 419 /* 420 * You can't have more than two cells in a prealloc struct. BUG() will be 421 * called if you try and overfill. 
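 *
 * Typical usage mirrors the worker's own pattern (a sketch; see
 * process_bio() below):
 *
 *	cell_prealloc = prealloc_get_cell(structs);
 *	r = bio_detain(cache, block, bio, cell_prealloc,
 *		       (cell_free_fn) prealloc_put_cell,
 *		       structs, &new_ocell);
 *
 * i.e. a cell is taken out of the prealloc struct and handed straight
 * back via prealloc_put_cell() if the detain didn't end up using it.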
422 */ 423 static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) 424 { 425 if (!p->cell2) 426 p->cell2 = cell; 427 428 else if (!p->cell1) 429 p->cell1 = cell; 430 431 else 432 BUG(); 433 } 434 435 /*----------------------------------------------------------------*/ 436 437 static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) 438 { 439 key->virtual = 0; 440 key->dev = 0; 441 key->block = from_oblock(oblock); 442 } 443 444 /* 445 * The caller hands in a preallocated cell, and a free function for it. 446 * The cell will be freed if there's an error, or if it wasn't used because 447 * a cell with that key already exists. 448 */ 449 typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); 450 451 static int bio_detain(struct cache *cache, dm_oblock_t oblock, 452 struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, 453 cell_free_fn free_fn, void *free_context, 454 struct dm_bio_prison_cell **cell_result) 455 { 456 int r; 457 struct dm_cell_key key; 458 459 build_key(oblock, &key); 460 r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); 461 if (r) 462 free_fn(free_context, cell_prealloc); 463 464 return r; 465 } 466 467 static int get_cell(struct cache *cache, 468 dm_oblock_t oblock, 469 struct prealloc *structs, 470 struct dm_bio_prison_cell **cell_result) 471 { 472 int r; 473 struct dm_cell_key key; 474 struct dm_bio_prison_cell *cell_prealloc; 475 476 cell_prealloc = prealloc_get_cell(structs); 477 478 build_key(oblock, &key); 479 r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); 480 if (r) 481 prealloc_put_cell(structs, cell_prealloc); 482 483 return r; 484 } 485 486 /*----------------------------------------------------------------*/ 487 488 static bool is_dirty(struct cache *cache, dm_cblock_t b) 489 { 490 return test_bit(from_cblock(b), cache->dirty_bitset); 491 } 492 493 static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 494 { 495 if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { 496 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); 497 policy_set_dirty(cache->policy, oblock); 498 } 499 } 500 501 static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) 502 { 503 if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { 504 policy_clear_dirty(cache->policy, oblock); 505 cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); 506 if (!from_cblock(cache->nr_dirty)) 507 dm_table_event(cache->ti->table); 508 } 509 } 510 511 /*----------------------------------------------------------------*/ 512 513 static bool block_size_is_power_of_two(struct cache *cache) 514 { 515 return cache->sectors_per_block_shift >= 0; 516 } 517 518 /* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */ 519 #if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 520 __always_inline 521 #endif 522 static dm_block_t block_div(dm_block_t b, uint32_t n) 523 { 524 do_div(b, n); 525 526 return b; 527 } 528 529 static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) 530 { 531 uint32_t discard_blocks = cache->discard_block_size; 532 dm_block_t b = from_oblock(oblock); 533 534 if (!block_size_is_power_of_two(cache)) 535 discard_blocks = discard_blocks / cache->sectors_per_block; 536 else 537 discard_blocks >>= cache->sectors_per_block_shift; 538 539 b = block_div(b, discard_blocks); 540 541 return to_dblock(b); 542 } 543 544 static void set_discard(struct cache *cache, 
dm_dblock_t b) 545 { 546 unsigned long flags; 547 548 atomic_inc(&cache->stats.discard_count); 549 550 spin_lock_irqsave(&cache->lock, flags); 551 set_bit(from_dblock(b), cache->discard_bitset); 552 spin_unlock_irqrestore(&cache->lock, flags); 553 } 554 555 static void clear_discard(struct cache *cache, dm_dblock_t b) 556 { 557 unsigned long flags; 558 559 spin_lock_irqsave(&cache->lock, flags); 560 clear_bit(from_dblock(b), cache->discard_bitset); 561 spin_unlock_irqrestore(&cache->lock, flags); 562 } 563 564 static bool is_discarded(struct cache *cache, dm_dblock_t b) 565 { 566 int r; 567 unsigned long flags; 568 569 spin_lock_irqsave(&cache->lock, flags); 570 r = test_bit(from_dblock(b), cache->discard_bitset); 571 spin_unlock_irqrestore(&cache->lock, flags); 572 573 return r; 574 } 575 576 static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) 577 { 578 int r; 579 unsigned long flags; 580 581 spin_lock_irqsave(&cache->lock, flags); 582 r = test_bit(from_dblock(oblock_to_dblock(cache, b)), 583 cache->discard_bitset); 584 spin_unlock_irqrestore(&cache->lock, flags); 585 586 return r; 587 } 588 589 /*----------------------------------------------------------------*/ 590 591 static void load_stats(struct cache *cache) 592 { 593 struct dm_cache_statistics stats; 594 595 dm_cache_metadata_get_stats(cache->cmd, &stats); 596 atomic_set(&cache->stats.read_hit, stats.read_hits); 597 atomic_set(&cache->stats.read_miss, stats.read_misses); 598 atomic_set(&cache->stats.write_hit, stats.write_hits); 599 atomic_set(&cache->stats.write_miss, stats.write_misses); 600 } 601 602 static void save_stats(struct cache *cache) 603 { 604 struct dm_cache_statistics stats; 605 606 stats.read_hits = atomic_read(&cache->stats.read_hit); 607 stats.read_misses = atomic_read(&cache->stats.read_miss); 608 stats.write_hits = atomic_read(&cache->stats.write_hit); 609 stats.write_misses = atomic_read(&cache->stats.write_miss); 610 611 dm_cache_metadata_set_stats(cache->cmd, &stats); 612 } 613 614 /*---------------------------------------------------------------- 615 * Per bio data 616 *--------------------------------------------------------------*/ 617 618 /* 619 * If using writeback, leave out struct per_bio_data's writethrough fields. 620 */ 621 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache)) 622 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data)) 623 624 static bool writethrough_mode(struct cache_features *f) 625 { 626 return f->io_mode == CM_IO_WRITETHROUGH; 627 } 628 629 static bool writeback_mode(struct cache_features *f) 630 { 631 return f->io_mode == CM_IO_WRITEBACK; 632 } 633 634 static bool passthrough_mode(struct cache_features *f) 635 { 636 return f->io_mode == CM_IO_PASSTHROUGH; 637 } 638 639 static size_t get_per_bio_data_size(struct cache *cache) 640 { 641 return writethrough_mode(&cache->features) ? 
PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; 642 } 643 644 static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) 645 { 646 struct per_bio_data *pb = dm_per_bio_data(bio, data_size); 647 BUG_ON(!pb); 648 return pb; 649 } 650 651 static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size) 652 { 653 struct per_bio_data *pb = get_per_bio_data(bio, data_size); 654 655 pb->tick = false; 656 pb->req_nr = dm_bio_get_target_bio_nr(bio); 657 pb->all_io_entry = NULL; 658 659 return pb; 660 } 661 662 /*---------------------------------------------------------------- 663 * Remapping 664 *--------------------------------------------------------------*/ 665 static void remap_to_origin(struct cache *cache, struct bio *bio) 666 { 667 bio->bi_bdev = cache->origin_dev->bdev; 668 } 669 670 static void remap_to_cache(struct cache *cache, struct bio *bio, 671 dm_cblock_t cblock) 672 { 673 sector_t bi_sector = bio->bi_iter.bi_sector; 674 675 bio->bi_bdev = cache->cache_dev->bdev; 676 if (!block_size_is_power_of_two(cache)) 677 bio->bi_iter.bi_sector = 678 (from_cblock(cblock) * cache->sectors_per_block) + 679 sector_div(bi_sector, cache->sectors_per_block); 680 else 681 bio->bi_iter.bi_sector = 682 (from_cblock(cblock) << cache->sectors_per_block_shift) | 683 (bi_sector & (cache->sectors_per_block - 1)); 684 } 685 686 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) 687 { 688 unsigned long flags; 689 size_t pb_data_size = get_per_bio_data_size(cache); 690 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 691 692 spin_lock_irqsave(&cache->lock, flags); 693 if (cache->need_tick_bio && 694 !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { 695 pb->tick = true; 696 cache->need_tick_bio = false; 697 } 698 spin_unlock_irqrestore(&cache->lock, flags); 699 } 700 701 static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, 702 dm_oblock_t oblock) 703 { 704 check_if_tick_bio_needed(cache, bio); 705 remap_to_origin(cache, bio); 706 if (bio_data_dir(bio) == WRITE) 707 clear_discard(cache, oblock_to_dblock(cache, oblock)); 708 } 709 710 static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, 711 dm_oblock_t oblock, dm_cblock_t cblock) 712 { 713 check_if_tick_bio_needed(cache, bio); 714 remap_to_cache(cache, bio, cblock); 715 if (bio_data_dir(bio) == WRITE) { 716 set_dirty(cache, oblock, cblock); 717 clear_discard(cache, oblock_to_dblock(cache, oblock)); 718 } 719 } 720 721 static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) 722 { 723 sector_t block_nr = bio->bi_iter.bi_sector; 724 725 if (!block_size_is_power_of_two(cache)) 726 (void) sector_div(block_nr, cache->sectors_per_block); 727 else 728 block_nr >>= cache->sectors_per_block_shift; 729 730 return to_oblock(block_nr); 731 } 732 733 static int bio_triggers_commit(struct cache *cache, struct bio *bio) 734 { 735 return bio->bi_rw & (REQ_FLUSH | REQ_FUA); 736 } 737 738 static void issue(struct cache *cache, struct bio *bio) 739 { 740 unsigned long flags; 741 742 if (!bio_triggers_commit(cache, bio)) { 743 generic_make_request(bio); 744 return; 745 } 746 747 /* 748 * Batch together any bios that trigger commits and then issue a 749 * single commit for them in do_worker(). 
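	 *
	 * (These are drained again by process_deferred_flush_bios(): after
	 *  a successful commit they are passed on with
	 *  generic_make_request(), and if the commit fails they are
	 *  errored instead.)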
750 */ 751 spin_lock_irqsave(&cache->lock, flags); 752 cache->commit_requested = true; 753 bio_list_add(&cache->deferred_flush_bios, bio); 754 spin_unlock_irqrestore(&cache->lock, flags); 755 } 756 757 static void defer_writethrough_bio(struct cache *cache, struct bio *bio) 758 { 759 unsigned long flags; 760 761 spin_lock_irqsave(&cache->lock, flags); 762 bio_list_add(&cache->deferred_writethrough_bios, bio); 763 spin_unlock_irqrestore(&cache->lock, flags); 764 765 wake_worker(cache); 766 } 767 768 static void writethrough_endio(struct bio *bio, int err) 769 { 770 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 771 772 dm_unhook_bio(&pb->hook_info, bio); 773 774 if (err) { 775 bio_endio(bio, err); 776 return; 777 } 778 779 dm_bio_restore(&pb->bio_details, bio); 780 remap_to_cache(pb->cache, bio, pb->cblock); 781 782 /* 783 * We can't issue this bio directly, since we're in interrupt 784 * context. So it gets put on a bio list for processing by the 785 * worker thread. 786 */ 787 defer_writethrough_bio(pb->cache, bio); 788 } 789 790 /* 791 * When running in writethrough mode we need to send writes to clean blocks 792 * to both the cache and origin devices. In future we'd like to clone the 793 * bio and send them in parallel, but for now we're doing them in 794 * series as this is easier. 795 */ 796 static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio, 797 dm_oblock_t oblock, dm_cblock_t cblock) 798 { 799 struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); 800 801 pb->cache = cache; 802 pb->cblock = cblock; 803 dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL); 804 dm_bio_record(&pb->bio_details, bio); 805 806 remap_to_origin_clear_discard(pb->cache, bio, oblock); 807 } 808 809 /*---------------------------------------------------------------- 810 * Migration processing 811 * 812 * Migration covers moving data from the origin device to the cache, or 813 * vice versa. 814 *--------------------------------------------------------------*/ 815 static void free_migration(struct dm_cache_migration *mg) 816 { 817 mempool_free(mg, mg->cache->migration_pool); 818 } 819 820 static void inc_nr_migrations(struct cache *cache) 821 { 822 atomic_inc(&cache->nr_migrations); 823 } 824 825 static void dec_nr_migrations(struct cache *cache) 826 { 827 atomic_dec(&cache->nr_migrations); 828 829 /* 830 * Wake the worker in case we're suspending the target. 831 */ 832 wake_up(&cache->migration_wait); 833 } 834 835 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 836 bool holder) 837 { 838 (holder ? 
dm_cell_release : dm_cell_release_no_holder) 839 (cache->prison, cell, &cache->deferred_bios); 840 free_prison_cell(cache, cell); 841 } 842 843 static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, 844 bool holder) 845 { 846 unsigned long flags; 847 848 spin_lock_irqsave(&cache->lock, flags); 849 __cell_defer(cache, cell, holder); 850 spin_unlock_irqrestore(&cache->lock, flags); 851 852 wake_worker(cache); 853 } 854 855 static void cleanup_migration(struct dm_cache_migration *mg) 856 { 857 struct cache *cache = mg->cache; 858 free_migration(mg); 859 dec_nr_migrations(cache); 860 } 861 862 static void migration_failure(struct dm_cache_migration *mg) 863 { 864 struct cache *cache = mg->cache; 865 866 if (mg->writeback) { 867 DMWARN_LIMIT("writeback failed; couldn't copy block"); 868 set_dirty(cache, mg->old_oblock, mg->cblock); 869 cell_defer(cache, mg->old_ocell, false); 870 871 } else if (mg->demote) { 872 DMWARN_LIMIT("demotion failed; couldn't copy block"); 873 policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); 874 875 cell_defer(cache, mg->old_ocell, mg->promote ? false : true); 876 if (mg->promote) 877 cell_defer(cache, mg->new_ocell, true); 878 } else { 879 DMWARN_LIMIT("promotion failed; couldn't copy block"); 880 policy_remove_mapping(cache->policy, mg->new_oblock); 881 cell_defer(cache, mg->new_ocell, true); 882 } 883 884 cleanup_migration(mg); 885 } 886 887 static void migration_success_pre_commit(struct dm_cache_migration *mg) 888 { 889 unsigned long flags; 890 struct cache *cache = mg->cache; 891 892 if (mg->writeback) { 893 cell_defer(cache, mg->old_ocell, false); 894 clear_dirty(cache, mg->old_oblock, mg->cblock); 895 cleanup_migration(mg); 896 return; 897 898 } else if (mg->demote) { 899 if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { 900 DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); 901 policy_force_mapping(cache->policy, mg->new_oblock, 902 mg->old_oblock); 903 if (mg->promote) 904 cell_defer(cache, mg->new_ocell, true); 905 cleanup_migration(mg); 906 return; 907 } 908 } else { 909 if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { 910 DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); 911 policy_remove_mapping(cache->policy, mg->new_oblock); 912 cleanup_migration(mg); 913 return; 914 } 915 } 916 917 spin_lock_irqsave(&cache->lock, flags); 918 list_add_tail(&mg->list, &cache->need_commit_migrations); 919 cache->commit_requested = true; 920 spin_unlock_irqrestore(&cache->lock, flags); 921 } 922 923 static void migration_success_post_commit(struct dm_cache_migration *mg) 924 { 925 unsigned long flags; 926 struct cache *cache = mg->cache; 927 928 if (mg->writeback) { 929 DMWARN("writeback unexpectedly triggered commit"); 930 return; 931 932 } else if (mg->demote) { 933 cell_defer(cache, mg->old_ocell, mg->promote ? 
false : true); 934 935 if (mg->promote) { 936 mg->demote = false; 937 938 spin_lock_irqsave(&cache->lock, flags); 939 list_add_tail(&mg->list, &cache->quiesced_migrations); 940 spin_unlock_irqrestore(&cache->lock, flags); 941 942 } else { 943 if (mg->invalidate) 944 policy_remove_mapping(cache->policy, mg->old_oblock); 945 cleanup_migration(mg); 946 } 947 948 } else { 949 if (mg->requeue_holder) 950 cell_defer(cache, mg->new_ocell, true); 951 else { 952 bio_endio(mg->new_ocell->holder, 0); 953 cell_defer(cache, mg->new_ocell, false); 954 } 955 clear_dirty(cache, mg->new_oblock, mg->cblock); 956 cleanup_migration(mg); 957 } 958 } 959 960 static void copy_complete(int read_err, unsigned long write_err, void *context) 961 { 962 unsigned long flags; 963 struct dm_cache_migration *mg = (struct dm_cache_migration *) context; 964 struct cache *cache = mg->cache; 965 966 if (read_err || write_err) 967 mg->err = true; 968 969 spin_lock_irqsave(&cache->lock, flags); 970 list_add_tail(&mg->list, &cache->completed_migrations); 971 spin_unlock_irqrestore(&cache->lock, flags); 972 973 wake_worker(cache); 974 } 975 976 static void issue_copy_real(struct dm_cache_migration *mg) 977 { 978 int r; 979 struct dm_io_region o_region, c_region; 980 struct cache *cache = mg->cache; 981 982 o_region.bdev = cache->origin_dev->bdev; 983 o_region.count = cache->sectors_per_block; 984 985 c_region.bdev = cache->cache_dev->bdev; 986 c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; 987 c_region.count = cache->sectors_per_block; 988 989 if (mg->writeback || mg->demote) { 990 /* demote */ 991 o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; 992 r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); 993 } else { 994 /* promote */ 995 o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; 996 r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); 997 } 998 999 if (r < 0) { 1000 DMERR_LIMIT("issuing migration failed"); 1001 migration_failure(mg); 1002 } 1003 } 1004 1005 static void overwrite_endio(struct bio *bio, int err) 1006 { 1007 struct dm_cache_migration *mg = bio->bi_private; 1008 struct cache *cache = mg->cache; 1009 size_t pb_data_size = get_per_bio_data_size(cache); 1010 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1011 unsigned long flags; 1012 1013 if (err) 1014 mg->err = true; 1015 1016 spin_lock_irqsave(&cache->lock, flags); 1017 list_add_tail(&mg->list, &cache->completed_migrations); 1018 dm_unhook_bio(&pb->hook_info, bio); 1019 mg->requeue_holder = false; 1020 spin_unlock_irqrestore(&cache->lock, flags); 1021 1022 wake_worker(cache); 1023 } 1024 1025 static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) 1026 { 1027 size_t pb_data_size = get_per_bio_data_size(mg->cache); 1028 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1029 1030 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); 1031 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); 1032 generic_make_request(bio); 1033 } 1034 1035 static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) 1036 { 1037 return (bio_data_dir(bio) == WRITE) && 1038 (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); 1039 } 1040 1041 static void avoid_copy(struct dm_cache_migration *mg) 1042 { 1043 atomic_inc(&mg->cache->stats.copies_avoided); 1044 migration_success_pre_commit(mg); 1045 } 1046 1047 static void issue_copy(struct dm_cache_migration *mg) 
1048 { 1049 bool avoid; 1050 struct cache *cache = mg->cache; 1051 1052 if (mg->writeback || mg->demote) 1053 avoid = !is_dirty(cache, mg->cblock) || 1054 is_discarded_oblock(cache, mg->old_oblock); 1055 else { 1056 struct bio *bio = mg->new_ocell->holder; 1057 1058 avoid = is_discarded_oblock(cache, mg->new_oblock); 1059 1060 if (!avoid && bio_writes_complete_block(cache, bio)) { 1061 issue_overwrite(mg, bio); 1062 return; 1063 } 1064 } 1065 1066 avoid ? avoid_copy(mg) : issue_copy_real(mg); 1067 } 1068 1069 static void complete_migration(struct dm_cache_migration *mg) 1070 { 1071 if (mg->err) 1072 migration_failure(mg); 1073 else 1074 migration_success_pre_commit(mg); 1075 } 1076 1077 static void process_migrations(struct cache *cache, struct list_head *head, 1078 void (*fn)(struct dm_cache_migration *)) 1079 { 1080 unsigned long flags; 1081 struct list_head list; 1082 struct dm_cache_migration *mg, *tmp; 1083 1084 INIT_LIST_HEAD(&list); 1085 spin_lock_irqsave(&cache->lock, flags); 1086 list_splice_init(head, &list); 1087 spin_unlock_irqrestore(&cache->lock, flags); 1088 1089 list_for_each_entry_safe(mg, tmp, &list, list) 1090 fn(mg); 1091 } 1092 1093 static void __queue_quiesced_migration(struct dm_cache_migration *mg) 1094 { 1095 list_add_tail(&mg->list, &mg->cache->quiesced_migrations); 1096 } 1097 1098 static void queue_quiesced_migration(struct dm_cache_migration *mg) 1099 { 1100 unsigned long flags; 1101 struct cache *cache = mg->cache; 1102 1103 spin_lock_irqsave(&cache->lock, flags); 1104 __queue_quiesced_migration(mg); 1105 spin_unlock_irqrestore(&cache->lock, flags); 1106 1107 wake_worker(cache); 1108 } 1109 1110 static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) 1111 { 1112 unsigned long flags; 1113 struct dm_cache_migration *mg, *tmp; 1114 1115 spin_lock_irqsave(&cache->lock, flags); 1116 list_for_each_entry_safe(mg, tmp, work, list) 1117 __queue_quiesced_migration(mg); 1118 spin_unlock_irqrestore(&cache->lock, flags); 1119 1120 wake_worker(cache); 1121 } 1122 1123 static void check_for_quiesced_migrations(struct cache *cache, 1124 struct per_bio_data *pb) 1125 { 1126 struct list_head work; 1127 1128 if (!pb->all_io_entry) 1129 return; 1130 1131 INIT_LIST_HEAD(&work); 1132 if (pb->all_io_entry) 1133 dm_deferred_entry_dec(pb->all_io_entry, &work); 1134 1135 if (!list_empty(&work)) 1136 queue_quiesced_migrations(cache, &work); 1137 } 1138 1139 static void quiesce_migration(struct dm_cache_migration *mg) 1140 { 1141 if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) 1142 queue_quiesced_migration(mg); 1143 } 1144 1145 static void promote(struct cache *cache, struct prealloc *structs, 1146 dm_oblock_t oblock, dm_cblock_t cblock, 1147 struct dm_bio_prison_cell *cell) 1148 { 1149 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1150 1151 mg->err = false; 1152 mg->writeback = false; 1153 mg->demote = false; 1154 mg->promote = true; 1155 mg->requeue_holder = true; 1156 mg->invalidate = false; 1157 mg->cache = cache; 1158 mg->new_oblock = oblock; 1159 mg->cblock = cblock; 1160 mg->old_ocell = NULL; 1161 mg->new_ocell = cell; 1162 mg->start_jiffies = jiffies; 1163 1164 inc_nr_migrations(cache); 1165 quiesce_migration(mg); 1166 } 1167 1168 static void writeback(struct cache *cache, struct prealloc *structs, 1169 dm_oblock_t oblock, dm_cblock_t cblock, 1170 struct dm_bio_prison_cell *cell) 1171 { 1172 struct dm_cache_migration *mg = prealloc_get_migration(structs); 1173 1174 mg->err = false; 1175 mg->writeback = true; 1176 
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, eg, the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
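 *
 * Worked example (a sketch): with a discard_block_size of 128 sectors,
 * a discard bio covering sectors 100..999 has its start rounded up to
 * discard block 1 (sector 128) and its end rounded down to block 7
 * (sector 896), so only the fully covered blocks 1..6 are marked in
 * the bitset; the partial blocks at either end are left alone.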
1283 */ 1284 static void process_discard_bio(struct cache *cache, struct bio *bio) 1285 { 1286 dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, 1287 cache->discard_block_size); 1288 dm_block_t end_block = bio_end_sector(bio); 1289 dm_block_t b; 1290 1291 end_block = block_div(end_block, cache->discard_block_size); 1292 1293 for (b = start_block; b < end_block; b++) 1294 set_discard(cache, to_dblock(b)); 1295 1296 bio_endio(bio, 0); 1297 } 1298 1299 static bool spare_migration_bandwidth(struct cache *cache) 1300 { 1301 sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * 1302 cache->sectors_per_block; 1303 return current_volume < cache->migration_threshold; 1304 } 1305 1306 static void inc_hit_counter(struct cache *cache, struct bio *bio) 1307 { 1308 atomic_inc(bio_data_dir(bio) == READ ? 1309 &cache->stats.read_hit : &cache->stats.write_hit); 1310 } 1311 1312 static void inc_miss_counter(struct cache *cache, struct bio *bio) 1313 { 1314 atomic_inc(bio_data_dir(bio) == READ ? 1315 &cache->stats.read_miss : &cache->stats.write_miss); 1316 } 1317 1318 static void issue_cache_bio(struct cache *cache, struct bio *bio, 1319 struct per_bio_data *pb, 1320 dm_oblock_t oblock, dm_cblock_t cblock) 1321 { 1322 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1323 remap_to_cache_dirty(cache, bio, oblock, cblock); 1324 issue(cache, bio); 1325 } 1326 1327 static void process_bio(struct cache *cache, struct prealloc *structs, 1328 struct bio *bio) 1329 { 1330 int r; 1331 bool release_cell = true; 1332 dm_oblock_t block = get_bio_block(cache, bio); 1333 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; 1334 struct policy_result lookup_result; 1335 size_t pb_data_size = get_per_bio_data_size(cache); 1336 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 1337 bool discarded_block = is_discarded_oblock(cache, block); 1338 bool passthrough = passthrough_mode(&cache->features); 1339 bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); 1340 1341 /* 1342 * Check to see if that block is currently migrating. 1343 */ 1344 cell_prealloc = prealloc_get_cell(structs); 1345 r = bio_detain(cache, block, bio, cell_prealloc, 1346 (cell_free_fn) prealloc_put_cell, 1347 structs, &new_ocell); 1348 if (r > 0) 1349 return; 1350 1351 r = policy_map(cache->policy, block, true, can_migrate, discarded_block, 1352 bio, &lookup_result); 1353 1354 if (r == -EWOULDBLOCK) 1355 /* migration has been denied */ 1356 lookup_result.op = POLICY_MISS; 1357 1358 switch (lookup_result.op) { 1359 case POLICY_HIT: 1360 if (passthrough) { 1361 inc_miss_counter(cache, bio); 1362 1363 /* 1364 * Passthrough always maps to the origin, 1365 * invalidating any cache blocks that are written 1366 * to. 
1367 */ 1368 1369 if (bio_data_dir(bio) == WRITE) { 1370 atomic_inc(&cache->stats.demotion); 1371 invalidate(cache, structs, block, lookup_result.cblock, new_ocell); 1372 release_cell = false; 1373 1374 } else { 1375 /* FIXME: factor out issue_origin() */ 1376 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1377 remap_to_origin_clear_discard(cache, bio, block); 1378 issue(cache, bio); 1379 } 1380 } else { 1381 inc_hit_counter(cache, bio); 1382 1383 if (bio_data_dir(bio) == WRITE && 1384 writethrough_mode(&cache->features) && 1385 !is_dirty(cache, lookup_result.cblock)) { 1386 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1387 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 1388 issue(cache, bio); 1389 } else 1390 issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); 1391 } 1392 1393 break; 1394 1395 case POLICY_MISS: 1396 inc_miss_counter(cache, bio); 1397 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 1398 remap_to_origin_clear_discard(cache, bio, block); 1399 issue(cache, bio); 1400 break; 1401 1402 case POLICY_NEW: 1403 atomic_inc(&cache->stats.promotion); 1404 promote(cache, structs, block, lookup_result.cblock, new_ocell); 1405 release_cell = false; 1406 break; 1407 1408 case POLICY_REPLACE: 1409 cell_prealloc = prealloc_get_cell(structs); 1410 r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, 1411 (cell_free_fn) prealloc_put_cell, 1412 structs, &old_ocell); 1413 if (r > 0) { 1414 /* 1415 * We have to be careful to avoid lock inversion of 1416 * the cells. So we back off, and wait for the 1417 * old_ocell to become free. 1418 */ 1419 policy_force_mapping(cache->policy, block, 1420 lookup_result.old_oblock); 1421 atomic_inc(&cache->stats.cache_cell_clash); 1422 break; 1423 } 1424 atomic_inc(&cache->stats.demotion); 1425 atomic_inc(&cache->stats.promotion); 1426 1427 demote_then_promote(cache, structs, lookup_result.old_oblock, 1428 block, lookup_result.cblock, 1429 old_ocell, new_ocell); 1430 release_cell = false; 1431 break; 1432 1433 default: 1434 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, 1435 (unsigned) lookup_result.op); 1436 bio_io_error(bio); 1437 } 1438 1439 if (release_cell) 1440 cell_defer(cache, new_ocell, false); 1441 } 1442 1443 static int need_commit_due_to_time(struct cache *cache) 1444 { 1445 return jiffies < cache->last_commit_jiffies || 1446 jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; 1447 } 1448 1449 static int commit_if_needed(struct cache *cache) 1450 { 1451 int r = 0; 1452 1453 if ((cache->commit_requested || need_commit_due_to_time(cache)) && 1454 dm_cache_changed_this_transaction(cache->cmd)) { 1455 atomic_inc(&cache->stats.commit_count); 1456 cache->commit_requested = false; 1457 r = dm_cache_commit(cache->cmd, false); 1458 cache->last_commit_jiffies = jiffies; 1459 } 1460 1461 return r; 1462 } 1463 1464 static void process_deferred_bios(struct cache *cache) 1465 { 1466 unsigned long flags; 1467 struct bio_list bios; 1468 struct bio *bio; 1469 struct prealloc structs; 1470 1471 memset(&structs, 0, sizeof(structs)); 1472 bio_list_init(&bios); 1473 1474 spin_lock_irqsave(&cache->lock, flags); 1475 bio_list_merge(&bios, &cache->deferred_bios); 1476 bio_list_init(&cache->deferred_bios); 1477 spin_unlock_irqrestore(&cache->lock, flags); 1478 1479 while (!bio_list_empty(&bios)) { 1480 /* 1481 * If we've got no free migration structs, and processing 1482 * this bio might require one, we pause until there are some 1483 * prepared mappings to process. 
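		 *
		 * (The bios are merged back onto cache->deferred_bios just
		 *  below, so nothing is lost; the worker simply retries
		 *  them on a later pass, e.g. once in-flight migrations
		 *  complete and their structs are recycled.)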
1484 */ 1485 if (prealloc_data_structs(cache, &structs)) { 1486 spin_lock_irqsave(&cache->lock, flags); 1487 bio_list_merge(&cache->deferred_bios, &bios); 1488 spin_unlock_irqrestore(&cache->lock, flags); 1489 break; 1490 } 1491 1492 bio = bio_list_pop(&bios); 1493 1494 if (bio->bi_rw & REQ_FLUSH) 1495 process_flush_bio(cache, bio); 1496 else if (bio->bi_rw & REQ_DISCARD) 1497 process_discard_bio(cache, bio); 1498 else 1499 process_bio(cache, &structs, bio); 1500 } 1501 1502 prealloc_free_structs(cache, &structs); 1503 } 1504 1505 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) 1506 { 1507 unsigned long flags; 1508 struct bio_list bios; 1509 struct bio *bio; 1510 1511 bio_list_init(&bios); 1512 1513 spin_lock_irqsave(&cache->lock, flags); 1514 bio_list_merge(&bios, &cache->deferred_flush_bios); 1515 bio_list_init(&cache->deferred_flush_bios); 1516 spin_unlock_irqrestore(&cache->lock, flags); 1517 1518 while ((bio = bio_list_pop(&bios))) 1519 submit_bios ? generic_make_request(bio) : bio_io_error(bio); 1520 } 1521 1522 static void process_deferred_writethrough_bios(struct cache *cache) 1523 { 1524 unsigned long flags; 1525 struct bio_list bios; 1526 struct bio *bio; 1527 1528 bio_list_init(&bios); 1529 1530 spin_lock_irqsave(&cache->lock, flags); 1531 bio_list_merge(&bios, &cache->deferred_writethrough_bios); 1532 bio_list_init(&cache->deferred_writethrough_bios); 1533 spin_unlock_irqrestore(&cache->lock, flags); 1534 1535 while ((bio = bio_list_pop(&bios))) 1536 generic_make_request(bio); 1537 } 1538 1539 static void writeback_some_dirty_blocks(struct cache *cache) 1540 { 1541 int r = 0; 1542 dm_oblock_t oblock; 1543 dm_cblock_t cblock; 1544 struct prealloc structs; 1545 struct dm_bio_prison_cell *old_ocell; 1546 1547 memset(&structs, 0, sizeof(structs)); 1548 1549 while (spare_migration_bandwidth(cache)) { 1550 if (prealloc_data_structs(cache, &structs)) 1551 break; 1552 1553 r = policy_writeback_work(cache->policy, &oblock, &cblock); 1554 if (r) 1555 break; 1556 1557 r = get_cell(cache, oblock, &structs, &old_ocell); 1558 if (r) { 1559 policy_set_dirty(cache->policy, oblock); 1560 break; 1561 } 1562 1563 writeback(cache, &structs, oblock, cblock, old_ocell); 1564 } 1565 1566 prealloc_free_structs(cache, &structs); 1567 } 1568 1569 /*---------------------------------------------------------------- 1570 * Invalidations. 1571 * Dropping something from the cache *without* writing back. 
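 *
 * Each invalidation_request carries a half-open range of cblocks
 * (begin inclusive, end exclusive), e.g. a request with begin = 0 and
 * end = 128 asks for cblocks 0..127 to be dropped.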
1572 *--------------------------------------------------------------*/ 1573 1574 static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) 1575 { 1576 int r = 0; 1577 uint64_t begin = from_cblock(req->cblocks->begin); 1578 uint64_t end = from_cblock(req->cblocks->end); 1579 1580 while (begin != end) { 1581 r = policy_remove_cblock(cache->policy, to_cblock(begin)); 1582 if (!r) { 1583 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); 1584 if (r) 1585 break; 1586 1587 } else if (r == -ENODATA) { 1588 /* harmless, already unmapped */ 1589 r = 0; 1590 1591 } else { 1592 DMERR("policy_remove_cblock failed"); 1593 break; 1594 } 1595 1596 begin++; 1597 } 1598 1599 cache->commit_requested = true; 1600 1601 req->err = r; 1602 atomic_set(&req->complete, 1); 1603 1604 wake_up(&req->result_wait); 1605 } 1606 1607 static void process_invalidation_requests(struct cache *cache) 1608 { 1609 struct list_head list; 1610 struct invalidation_request *req, *tmp; 1611 1612 INIT_LIST_HEAD(&list); 1613 spin_lock(&cache->invalidation_lock); 1614 list_splice_init(&cache->invalidation_requests, &list); 1615 spin_unlock(&cache->invalidation_lock); 1616 1617 list_for_each_entry_safe (req, tmp, &list, list) 1618 process_invalidation_request(cache, req); 1619 } 1620 1621 /*---------------------------------------------------------------- 1622 * Main worker loop 1623 *--------------------------------------------------------------*/ 1624 static bool is_quiescing(struct cache *cache) 1625 { 1626 return atomic_read(&cache->quiescing); 1627 } 1628 1629 static void ack_quiescing(struct cache *cache) 1630 { 1631 if (is_quiescing(cache)) { 1632 atomic_inc(&cache->quiescing_ack); 1633 wake_up(&cache->quiescing_wait); 1634 } 1635 } 1636 1637 static void wait_for_quiescing_ack(struct cache *cache) 1638 { 1639 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); 1640 } 1641 1642 static void start_quiescing(struct cache *cache) 1643 { 1644 atomic_inc(&cache->quiescing); 1645 wait_for_quiescing_ack(cache); 1646 } 1647 1648 static void stop_quiescing(struct cache *cache) 1649 { 1650 atomic_set(&cache->quiescing, 0); 1651 atomic_set(&cache->quiescing_ack, 0); 1652 } 1653 1654 static void wait_for_migrations(struct cache *cache) 1655 { 1656 wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); 1657 } 1658 1659 static void stop_worker(struct cache *cache) 1660 { 1661 cancel_delayed_work(&cache->waker); 1662 flush_workqueue(cache->wq); 1663 } 1664 1665 static void requeue_deferred_io(struct cache *cache) 1666 { 1667 struct bio *bio; 1668 struct bio_list bios; 1669 1670 bio_list_init(&bios); 1671 bio_list_merge(&bios, &cache->deferred_bios); 1672 bio_list_init(&cache->deferred_bios); 1673 1674 while ((bio = bio_list_pop(&bios))) 1675 bio_endio(bio, DM_ENDIO_REQUEUE); 1676 } 1677 1678 static int more_work(struct cache *cache) 1679 { 1680 if (is_quiescing(cache)) 1681 return !list_empty(&cache->quiesced_migrations) || 1682 !list_empty(&cache->completed_migrations) || 1683 !list_empty(&cache->need_commit_migrations); 1684 else 1685 return !bio_list_empty(&cache->deferred_bios) || 1686 !bio_list_empty(&cache->deferred_flush_bios) || 1687 !bio_list_empty(&cache->deferred_writethrough_bios) || 1688 !list_empty(&cache->quiesced_migrations) || 1689 !list_empty(&cache->completed_migrations) || 1690 !list_empty(&cache->need_commit_migrations) || 1691 cache->invalidate; 1692 } 1693 1694 static void do_worker(struct work_struct *ws) 1695 { 1696 struct cache *cache = 
container_of(ws, struct cache, worker); 1697 1698 do { 1699 if (!is_quiescing(cache)) { 1700 writeback_some_dirty_blocks(cache); 1701 process_deferred_writethrough_bios(cache); 1702 process_deferred_bios(cache); 1703 process_invalidation_requests(cache); 1704 } 1705 1706 process_migrations(cache, &cache->quiesced_migrations, issue_copy); 1707 process_migrations(cache, &cache->completed_migrations, complete_migration); 1708 1709 if (commit_if_needed(cache)) { 1710 process_deferred_flush_bios(cache, false); 1711 1712 /* 1713 * FIXME: rollback metadata or just go into a 1714 * failure mode and error everything 1715 */ 1716 } else { 1717 process_deferred_flush_bios(cache, true); 1718 process_migrations(cache, &cache->need_commit_migrations, 1719 migration_success_post_commit); 1720 } 1721 1722 ack_quiescing(cache); 1723 1724 } while (more_work(cache)); 1725 } 1726 1727 /* 1728 * We want to commit periodically so that not too much 1729 * unwritten metadata builds up. 1730 */ 1731 static void do_waker(struct work_struct *ws) 1732 { 1733 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 1734 policy_tick(cache->policy); 1735 wake_worker(cache); 1736 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 1737 } 1738 1739 /*----------------------------------------------------------------*/ 1740 1741 static int is_congested(struct dm_dev *dev, int bdi_bits) 1742 { 1743 struct request_queue *q = bdev_get_queue(dev->bdev); 1744 return bdi_congested(&q->backing_dev_info, bdi_bits); 1745 } 1746 1747 static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 1748 { 1749 struct cache *cache = container_of(cb, struct cache, callbacks); 1750 1751 return is_congested(cache->origin_dev, bdi_bits) || 1752 is_congested(cache->cache_dev, bdi_bits); 1753 } 1754 1755 /*---------------------------------------------------------------- 1756 * Target methods 1757 *--------------------------------------------------------------*/ 1758 1759 /* 1760 * This function gets called on the error paths of the constructor, so we 1761 * have to cope with a partially initialised struct. 
1762 */ 1763 static void destroy(struct cache *cache) 1764 { 1765 unsigned i; 1766 1767 if (cache->next_migration) 1768 mempool_free(cache->next_migration, cache->migration_pool); 1769 1770 if (cache->migration_pool) 1771 mempool_destroy(cache->migration_pool); 1772 1773 if (cache->all_io_ds) 1774 dm_deferred_set_destroy(cache->all_io_ds); 1775 1776 if (cache->prison) 1777 dm_bio_prison_destroy(cache->prison); 1778 1779 if (cache->wq) 1780 destroy_workqueue(cache->wq); 1781 1782 if (cache->dirty_bitset) 1783 free_bitset(cache->dirty_bitset); 1784 1785 if (cache->discard_bitset) 1786 free_bitset(cache->discard_bitset); 1787 1788 if (cache->copier) 1789 dm_kcopyd_client_destroy(cache->copier); 1790 1791 if (cache->cmd) 1792 dm_cache_metadata_close(cache->cmd); 1793 1794 if (cache->metadata_dev) 1795 dm_put_device(cache->ti, cache->metadata_dev); 1796 1797 if (cache->origin_dev) 1798 dm_put_device(cache->ti, cache->origin_dev); 1799 1800 if (cache->cache_dev) 1801 dm_put_device(cache->ti, cache->cache_dev); 1802 1803 if (cache->policy) 1804 dm_cache_policy_destroy(cache->policy); 1805 1806 for (i = 0; i < cache->nr_ctr_args ; i++) 1807 kfree(cache->ctr_args[i]); 1808 kfree(cache->ctr_args); 1809 1810 kfree(cache); 1811 } 1812 1813 static void cache_dtr(struct dm_target *ti) 1814 { 1815 struct cache *cache = ti->private; 1816 1817 destroy(cache); 1818 } 1819 1820 static sector_t get_dev_size(struct dm_dev *dev) 1821 { 1822 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 1823 } 1824 1825 /*----------------------------------------------------------------*/ 1826 1827 /* 1828 * Construct a cache device mapping. 1829 * 1830 * cache <metadata dev> <cache dev> <origin dev> <block size> 1831 * <#feature args> [<feature arg>]* 1832 * <policy> <#policy args> [<policy arg>]* 1833 * 1834 * metadata dev : fast device holding the persistent metadata 1835 * cache dev : fast device holding cached data blocks 1836 * origin dev : slow device holding original data blocks 1837 * block size : cache unit size in sectors 1838 * 1839 * #feature args : number of feature arguments passed 1840 * feature args : writethrough. (The default is writeback.) 1841 * 1842 * policy : the replacement policy to use 1843 * #policy args : an even number of policy arguments corresponding 1844 * to key/value pairs passed to the policy 1845 * policy args : key/value pairs passed to the policy 1846 * E.g. 'sequential_threshold 1024' 1847 * See cache-policies.txt for details. 1848 * 1849 * Optional feature arguments are: 1850 * writethrough : write through caching that prohibits cache block 1851 * content from being different from origin block content. 1852 * Without this argument, the default behaviour is to write 1853 * back cache block contents later for performance reasons, 1854 * so they may differ from the corresponding origin blocks. 
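 *
 * Example of a target line in this format (device names and policy
 * choice are purely illustrative):
 *
 *	cache /dev/mapper/fast-meta /dev/mapper/fast /dev/mapper/slow
 *	      512 1 writethrough default 2 sequential_threshold 1024
 *
 * i.e. cache the slow device in 512 sector (256KB) blocks, in
 * writethrough mode, using the 'default' policy with a single
 * key/value pair.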
1855 */ 1856 struct cache_args { 1857 struct dm_target *ti; 1858 1859 struct dm_dev *metadata_dev; 1860 1861 struct dm_dev *cache_dev; 1862 sector_t cache_sectors; 1863 1864 struct dm_dev *origin_dev; 1865 sector_t origin_sectors; 1866 1867 uint32_t block_size; 1868 1869 const char *policy_name; 1870 int policy_argc; 1871 const char **policy_argv; 1872 1873 struct cache_features features; 1874 }; 1875 1876 static void destroy_cache_args(struct cache_args *ca) 1877 { 1878 if (ca->metadata_dev) 1879 dm_put_device(ca->ti, ca->metadata_dev); 1880 1881 if (ca->cache_dev) 1882 dm_put_device(ca->ti, ca->cache_dev); 1883 1884 if (ca->origin_dev) 1885 dm_put_device(ca->ti, ca->origin_dev); 1886 1887 kfree(ca); 1888 } 1889 1890 static bool at_least_one_arg(struct dm_arg_set *as, char **error) 1891 { 1892 if (!as->argc) { 1893 *error = "Insufficient args"; 1894 return false; 1895 } 1896 1897 return true; 1898 } 1899 1900 static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, 1901 char **error) 1902 { 1903 int r; 1904 sector_t metadata_dev_size; 1905 char b[BDEVNAME_SIZE]; 1906 1907 if (!at_least_one_arg(as, error)) 1908 return -EINVAL; 1909 1910 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 1911 &ca->metadata_dev); 1912 if (r) { 1913 *error = "Error opening metadata device"; 1914 return r; 1915 } 1916 1917 metadata_dev_size = get_dev_size(ca->metadata_dev); 1918 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) 1919 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", 1920 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); 1921 1922 return 0; 1923 } 1924 1925 static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, 1926 char **error) 1927 { 1928 int r; 1929 1930 if (!at_least_one_arg(as, error)) 1931 return -EINVAL; 1932 1933 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 1934 &ca->cache_dev); 1935 if (r) { 1936 *error = "Error opening cache device"; 1937 return r; 1938 } 1939 ca->cache_sectors = get_dev_size(ca->cache_dev); 1940 1941 return 0; 1942 } 1943 1944 static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, 1945 char **error) 1946 { 1947 int r; 1948 1949 if (!at_least_one_arg(as, error)) 1950 return -EINVAL; 1951 1952 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 1953 &ca->origin_dev); 1954 if (r) { 1955 *error = "Error opening origin device"; 1956 return r; 1957 } 1958 1959 ca->origin_sectors = get_dev_size(ca->origin_dev); 1960 if (ca->ti->len > ca->origin_sectors) { 1961 *error = "Device size larger than cached device"; 1962 return -EINVAL; 1963 } 1964 1965 return 0; 1966 } 1967 1968 static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, 1969 char **error) 1970 { 1971 unsigned long block_size; 1972 1973 if (!at_least_one_arg(as, error)) 1974 return -EINVAL; 1975 1976 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size || 1977 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || 1978 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || 1979 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { 1980 *error = "Invalid data block size"; 1981 return -EINVAL; 1982 } 1983 1984 if (block_size > ca->cache_sectors) { 1985 *error = "Data block size is larger than the cache device"; 1986 return -EINVAL; 1987 } 1988 1989 ca->block_size = block_size; 1990 1991 return 0; 1992 } 1993 1994 static void init_features(struct cache_features *cf) 1995 { 1996 cf->mode = CM_WRITE; 1997 cf->io_mode = 
CM_IO_WRITEBACK; 1998 } 1999 2000 static int parse_features(struct cache_args *ca, struct dm_arg_set *as, 2001 char **error) 2002 { 2003 static struct dm_arg _args[] = { 2004 {0, 1, "Invalid number of cache feature arguments"}, 2005 }; 2006 2007 int r; 2008 unsigned argc; 2009 const char *arg; 2010 struct cache_features *cf = &ca->features; 2011 2012 init_features(cf); 2013 2014 r = dm_read_arg_group(_args, as, &argc, error); 2015 if (r) 2016 return -EINVAL; 2017 2018 while (argc--) { 2019 arg = dm_shift_arg(as); 2020 2021 if (!strcasecmp(arg, "writeback")) 2022 cf->io_mode = CM_IO_WRITEBACK; 2023 2024 else if (!strcasecmp(arg, "writethrough")) 2025 cf->io_mode = CM_IO_WRITETHROUGH; 2026 2027 else if (!strcasecmp(arg, "passthrough")) 2028 cf->io_mode = CM_IO_PASSTHROUGH; 2029 2030 else { 2031 *error = "Unrecognised cache feature requested"; 2032 return -EINVAL; 2033 } 2034 } 2035 2036 return 0; 2037 } 2038 2039 static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, 2040 char **error) 2041 { 2042 static struct dm_arg _args[] = { 2043 {0, 1024, "Invalid number of policy arguments"}, 2044 }; 2045 2046 int r; 2047 2048 if (!at_least_one_arg(as, error)) 2049 return -EINVAL; 2050 2051 ca->policy_name = dm_shift_arg(as); 2052 2053 r = dm_read_arg_group(_args, as, &ca->policy_argc, error); 2054 if (r) 2055 return -EINVAL; 2056 2057 ca->policy_argv = (const char **)as->argv; 2058 dm_consume_args(as, ca->policy_argc); 2059 2060 return 0; 2061 } 2062 2063 static int parse_cache_args(struct cache_args *ca, int argc, char **argv, 2064 char **error) 2065 { 2066 int r; 2067 struct dm_arg_set as; 2068 2069 as.argc = argc; 2070 as.argv = argv; 2071 2072 r = parse_metadata_dev(ca, &as, error); 2073 if (r) 2074 return r; 2075 2076 r = parse_cache_dev(ca, &as, error); 2077 if (r) 2078 return r; 2079 2080 r = parse_origin_dev(ca, &as, error); 2081 if (r) 2082 return r; 2083 2084 r = parse_block_size(ca, &as, error); 2085 if (r) 2086 return r; 2087 2088 r = parse_features(ca, &as, error); 2089 if (r) 2090 return r; 2091 2092 r = parse_policy(ca, &as, error); 2093 if (r) 2094 return r; 2095 2096 return 0; 2097 } 2098 2099 /*----------------------------------------------------------------*/ 2100 2101 static struct kmem_cache *migration_cache; 2102 2103 #define NOT_CORE_OPTION 1 2104 2105 static int process_config_option(struct cache *cache, const char *key, const char *value) 2106 { 2107 unsigned long tmp; 2108 2109 if (!strcasecmp(key, "migration_threshold")) { 2110 if (kstrtoul(value, 10, &tmp)) 2111 return -EINVAL; 2112 2113 cache->migration_threshold = tmp; 2114 return 0; 2115 } 2116 2117 return NOT_CORE_OPTION; 2118 } 2119 2120 static int set_config_value(struct cache *cache, const char *key, const char *value) 2121 { 2122 int r = process_config_option(cache, key, value); 2123 2124 if (r == NOT_CORE_OPTION) 2125 r = policy_set_config_value(cache->policy, key, value); 2126 2127 if (r) 2128 DMWARN("bad config value for %s: %s", key, value); 2129 2130 return r; 2131 } 2132 2133 static int set_config_values(struct cache *cache, int argc, const char **argv) 2134 { 2135 int r = 0; 2136 2137 if (argc & 1) { 2138 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); 2139 return -EINVAL; 2140 } 2141 2142 while (argc) { 2143 r = set_config_value(cache, argv[0], argv[1]); 2144 if (r) 2145 break; 2146 2147 argc -= 2; 2148 argv += 2; 2149 } 2150 2151 return r; 2152 } 2153 2154 static int create_cache_policy(struct cache *cache, struct cache_args *ca, 2155 char **error) 
2156 { 2157 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 2158 cache->cache_size, 2159 cache->origin_sectors, 2160 cache->sectors_per_block); 2161 if (IS_ERR(p)) { 2162 *error = "Error creating cache's policy"; 2163 return PTR_ERR(p); 2164 } 2165 cache->policy = p; 2166 2167 return 0; 2168 } 2169 2170 /* 2171 * We want the discard block size to be a power of two, at least the size 2172 * of the cache block size, and have no more than 2^14 discard blocks 2173 * across the origin. 2174 */ 2175 #define MAX_DISCARD_BLOCKS (1 << 14) 2176 2177 static bool too_many_discard_blocks(sector_t discard_block_size, 2178 sector_t origin_size) 2179 { 2180 (void) sector_div(origin_size, discard_block_size); 2181 2182 return origin_size > MAX_DISCARD_BLOCKS; 2183 } 2184 2185 static sector_t calculate_discard_block_size(sector_t cache_block_size, 2186 sector_t origin_size) 2187 { 2188 sector_t discard_block_size; 2189 2190 discard_block_size = roundup_pow_of_two(cache_block_size); 2191 2192 if (origin_size) 2193 while (too_many_discard_blocks(discard_block_size, origin_size)) 2194 discard_block_size *= 2; 2195 2196 return discard_block_size; 2197 } 2198 2199 #define DEFAULT_MIGRATION_THRESHOLD 2048 2200 2201 static int cache_create(struct cache_args *ca, struct cache **result) 2202 { 2203 int r = 0; 2204 char **error = &ca->ti->error; 2205 struct cache *cache; 2206 struct dm_target *ti = ca->ti; 2207 dm_block_t origin_blocks; 2208 struct dm_cache_metadata *cmd; 2209 bool may_format = ca->features.mode == CM_WRITE; 2210 2211 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 2212 if (!cache) 2213 return -ENOMEM; 2214 2215 cache->ti = ca->ti; 2216 ti->private = cache; 2217 ti->num_flush_bios = 2; 2218 ti->flush_supported = true; 2219 2220 ti->num_discard_bios = 1; 2221 ti->discards_supported = true; 2222 ti->discard_zeroes_data_unsupported = true; 2223 2224 cache->features = ca->features; 2225 ti->per_bio_data_size = get_per_bio_data_size(cache); 2226 2227 cache->callbacks.congested_fn = cache_is_congested; 2228 dm_table_add_target_callbacks(ti->table, &cache->callbacks); 2229 2230 cache->metadata_dev = ca->metadata_dev; 2231 cache->origin_dev = ca->origin_dev; 2232 cache->cache_dev = ca->cache_dev; 2233 2234 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 2235 2236 /* FIXME: factor out this whole section */ 2237 origin_blocks = cache->origin_sectors = ca->origin_sectors; 2238 origin_blocks = block_div(origin_blocks, ca->block_size); 2239 cache->origin_blocks = to_oblock(origin_blocks); 2240 2241 cache->sectors_per_block = ca->block_size; 2242 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 2243 r = -EINVAL; 2244 goto bad; 2245 } 2246 2247 if (ca->block_size & (ca->block_size - 1)) { 2248 dm_block_t cache_size = ca->cache_sectors; 2249 2250 cache->sectors_per_block_shift = -1; 2251 cache_size = block_div(cache_size, ca->block_size); 2252 cache->cache_size = to_cblock(cache_size); 2253 } else { 2254 cache->sectors_per_block_shift = __ffs(ca->block_size); 2255 cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); 2256 } 2257 2258 r = create_cache_policy(cache, ca, error); 2259 if (r) 2260 goto bad; 2261 2262 cache->policy_nr_args = ca->policy_argc; 2263 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 2264 2265 r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 2266 if (r) { 2267 *error = "Error setting cache policy's config values"; 2268 goto bad; 2269 } 2270 2271 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 
2272 ca->block_size, may_format, 2273 dm_cache_policy_get_hint_size(cache->policy)); 2274 if (IS_ERR(cmd)) { 2275 *error = "Error creating metadata object"; 2276 r = PTR_ERR(cmd); 2277 goto bad; 2278 } 2279 cache->cmd = cmd; 2280 2281 if (passthrough_mode(&cache->features)) { 2282 bool all_clean; 2283 2284 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 2285 if (r) { 2286 *error = "dm_cache_metadata_all_clean() failed"; 2287 goto bad; 2288 } 2289 2290 if (!all_clean) { 2291 *error = "Cannot enter passthrough mode unless all blocks are clean"; 2292 r = -EINVAL; 2293 goto bad; 2294 } 2295 } 2296 2297 spin_lock_init(&cache->lock); 2298 bio_list_init(&cache->deferred_bios); 2299 bio_list_init(&cache->deferred_flush_bios); 2300 bio_list_init(&cache->deferred_writethrough_bios); 2301 INIT_LIST_HEAD(&cache->quiesced_migrations); 2302 INIT_LIST_HEAD(&cache->completed_migrations); 2303 INIT_LIST_HEAD(&cache->need_commit_migrations); 2304 atomic_set(&cache->nr_migrations, 0); 2305 init_waitqueue_head(&cache->migration_wait); 2306 2307 init_waitqueue_head(&cache->quiescing_wait); 2308 atomic_set(&cache->quiescing, 0); 2309 atomic_set(&cache->quiescing_ack, 0); 2310 2311 r = -ENOMEM; 2312 cache->nr_dirty = 0; 2313 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2314 if (!cache->dirty_bitset) { 2315 *error = "could not allocate dirty bitset"; 2316 goto bad; 2317 } 2318 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2319 2320 cache->discard_block_size = 2321 calculate_discard_block_size(cache->sectors_per_block, 2322 cache->origin_sectors); 2323 cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); 2324 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); 2325 if (!cache->discard_bitset) { 2326 *error = "could not allocate discard bitset"; 2327 goto bad; 2328 } 2329 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 2330 2331 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 2332 if (IS_ERR(cache->copier)) { 2333 *error = "could not create kcopyd client"; 2334 r = PTR_ERR(cache->copier); 2335 goto bad; 2336 } 2337 2338 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 2339 if (!cache->wq) { 2340 *error = "could not create workqueue for metadata object"; 2341 goto bad; 2342 } 2343 INIT_WORK(&cache->worker, do_worker); 2344 INIT_DELAYED_WORK(&cache->waker, do_waker); 2345 cache->last_commit_jiffies = jiffies; 2346 2347 cache->prison = dm_bio_prison_create(PRISON_CELLS); 2348 if (!cache->prison) { 2349 *error = "could not create bio prison"; 2350 goto bad; 2351 } 2352 2353 cache->all_io_ds = dm_deferred_set_create(); 2354 if (!cache->all_io_ds) { 2355 *error = "could not create all_io deferred set"; 2356 goto bad; 2357 } 2358 2359 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, 2360 migration_cache); 2361 if (!cache->migration_pool) { 2362 *error = "Error creating cache's migration mempool"; 2363 goto bad; 2364 } 2365 2366 cache->next_migration = NULL; 2367 2368 cache->need_tick_bio = true; 2369 cache->sized = false; 2370 cache->invalidate = false; 2371 cache->commit_requested = false; 2372 cache->loaded_mappings = false; 2373 cache->loaded_discards = false; 2374 2375 load_stats(cache); 2376 2377 atomic_set(&cache->stats.demotion, 0); 2378 atomic_set(&cache->stats.promotion, 0); 2379 atomic_set(&cache->stats.copies_avoided, 0); 2380 atomic_set(&cache->stats.cache_cell_clash, 0); 2381 atomic_set(&cache->stats.commit_count, 0); 2382 
atomic_set(&cache->stats.discard_count, 0); 2383 2384 spin_lock_init(&cache->invalidation_lock); 2385 INIT_LIST_HEAD(&cache->invalidation_requests); 2386 2387 *result = cache; 2388 return 0; 2389 2390 bad: 2391 destroy(cache); 2392 return r; 2393 } 2394 2395 static int copy_ctr_args(struct cache *cache, int argc, const char **argv) 2396 { 2397 unsigned i; 2398 const char **copy; 2399 2400 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 2401 if (!copy) 2402 return -ENOMEM; 2403 for (i = 0; i < argc; i++) { 2404 copy[i] = kstrdup(argv[i], GFP_KERNEL); 2405 if (!copy[i]) { 2406 while (i--) 2407 kfree(copy[i]); 2408 kfree(copy); 2409 return -ENOMEM; 2410 } 2411 } 2412 2413 cache->nr_ctr_args = argc; 2414 cache->ctr_args = copy; 2415 2416 return 0; 2417 } 2418 2419 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) 2420 { 2421 int r = -EINVAL; 2422 struct cache_args *ca; 2423 struct cache *cache = NULL; 2424 2425 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2426 if (!ca) { 2427 ti->error = "Error allocating memory for cache"; 2428 return -ENOMEM; 2429 } 2430 ca->ti = ti; 2431 2432 r = parse_cache_args(ca, argc, argv, &ti->error); 2433 if (r) 2434 goto out; 2435 2436 r = cache_create(ca, &cache); 2437 if (r) 2438 goto out; 2439 2440 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 2441 if (r) { 2442 destroy(cache); 2443 goto out; 2444 } 2445 2446 ti->private = cache; 2447 2448 out: 2449 destroy_cache_args(ca); 2450 return r; 2451 } 2452 2453 static int cache_map(struct dm_target *ti, struct bio *bio) 2454 { 2455 struct cache *cache = ti->private; 2456 2457 int r; 2458 dm_oblock_t block = get_bio_block(cache, bio); 2459 size_t pb_data_size = get_per_bio_data_size(cache); 2460 bool can_migrate = false; 2461 bool discarded_block; 2462 struct dm_bio_prison_cell *cell; 2463 struct policy_result lookup_result; 2464 struct per_bio_data *pb; 2465 2466 if (from_oblock(block) >= from_oblock(cache->origin_blocks)) { 2467 /* 2468 * This can only occur if the io goes to a partial block at 2469 * the end of the origin device. We don't cache these. 2470 * Just remap to the origin and carry on. 2471 */ 2472 remap_to_origin_clear_discard(cache, bio, block); 2473 return DM_MAPIO_REMAPPED; 2474 } 2475 2476 pb = init_per_bio_data(bio, pb_data_size); 2477 2478 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { 2479 defer_bio(cache, bio); 2480 return DM_MAPIO_SUBMITTED; 2481 } 2482 2483 /* 2484 * Check to see if that block is currently migrating. 2485 */ 2486 cell = alloc_prison_cell(cache); 2487 if (!cell) { 2488 defer_bio(cache, bio); 2489 return DM_MAPIO_SUBMITTED; 2490 } 2491 2492 r = bio_detain(cache, block, bio, cell, 2493 (cell_free_fn) free_prison_cell, 2494 cache, &cell); 2495 if (r) { 2496 if (r < 0) 2497 defer_bio(cache, bio); 2498 2499 return DM_MAPIO_SUBMITTED; 2500 } 2501 2502 discarded_block = is_discarded_oblock(cache, block); 2503 2504 r = policy_map(cache->policy, block, false, can_migrate, discarded_block, 2505 bio, &lookup_result); 2506 if (r == -EWOULDBLOCK) { 2507 cell_defer(cache, cell, true); 2508 return DM_MAPIO_SUBMITTED; 2509 2510 } else if (r) { 2511 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); 2512 bio_io_error(bio); 2513 return DM_MAPIO_SUBMITTED; 2514 } 2515 2516 r = DM_MAPIO_REMAPPED; 2517 switch (lookup_result.op) { 2518 case POLICY_HIT: 2519 if (passthrough_mode(&cache->features)) { 2520 if (bio_data_dir(bio) == WRITE) { 2521 /* 2522 * We need to invalidate this block, so 2523 * defer for the worker thread.
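* (In passthrough mode the cache must never hold data newer than the
* origin, so the write cannot simply be remapped here; the worker
* invalidates the cache block before the write is reissued to the
* origin.)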
2524 */ 2525 cell_defer(cache, cell, true); 2526 r = DM_MAPIO_SUBMITTED; 2527 2528 } else { 2529 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2530 inc_miss_counter(cache, bio); 2531 remap_to_origin_clear_discard(cache, bio, block); 2532 2533 cell_defer(cache, cell, false); 2534 } 2535 2536 } else { 2537 inc_hit_counter(cache, bio); 2538 2539 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 2540 !is_dirty(cache, lookup_result.cblock)) 2541 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 2542 else 2543 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); 2544 2545 cell_defer(cache, cell, false); 2546 } 2547 break; 2548 2549 case POLICY_MISS: 2550 inc_miss_counter(cache, bio); 2551 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); 2552 2553 if (pb->req_nr != 0) { 2554 /* 2555 * This is a duplicate writethrough io that is no 2556 * longer needed because the block has been demoted. 2557 */ 2558 bio_endio(bio, 0); 2559 cell_defer(cache, cell, false); 2560 return DM_MAPIO_SUBMITTED; 2561 } else { 2562 remap_to_origin_clear_discard(cache, bio, block); 2563 cell_defer(cache, cell, false); 2564 } 2565 break; 2566 2567 default: 2568 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, 2569 (unsigned) lookup_result.op); 2570 bio_io_error(bio); 2571 r = DM_MAPIO_SUBMITTED; 2572 } 2573 2574 return r; 2575 } 2576 2577 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2578 { 2579 struct cache *cache = ti->private; 2580 unsigned long flags; 2581 size_t pb_data_size = get_per_bio_data_size(cache); 2582 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 2583 2584 if (pb->tick) { 2585 policy_tick(cache->policy); 2586 2587 spin_lock_irqsave(&cache->lock, flags); 2588 cache->need_tick_bio = true; 2589 spin_unlock_irqrestore(&cache->lock, flags); 2590 } 2591 2592 check_for_quiesced_migrations(cache, pb); 2593 2594 return 0; 2595 } 2596 2597 static int write_dirty_bitset(struct cache *cache) 2598 { 2599 unsigned i, r; 2600 2601 for (i = 0; i < from_cblock(cache->cache_size); i++) { 2602 r = dm_cache_set_dirty(cache->cmd, to_cblock(i), 2603 is_dirty(cache, to_cblock(i))); 2604 if (r) 2605 return r; 2606 } 2607 2608 return 0; 2609 } 2610 2611 static int write_discard_bitset(struct cache *cache) 2612 { 2613 unsigned i, r; 2614 2615 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, 2616 cache->discard_nr_blocks); 2617 if (r) { 2618 DMERR("could not resize on-disk discard bitset"); 2619 return r; 2620 } 2621 2622 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { 2623 r = dm_cache_set_discard(cache->cmd, to_dblock(i), 2624 is_discarded(cache, to_dblock(i))); 2625 if (r) 2626 return r; 2627 } 2628 2629 return 0; 2630 } 2631 2632 static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, 2633 uint32_t hint) 2634 { 2635 struct cache *cache = context; 2636 return dm_cache_save_hint(cache->cmd, cblock, hint); 2637 } 2638 2639 static int write_hints(struct cache *cache) 2640 { 2641 int r; 2642 2643 r = dm_cache_begin_hints(cache->cmd, cache->policy); 2644 if (r) { 2645 DMERR("dm_cache_begin_hints failed"); 2646 return r; 2647 } 2648 2649 r = policy_walk_mappings(cache->policy, save_hint, cache); 2650 if (r) 2651 DMERR("policy_walk_mappings failed"); 2652 2653 return r; 2654 } 2655 2656 /* 2657 * returns true on success 2658 */ 2659 static bool sync_metadata(struct cache *cache) 2660 { 2661 int r1, r2, r3, r4; 2662 2663 r1 = 
write_dirty_bitset(cache); 2664 if (r1) 2665 DMERR("could not write dirty bitset"); 2666 2667 r2 = write_discard_bitset(cache); 2668 if (r2) 2669 DMERR("could not write discard bitset"); 2670 2671 save_stats(cache); 2672 2673 r3 = write_hints(cache); 2674 if (r3) 2675 DMERR("could not write hints"); 2676 2677 /* 2678 * If writing the above metadata failed, we still commit, but don't 2679 * set the clean shutdown flag. This will effectively force every 2680 * dirty bit to be set on reload. 2681 */ 2682 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); 2683 if (r4) 2684 DMERR("could not write cache metadata. Data loss may occur."); 2685 2686 return !r1 && !r2 && !r3 && !r4; 2687 } 2688 2689 static void cache_postsuspend(struct dm_target *ti) 2690 { 2691 struct cache *cache = ti->private; 2692 2693 start_quiescing(cache); 2694 wait_for_migrations(cache); 2695 stop_worker(cache); 2696 requeue_deferred_io(cache); 2697 stop_quiescing(cache); 2698 2699 (void) sync_metadata(cache); 2700 } 2701 2702 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2703 bool dirty, uint32_t hint, bool hint_valid) 2704 { 2705 int r; 2706 struct cache *cache = context; 2707 2708 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 2709 if (r) 2710 return r; 2711 2712 if (dirty) 2713 set_dirty(cache, oblock, cblock); 2714 else 2715 clear_dirty(cache, oblock, cblock); 2716 2717 return 0; 2718 } 2719 2720 static int load_discard(void *context, sector_t discard_block_size, 2721 dm_dblock_t dblock, bool discard) 2722 { 2723 struct cache *cache = context; 2724 2725 /* FIXME: handle mis-matched block size */ 2726 2727 if (discard) 2728 set_discard(cache, dblock); 2729 else 2730 clear_discard(cache, dblock); 2731 2732 return 0; 2733 } 2734 2735 static dm_cblock_t get_cache_dev_size(struct cache *cache) 2736 { 2737 sector_t size = get_dev_size(cache->cache_dev); 2738 (void) sector_div(size, cache->sectors_per_block); 2739 return to_cblock(size); 2740 } 2741 2742 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 2743 { 2744 if (from_cblock(new_size) > from_cblock(cache->cache_size)) 2745 return true; 2746 2747 /* 2748 * We can't drop a dirty block when shrinking the cache. 2749 */ 2750 while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 2751 if (is_dirty(cache, new_size)) { 2752 DMERR("unable to shrink cache; cache block %llu is dirty", 2753 (unsigned long long) from_cblock(new_size)); 2754 return false; 2755 } 2756 new_size = to_cblock(from_cblock(new_size) + 1); 2757 } 2758 2759 return true; 2760 } 2761 2762 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 2763 { 2764 int r; 2765 2766 r = dm_cache_resize(cache->cmd, new_size); 2767 if (r) { 2768 DMERR("could not resize cache metadata"); 2769 return r; 2770 } 2771 2772 cache->cache_size = new_size; 2773 2774 return 0; 2775 } 2776 2777 static int cache_preresume(struct dm_target *ti) 2778 { 2779 int r = 0; 2780 struct cache *cache = ti->private; 2781 dm_cblock_t csize = get_cache_dev_size(cache); 2782 2783 /* 2784 * Check to see if the cache has resized.
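* Growing just records the new size via resize_cache_dev(); shrinking
* is refused by can_resize() if any cblock that would be dropped is
* still dirty. For example, shrinking from 2048 to 1024 cblocks
* requires cblocks 1024..2047 to be clean.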
2785 */ 2786 if (!cache->sized) { 2787 r = resize_cache_dev(cache, csize); 2788 if (r) 2789 return r; 2790 2791 cache->sized = true; 2792 2793 } else if (csize != cache->cache_size) { 2794 if (!can_resize(cache, csize)) 2795 return -EINVAL; 2796 2797 r = resize_cache_dev(cache, csize); 2798 if (r) 2799 return r; 2800 } 2801 2802 if (!cache->loaded_mappings) { 2803 r = dm_cache_load_mappings(cache->cmd, cache->policy, 2804 load_mapping, cache); 2805 if (r) { 2806 DMERR("could not load cache mappings"); 2807 return r; 2808 } 2809 2810 cache->loaded_mappings = true; 2811 } 2812 2813 if (!cache->loaded_discards) { 2814 r = dm_cache_load_discards(cache->cmd, load_discard, cache); 2815 if (r) { 2816 DMERR("could not load origin discards"); 2817 return r; 2818 } 2819 2820 cache->loaded_discards = true; 2821 } 2822 2823 return r; 2824 } 2825 2826 static void cache_resume(struct dm_target *ti) 2827 { 2828 struct cache *cache = ti->private; 2829 2830 cache->need_tick_bio = true; 2831 do_waker(&cache->waker.work); 2832 } 2833 2834 /* 2835 * Status format: 2836 * 2837 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 2838 * <cache block size> <#used cache blocks>/<#total cache blocks> 2839 * <#read hits> <#read misses> <#write hits> <#write misses> 2840 * <#demotions> <#promotions> <#dirty> 2841 * <#features> <features>* 2842 * <#core args> <core args> 2843 * <policy name> <#policy args> <policy args>* 2844 */ 2845 static void cache_status(struct dm_target *ti, status_type_t type, 2846 unsigned status_flags, char *result, unsigned maxlen) 2847 { 2848 int r = 0; 2849 unsigned i; 2850 ssize_t sz = 0; 2851 dm_block_t nr_free_blocks_metadata = 0; 2852 dm_block_t nr_blocks_metadata = 0; 2853 char buf[BDEVNAME_SIZE]; 2854 struct cache *cache = ti->private; 2855 dm_cblock_t residency; 2856 2857 switch (type) { 2858 case STATUSTYPE_INFO: 2859 /* Commit to ensure statistics aren't out-of-date */ 2860 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { 2861 r = dm_cache_commit(cache->cmd, false); 2862 if (r) 2863 DMERR("could not commit metadata for accurate status"); 2864 } 2865 2866 r = dm_cache_get_free_metadata_block_count(cache->cmd, 2867 &nr_free_blocks_metadata); 2868 if (r) { 2869 DMERR("could not get metadata free block count"); 2870 goto err; 2871 } 2872 2873 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 2874 if (r) { 2875 DMERR("could not get metadata device size"); 2876 goto err; 2877 } 2878 2879 residency = policy_residency(cache->policy); 2880 2881 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ", 2882 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 2883 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 2884 (unsigned long long)nr_blocks_metadata, 2885 cache->sectors_per_block, 2886 (unsigned long long) from_cblock(residency), 2887 (unsigned long long) from_cblock(cache->cache_size), 2888 (unsigned) atomic_read(&cache->stats.read_hit), 2889 (unsigned) atomic_read(&cache->stats.read_miss), 2890 (unsigned) atomic_read(&cache->stats.write_hit), 2891 (unsigned) atomic_read(&cache->stats.write_miss), 2892 (unsigned) atomic_read(&cache->stats.demotion), 2893 (unsigned) atomic_read(&cache->stats.promotion), 2894 (unsigned long long) from_cblock(cache->nr_dirty)); 2895 2896 if (writethrough_mode(&cache->features)) 2897 DMEMIT("1 writethrough "); 2898 2899 else if (passthrough_mode(&cache->features)) 2900 DMEMIT("1 passthrough "); 2901 2902 else if (writeback_mode(&cache->features)) 2903 DMEMIT("1 writeback 
"); 2904 2905 else { 2906 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode); 2907 goto err; 2908 } 2909 2910 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 2911 2912 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 2913 if (sz < maxlen) { 2914 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); 2915 if (r) 2916 DMERR("policy_emit_config_values returned %d", r); 2917 } 2918 2919 break; 2920 2921 case STATUSTYPE_TABLE: 2922 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 2923 DMEMIT("%s ", buf); 2924 format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 2925 DMEMIT("%s ", buf); 2926 format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 2927 DMEMIT("%s", buf); 2928 2929 for (i = 0; i < cache->nr_ctr_args - 1; i++) 2930 DMEMIT(" %s", cache->ctr_args[i]); 2931 if (cache->nr_ctr_args) 2932 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 2933 } 2934 2935 return; 2936 2937 err: 2938 DMEMIT("Error"); 2939 } 2940 2941 /* 2942 * A cache block range can take two forms: 2943 * 2944 * i) A single cblock, eg. '3456' 2945 * ii) A begin and end cblock with dots between, eg. 123-234 2946 */ 2947 static int parse_cblock_range(struct cache *cache, const char *str, 2948 struct cblock_range *result) 2949 { 2950 char dummy; 2951 uint64_t b, e; 2952 int r; 2953 2954 /* 2955 * Try and parse form (ii) first. 2956 */ 2957 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 2958 if (r < 0) 2959 return r; 2960 2961 if (r == 2) { 2962 result->begin = to_cblock(b); 2963 result->end = to_cblock(e); 2964 return 0; 2965 } 2966 2967 /* 2968 * That didn't work, try form (i). 2969 */ 2970 r = sscanf(str, "%llu%c", &b, &dummy); 2971 if (r < 0) 2972 return r; 2973 2974 if (r == 1) { 2975 result->begin = to_cblock(b); 2976 result->end = to_cblock(from_cblock(result->begin) + 1u); 2977 return 0; 2978 } 2979 2980 DMERR("invalid cblock range '%s'", str); 2981 return -EINVAL; 2982 } 2983 2984 static int validate_cblock_range(struct cache *cache, struct cblock_range *range) 2985 { 2986 uint64_t b = from_cblock(range->begin); 2987 uint64_t e = from_cblock(range->end); 2988 uint64_t n = from_cblock(cache->cache_size); 2989 2990 if (b >= n) { 2991 DMERR("begin cblock out of range: %llu >= %llu", b, n); 2992 return -EINVAL; 2993 } 2994 2995 if (e > n) { 2996 DMERR("end cblock out of range: %llu > %llu", e, n); 2997 return -EINVAL; 2998 } 2999 3000 if (b >= e) { 3001 DMERR("invalid cblock range: %llu >= %llu", b, e); 3002 return -EINVAL; 3003 } 3004 3005 return 0; 3006 } 3007 3008 static int request_invalidation(struct cache *cache, struct cblock_range *range) 3009 { 3010 struct invalidation_request req; 3011 3012 INIT_LIST_HEAD(&req.list); 3013 req.cblocks = range; 3014 atomic_set(&req.complete, 0); 3015 req.err = 0; 3016 init_waitqueue_head(&req.result_wait); 3017 3018 spin_lock(&cache->invalidation_lock); 3019 list_add(&req.list, &cache->invalidation_requests); 3020 spin_unlock(&cache->invalidation_lock); 3021 wake_worker(cache); 3022 3023 wait_event(req.result_wait, atomic_read(&req.complete)); 3024 return req.err; 3025 } 3026 3027 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, 3028 const char **cblock_ranges) 3029 { 3030 int r = 0; 3031 unsigned i; 3032 struct cblock_range range; 3033 3034 if (!passthrough_mode(&cache->features)) { 3035 DMERR("cache has to be in passthrough mode for invalidation"); 3036 return -EPERM; 3037 } 3038 3039 for (i = 0; i < count; i++) { 3040 r = parse_cblock_range(cache, 
cblock_ranges[i], &range); 3041 if (r) 3042 break; 3043 3044 r = validate_cblock_range(cache, &range); 3045 if (r) 3046 break; 3047 3048 /* 3049 * Pass begin and end cache blocks to the worker and wake it. 3050 */ 3051 r = request_invalidation(cache, &range); 3052 if (r) 3053 break; 3054 } 3055 3056 return r; 3057 } 3058 3059 /* 3060 * Supports 3061 * "<key> <value>" 3062 * and 3063 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*" 3064 * 3065 * The key migration_threshold is supported by the cache target core. 3066 */ 3067 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 3068 { 3069 struct cache *cache = ti->private; 3070 3071 if (!argc) 3072 return -EINVAL; 3073 3074 if (!strcasecmp(argv[0], "invalidate_cblocks")) 3075 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 3076 3077 if (argc != 2) 3078 return -EINVAL; 3079 3080 return set_config_value(cache, argv[0], argv[1]); 3081 } 3082 3083 static int cache_iterate_devices(struct dm_target *ti, 3084 iterate_devices_callout_fn fn, void *data) 3085 { 3086 int r = 0; 3087 struct cache *cache = ti->private; 3088 3089 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 3090 if (!r) 3091 r = fn(ti, cache->origin_dev, 0, ti->len, data); 3092 3093 return r; 3094 } 3095 3096 /* 3097 * We assume I/O is going to the origin (which is the volume 3098 * more likely to have restrictions e.g. by being striped). 3099 * (Looking up the exact location of the data would be expensive 3100 * and could always be out of date by the time the bio is submitted.) 3101 */ 3102 static int cache_bvec_merge(struct dm_target *ti, 3103 struct bvec_merge_data *bvm, 3104 struct bio_vec *biovec, int max_size) 3105 { 3106 struct cache *cache = ti->private; 3107 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); 3108 3109 if (!q->merge_bvec_fn) 3110 return max_size; 3111 3112 bvm->bi_bdev = cache->origin_dev->bdev; 3113 return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); 3114 } 3115 3116 static void set_discard_limits(struct cache *cache, struct queue_limits *limits) 3117 { 3118 /* 3119 * FIXME: these limits may be incompatible with the cache device 3120 */ 3121 limits->max_discard_sectors = cache->discard_block_size * 1024; 3122 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; 3123 } 3124 3125 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 3126 { 3127 struct cache *cache = ti->private; 3128 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 3129 3130 /* 3131 * If the system-determined stacked limits are compatible with the 3132 * cache's blocksize (io_opt is a factor) do not override them.
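* For example, with a 512 sector (256KB) cache block, a stacked io_opt
* of 2048 sectors (1MB) is left alone since 2048 % 512 == 0, whereas an
* io_opt of 768 sectors (384KB) is overridden below with the cache's
* own block size.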
3133 */ 3134 if (io_opt_sectors < cache->sectors_per_block || 3135 do_div(io_opt_sectors, cache->sectors_per_block)) { 3136 blk_limits_io_min(limits, 0); 3137 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3138 } 3139 set_discard_limits(cache, limits); 3140 } 3141 3142 /*----------------------------------------------------------------*/ 3143 3144 static struct target_type cache_target = { 3145 .name = "cache", 3146 .version = {1, 3, 0}, 3147 .module = THIS_MODULE, 3148 .ctr = cache_ctr, 3149 .dtr = cache_dtr, 3150 .map = cache_map, 3151 .end_io = cache_end_io, 3152 .postsuspend = cache_postsuspend, 3153 .preresume = cache_preresume, 3154 .resume = cache_resume, 3155 .status = cache_status, 3156 .message = cache_message, 3157 .iterate_devices = cache_iterate_devices, 3158 .merge = cache_bvec_merge, 3159 .io_hints = cache_io_hints, 3160 }; 3161 3162 static int __init dm_cache_init(void) 3163 { 3164 int r; 3165 3166 r = dm_register_target(&cache_target); 3167 if (r) { 3168 DMERR("cache target registration failed: %d", r); 3169 return r; 3170 } 3171 3172 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3173 if (!migration_cache) { 3174 dm_unregister_target(&cache_target); 3175 return -ENOMEM; 3176 } 3177 3178 return 0; 3179 } 3180 3181 static void __exit dm_cache_exit(void) 3182 { 3183 dm_unregister_target(&cache_target); 3184 kmem_cache_destroy(migration_cache); 3185 } 3186 3187 module_init(dm_cache_init); 3188 module_exit(dm_cache_exit); 3189 3190 MODULE_DESCRIPTION(DM_NAME " cache target"); 3191 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 3192 MODULE_LICENSE("GPL"); 3193
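/*
 * Illustrative usage sketch. This is not part of the driver; the device
 * names, sizes and the 'default' policy name are assumptions used only
 * to show the argument layout described in the constructor comment and
 * above cache_message():
 *
 *   dmsetup create cached --table \
 *     "0 1953125000 cache /dev/mapper/fast-meta /dev/mapper/fast-blocks \
 *      /dev/mapper/slow-origin 512 1 writeback default 0"
 *
 *   dmsetup message cached 0 migration_threshold 4096
 *
 *   # only accepted while the target is in passthrough mode:
 *   dmsetup message cached 0 invalidate_cblocks 0-1024 5000
 */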