/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

#define IOT_RESOLUTION 4

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};

static void iot_init(struct io_tracker *iot)
{
	spin_lock_init(&iot->lock);
	iot->in_flight = 0ul;
	iot->idle_time = 0ul;
	iot->last_update_time = jiffies;
}

static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	if (iot->in_flight)
		return false;

	return time_after(jiffies, iot->idle_time + jifs);
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	r = __iot_idle_for(iot, jifs);
	spin_unlock_irqrestore(&iot->lock, flags);

	return r;
}

static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	iot->in_flight += len;
	spin_unlock_irqrestore(&iot->lock, flags);
}

static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = jiffies;
}

static void iot_io_end(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	__iot_io_end(iot, len);
	spin_unlock_irqrestore(&iot->lock, flags);
}

/*----------------------------------------------------------------*/
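/*
 * Illustrative sketch (not part of the driver): the io_tracker is used by
 * bracketing accounted origin I/O with iot_io_begin()/iot_io_end() and
 * later asking whether the device has been idle for a while, e.g.:
 *
 *	iot_io_begin(&cache->origin_tracker, bio_sectors(bio));
 *	... submit the bio ...
 *	iot_io_end(&cache->origin_tracker, bio_sectors(bio));
 *
 *	if (iot_idle_for(&cache->origin_tracker, HZ))
 *		... the origin has seen no accounted I/O for ~1 second ...
 *
 * This mirrors how accounted_begin(), accounted_complete() and
 * writeback_some_dirty_blocks() use the tracker below.
 */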
/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
}

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * the one-past-the-end value.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct list_head deferred_cells;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io.  eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set
	 */
	atomic_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	struct io_tracker origin_tracker;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;
	sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool discard:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static enum cache_metadata_mode get_cache_mode(struct cache *cache);

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (mg) {
		mg->cache = cache;
		atomic_inc(&mg->cache->nr_allocated_migrations);
	}

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (atomic_dec_and_test(&cache->nr_allocated_migrations))
		wake_up(&cache->migration_wait);

	mempool_free(mg, cache->migration_pool);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = alloc_migration(cache);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		free_migration(p->mg);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/
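/*
 * Illustrative sketch (not part of the driver): the prealloc struct is
 * filled before processing a bio and drained afterwards, e.g.:
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	if (prealloc_data_structs(cache, &structs))
 *		... defer the work until memory is available ...
 *
 *	cell = prealloc_get_cell(&structs);
 *	... if the cell ends up unused: prealloc_put_cell(&structs, cell); ...
 *
 *	prealloc_free_structs(cache, &structs);
 *
 * This is the pattern used by process_deferred_bios() and friends below.
 */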
static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
			    cell_free_fn free_fn, void *free_context,
			    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock_begin, oblock_end, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	return bio_detain_range(cache, oblock, end, bio,
				cell_prealloc, free_fn, free_context, cell_result);
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
{
	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ?
		PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;
	pb->len = 0;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}
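/*
 * Worked example (illustrative numbers, not a configuration requirement):
 * with a 64KB block size, sectors_per_block == 128 and
 * sectors_per_block_shift == 7.  A bio at sector 1000 then maps to
 * oblock 1000 >> 7 == 7, and remapping that bio to cblock 3 places it at
 * sector (3 << 7) | (1000 & 127) == 384 + 104 == 488 on the cache device.
 * The non-power-of-two path computes the same thing with sector_div().
 */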
static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
static void inc_ds(struct cache *cache, struct bio *bio,
		   struct dm_bio_prison_cell *cell)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(!cell);
	BUG_ON(pb->all_io_entry);

	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}

static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
		!(bio->bi_rw & REQ_DISCARD));
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	if (accountable_bio(cache, bio)) {
		pb->len = bio_sectors(bio);
		iot_io_begin(&cache->origin_tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	iot_io_end(&cache->origin_tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	generic_make_request(bio);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		accounted_request(cache, bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
	inc_ds(cache, bio, cell);
	issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_error) {
		bio_endio(bio);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In the future we'd like to clone
 * the bio and send the copies in parallel, but for now we issue them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Failure modes
 *--------------------------------------------------------------*/
static enum cache_metadata_mode get_cache_mode(struct cache *cache)
{
	return cache->features.mode;
}

static const char *cache_device_name(struct cache *cache)
{
	return dm_device_name(dm_table_get_md(cache->ti->table));
}

static void notify_mode_switch(struct cache *cache, enum cache_metadata_mode mode)
{
	const char *descs[] = {
		"write",
		"read-only",
		"fail"
	};

	dm_table_event(cache->ti->table);
	DMINFO("%s: switching cache to %s mode",
	       cache_device_name(cache), descs[(int)mode]);
}

static void set_cache_mode(struct cache *cache, enum cache_metadata_mode new_mode)
{
	bool needs_check = dm_cache_metadata_needs_check(cache->cmd);
	enum cache_metadata_mode old_mode = get_cache_mode(cache);

	if (new_mode == CM_WRITE && needs_check) {
		DMERR("%s: unable to switch cache to write mode until repaired.",
		      cache_device_name(cache));
		if (old_mode != new_mode)
			new_mode = old_mode;
		else
			new_mode = CM_READ_ONLY;
	}

	/* Never move out of fail mode */
	if (old_mode == CM_FAIL)
		new_mode = CM_FAIL;

	switch (new_mode) {
	case CM_FAIL:
	case CM_READ_ONLY:
		dm_cache_metadata_set_read_only(cache->cmd);
		break;

	case CM_WRITE:
		dm_cache_metadata_set_read_write(cache->cmd);
		break;
	}

	cache->features.mode = new_mode;

	if (new_mode != old_mode)
		notify_mode_switch(cache, new_mode);
}

static void abort_transaction(struct cache *cache)
{
	const char *dev_name = cache_device_name(cache);

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return;

	if (dm_cache_metadata_set_needs_check(cache->cmd)) {
		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}

	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
	if (dm_cache_metadata_abort(cache->cmd)) {
		DMERR("%s: failed to abort metadata transaction", dev_name);
		set_cache_mode(cache, CM_FAIL);
	}
}

static void metadata_operation_failed(struct cache *cache, const char *op, int r)
{
	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
		    cache_device_name(cache), op, r);
	abort_transaction(cache);
	set_cache_mode(cache, CM_READ_ONLY);
}
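/*
 * Summary of the metadata mode transitions implemented above (descriptive
 * only): a failed metadata operation aborts the current transaction and
 * drops the cache from CM_WRITE to CM_READ_ONLY; if the abort itself (or
 * setting the 'needs_check' flag) fails we fall through to CM_FAIL.  Once
 * in CM_FAIL the cache never leaves it, and a cache that needs checking
 * cannot be switched back to CM_WRITE until it has been repaired.
 */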
/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static bool discard_or_flush(struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	if (discard_or_flush(cell->holder)) {
		/*
		 * We have to handle these bios individually.
		 */
		dm_cell_release(cache->prison, cell, &cache->deferred_bios);
		free_prison_cell(cache, cell);
	} else
		list_add_tail(&cell->user_list, &cache->deferred_cells);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, bool holder)
{
	unsigned long flags;

	if (!holder && dm_cell_promote_or_release(cache->prison, cell)) {
		/*
		 * There was no prisoner to promote to holder, the
		 * cell has been released.
		 */
		free_prison_cell(cache, cell);
		return;
	}

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cell_error_with_code(struct cache *cache, struct dm_bio_prison_cell *cell, int err)
{
	dm_cell_error(cache->prison, cell, err);
	free_prison_cell(cache, cell);
}

static void cell_requeue(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	cell_error_with_code(cache, cell, DM_ENDIO_REQUEUE);
}

static void free_io_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	dec_io_migrations(cache);
	free_migration(mg);
	wake_worker(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	const char *dev_name = cache_device_name(cache);

	if (mg->writeback) {
		DMERR_LIMIT("%s: writeback failed; couldn't copy block", dev_name);
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMERR_LIMIT("%s: demotion failed; couldn't copy block", dev_name);
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMERR_LIMIT("%s: promotion failed; couldn't copy block", dev_name);
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	free_io_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	int r;
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);
		free_io_migration(mg);
		return;

	} else if (mg->demote) {
		r = dm_cache_remove_mapping(cache->cmd, mg->cblock);
		if (r) {
			DMERR_LIMIT("%s: demotion failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_remove_mapping", r);
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			free_io_migration(mg);
			return;
		}
	} else {
		r = dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock);
		if (r) {
			DMERR_LIMIT("%s: promotion failed; couldn't update on disk metadata",
				    cache_device_name(cache));
			metadata_operation_failed(cache, "dm_cache_insert_mapping", r);
			policy_remove_mapping(cache->policy, mg->new_oblock);
			free_io_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("%s: writeback unexpectedly triggered commit",
			     cache_device_name(cache));
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			free_io_migration(mg);
		}

	} else {
		if (mg->requeue_holder) {
			clear_dirty(cache, mg->new_oblock, mg->cblock);
			cell_defer(cache, mg->new_ocell, true);
		} else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
			set_dirty(cache, mg->new_oblock, mg->cblock);
			bio_endio(mg->new_ocell->holder);
			cell_defer(cache, mg->new_ocell, false);
		}
		free_io_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;
	sector_t cblock = from_cblock(mg->cblock);

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = cblock * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("%s: issuing migration failed", cache_device_name(cache));
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (bio->bi_error)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
	accounted_request(mg->cache, bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}
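/*
 * Worked example for calc_discard_block_range() (illustrative numbers):
 * with a hypothetical discard_block_size of 1024 sectors, a discard bio
 * covering sectors [1500, 5000) gives b = ceil(1500 / 1024) = 2 and
 * e = floor(5000 / 1024) = 4, so only discard blocks 2 and 3, which are
 * completely covered, get marked; the partial head and tail are ignored.
 * If the bio spans less than one discard block, b == e and nothing is set.
 */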
static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;
	struct cache *cache = mg->cache;

	calc_discard_block_range(cache, bio, &b, &e);
	while (b != e) {
		set_discard(cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	bio_endio(bio);
	cell_defer(cache, mg->new_ocell, false);
	free_migration(mg);
	wake_worker(cache);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->discard) {
		issue_discard(mg);
		return;
	}

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (writeback_mode(&cache->features) &&
		    !avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void discard(struct cache *cache, struct prealloc *structs,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = true;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = false;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	/*
	 * REQ_FLUSH is not directed at any particular block so we don't
	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
	 * by dm-core.
	 */
	issue(cache, bio);
}

static void process_discard_bio(struct cache *cache, struct prealloc *structs,
				struct bio *bio)
{
	int r;
	dm_dblock_t b, e;
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	calc_discard_block_range(cache, bio, &b, &e);
	if (b == e) {
		bio_endio(bio);
		return;
	}

	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
			     (cell_free_fn) prealloc_put_cell,
			     structs, &new_ocell);
	if (r > 0)
		return;

	discard(cache, structs, new_ocell);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}
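/*
 * Worked example (hypothetical numbers): with 64KB cache blocks
 * (sectors_per_block == 128) and a migration_threshold of 2048 sectors,
 * spare_migration_bandwidth() returns true while
 * (nr_io_migrations + 1) * 128 < 2048, i.e. while fewer than 15 background
 * copies are in flight; beyond that, new promotions and demotions are held
 * back.
 */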
static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

/*----------------------------------------------------------------*/

struct inc_detail {
	struct cache *cache;
	struct bio_list bios_for_issue;
	struct bio_list unhandled_bios;
	bool any_writes;
};

static void inc_fn(void *context, struct dm_bio_prison_cell *cell)
{
	struct bio *bio;
	struct inc_detail *detail = context;
	struct cache *cache = detail->cache;

	inc_ds(cache, cell->holder, cell);
	if (bio_data_dir(cell->holder) == WRITE)
		detail->any_writes = true;

	while ((bio = bio_list_pop(&cell->bios))) {
		if (discard_or_flush(bio)) {
			bio_list_add(&detail->unhandled_bios, bio);
			continue;
		}

		if (bio_data_dir(bio) == WRITE)
			detail->any_writes = true;

		bio_list_add(&detail->bios_for_issue, bio);
		inc_ds(cache, bio, cell);
	}
}

// FIXME: refactor these two
static void remap_cell_to_origin_clear_discard(struct cache *cache,
					       struct dm_bio_prison_cell *cell,
					       dm_oblock_t oblock, bool issue_holder)
{
	struct bio *bio;
	unsigned long flags;
	struct inc_detail detail;

	detail.cache = cache;
	bio_list_init(&detail.bios_for_issue);
	bio_list_init(&detail.unhandled_bios);
	detail.any_writes = false;

	spin_lock_irqsave(&cache->lock, flags);
	dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
	bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	remap_to_origin(cache, cell->holder);
	if (issue_holder)
		issue(cache, cell->holder);
	else
		accounted_begin(cache, cell->holder);

	if (detail.any_writes)
		clear_discard(cache, oblock_to_dblock(cache, oblock));

	while ((bio = bio_list_pop(&detail.bios_for_issue))) {
		remap_to_origin(cache, bio);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

static void remap_cell_to_cache_dirty(struct cache *cache, struct dm_bio_prison_cell *cell,
				      dm_oblock_t oblock, dm_cblock_t cblock, bool issue_holder)
{
	struct bio *bio;
	unsigned long flags;
	struct inc_detail detail;

	detail.cache = cache;
	bio_list_init(&detail.bios_for_issue);
	bio_list_init(&detail.unhandled_bios);
	detail.any_writes = false;

	spin_lock_irqsave(&cache->lock, flags);
	dm_cell_visit_release(cache->prison, inc_fn, &detail, cell);
	bio_list_merge(&cache->deferred_bios, &detail.unhandled_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	remap_to_cache(cache, cell->holder, cblock);
	if (issue_holder)
		issue(cache, cell->holder);
	else
		accounted_begin(cache, cell->holder);

	if (detail.any_writes) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}

	while ((bio = bio_list_pop(&detail.bios_for_issue))) {
		remap_to_cache(cache, bio, cblock);
		issue(cache, bio);
	}

	free_prison_cell(cache, cell);
}

/*----------------------------------------------------------------*/

struct old_oblock_lock {
	struct policy_locker locker;
	struct cache *cache;
	struct prealloc *structs;
	struct dm_bio_prison_cell *cell;
};

static int null_locker(struct policy_locker *locker, dm_oblock_t b)
{
	/* This should never be called */
	BUG();
	return 0;
}

static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
{
	struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
	struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);

	return bio_detain(l->cache, b, NULL, cell_prealloc,
			  (cell_free_fn) prealloc_put_cell,
			  l->structs, &l->cell);
}

static void process_cell(struct cache *cache, struct prealloc *structs,
			 struct dm_bio_prison_cell *new_ocell)
{
	int r;
	bool release_cell = true;
	struct bio *bio = new_ocell->holder;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct policy_result lookup_result;
	bool passthrough = passthrough_mode(&cache->features);
	bool fast_promotion, can_migrate;
	struct old_oblock_lock ool;

	fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio);
	can_migrate = !passthrough && (fast_promotion || spare_migration_bandwidth(cache));

	ool.locker.fn = cell_locker;
	ool.cache = cache;
	ool.structs = structs;
	ool.cell = NULL;
	r = policy_map(cache->policy, block, true, can_migrate, fast_promotion,
		       bio, &ool.locker, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				remap_to_origin_clear_discard(cache, bio, block);
				inc_and_issue(cache, bio, new_ocell);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);

			} else {
				remap_cell_to_cache_dirty(cache, new_ocell, block, lookup_result.cblock, true);
				release_cell = false;
			}
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		remap_cell_to_origin_clear_discard(cache, new_ocell, block, true);
		release_cell = false;
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);
		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    ool.cell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: %s: erroring bio, unknown policy op: %u",
			    cache_device_name(cache), __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	process_cell(cache, structs, new_ocell);
}

static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}
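/*
 * Descriptive note: need_commit_due_to_time() forces a commit either when
 * more than COMMIT_PERIOD (one second's worth of jiffies) has passed since
 * the last commit, or when jiffies appears to have gone backwards past
 * last_commit_jiffies (e.g. after a counter wrap), erring on the side of
 * committing too often rather than too rarely.
 */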
/*
 * A non-zero return indicates read_only or fail_io mode.
 */
static int commit(struct cache *cache, bool clean_shutdown)
{
	int r;

	if (get_cache_mode(cache) >= CM_READ_ONLY)
		return -EINVAL;

	atomic_inc(&cache->stats.commit_count);
	r = dm_cache_commit(cache->cmd, clean_shutdown);
	if (r)
		metadata_operation_failed(cache, "dm_cache_commit", r);

	return r;
}

static int commit_if_needed(struct cache *cache)
{
	int r = 0;

	if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
	    dm_cache_changed_this_transaction(cache->cmd)) {
		r = commit(cache, false);
		cache->commit_requested = false;
		cache->last_commit_jiffies = jiffies;
	}

	return r;
}

static void process_deferred_bios(struct cache *cache)
{
	bool prealloc_used = false;
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		prealloc_used = true;
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, &structs, bio);
		else
			process_bio(cache, &structs, bio);
	}

	if (prealloc_used)
		prealloc_free_structs(cache, &structs);
}

2009 */ 2010 prealloc_used = true; 2011 if (prealloc_data_structs(cache, &structs)) { 2012 spin_lock_irqsave(&cache->lock, flags); 2013 list_splice(&cells, &cache->deferred_cells); 2014 spin_unlock_irqrestore(&cache->lock, flags); 2015 break; 2016 } 2017 2018 process_cell(cache, &structs, cell); 2019 } 2020 2021 if (prealloc_used) 2022 prealloc_free_structs(cache, &structs); 2023 } 2024 2025 static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) 2026 { 2027 unsigned long flags; 2028 struct bio_list bios; 2029 struct bio *bio; 2030 2031 bio_list_init(&bios); 2032 2033 spin_lock_irqsave(&cache->lock, flags); 2034 bio_list_merge(&bios, &cache->deferred_flush_bios); 2035 bio_list_init(&cache->deferred_flush_bios); 2036 spin_unlock_irqrestore(&cache->lock, flags); 2037 2038 /* 2039 * These bios have already been through inc_ds() 2040 */ 2041 while ((bio = bio_list_pop(&bios))) 2042 submit_bios ? accounted_request(cache, bio) : bio_io_error(bio); 2043 } 2044 2045 static void process_deferred_writethrough_bios(struct cache *cache) 2046 { 2047 unsigned long flags; 2048 struct bio_list bios; 2049 struct bio *bio; 2050 2051 bio_list_init(&bios); 2052 2053 spin_lock_irqsave(&cache->lock, flags); 2054 bio_list_merge(&bios, &cache->deferred_writethrough_bios); 2055 bio_list_init(&cache->deferred_writethrough_bios); 2056 spin_unlock_irqrestore(&cache->lock, flags); 2057 2058 /* 2059 * These bios have already been through inc_ds() 2060 */ 2061 while ((bio = bio_list_pop(&bios))) 2062 accounted_request(cache, bio); 2063 } 2064 2065 static void writeback_some_dirty_blocks(struct cache *cache) 2066 { 2067 bool prealloc_used = false; 2068 dm_oblock_t oblock; 2069 dm_cblock_t cblock; 2070 struct prealloc structs; 2071 struct dm_bio_prison_cell *old_ocell; 2072 bool busy = !iot_idle_for(&cache->origin_tracker, HZ); 2073 2074 memset(&structs, 0, sizeof(structs)); 2075 2076 while (spare_migration_bandwidth(cache)) { 2077 if (policy_writeback_work(cache->policy, &oblock, &cblock, busy)) 2078 break; /* no work to do */ 2079 2080 prealloc_used = true; 2081 if (prealloc_data_structs(cache, &structs) || 2082 get_cell(cache, oblock, &structs, &old_ocell)) { 2083 policy_set_dirty(cache->policy, oblock); 2084 break; 2085 } 2086 2087 writeback(cache, &structs, oblock, cblock, old_ocell); 2088 } 2089 2090 if (prealloc_used) 2091 prealloc_free_structs(cache, &structs); 2092 } 2093 2094 /*---------------------------------------------------------------- 2095 * Invalidations. 2096 * Dropping something from the cache *without* writing back. 
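 *
 * Requests come from user space via the "invalidate_cblocks" target
 * message (see cache_message() further down); for example, with a
 * made-up device name:
 *
 *	dmsetup message my_cache 0 invalidate_cblocks 2345 3300-3330
 *
 * Each request is queued on cache->invalidation_requests and serviced
 * here by the worker.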
2097 *--------------------------------------------------------------*/ 2098 2099 static void process_invalidation_request(struct cache *cache, struct invalidation_request *req) 2100 { 2101 int r = 0; 2102 uint64_t begin = from_cblock(req->cblocks->begin); 2103 uint64_t end = from_cblock(req->cblocks->end); 2104 2105 while (begin != end) { 2106 r = policy_remove_cblock(cache->policy, to_cblock(begin)); 2107 if (!r) { 2108 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin)); 2109 if (r) { 2110 metadata_operation_failed(cache, "dm_cache_remove_mapping", r); 2111 break; 2112 } 2113 2114 } else if (r == -ENODATA) { 2115 /* harmless, already unmapped */ 2116 r = 0; 2117 2118 } else { 2119 DMERR("%s: policy_remove_cblock failed", cache_device_name(cache)); 2120 break; 2121 } 2122 2123 begin++; 2124 } 2125 2126 cache->commit_requested = true; 2127 2128 req->err = r; 2129 atomic_set(&req->complete, 1); 2130 2131 wake_up(&req->result_wait); 2132 } 2133 2134 static void process_invalidation_requests(struct cache *cache) 2135 { 2136 struct list_head list; 2137 struct invalidation_request *req, *tmp; 2138 2139 INIT_LIST_HEAD(&list); 2140 spin_lock(&cache->invalidation_lock); 2141 list_splice_init(&cache->invalidation_requests, &list); 2142 spin_unlock(&cache->invalidation_lock); 2143 2144 list_for_each_entry_safe (req, tmp, &list, list) 2145 process_invalidation_request(cache, req); 2146 } 2147 2148 /*---------------------------------------------------------------- 2149 * Main worker loop 2150 *--------------------------------------------------------------*/ 2151 static bool is_quiescing(struct cache *cache) 2152 { 2153 return atomic_read(&cache->quiescing); 2154 } 2155 2156 static void ack_quiescing(struct cache *cache) 2157 { 2158 if (is_quiescing(cache)) { 2159 atomic_inc(&cache->quiescing_ack); 2160 wake_up(&cache->quiescing_wait); 2161 } 2162 } 2163 2164 static void wait_for_quiescing_ack(struct cache *cache) 2165 { 2166 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack)); 2167 } 2168 2169 static void start_quiescing(struct cache *cache) 2170 { 2171 atomic_inc(&cache->quiescing); 2172 wait_for_quiescing_ack(cache); 2173 } 2174 2175 static void stop_quiescing(struct cache *cache) 2176 { 2177 atomic_set(&cache->quiescing, 0); 2178 atomic_set(&cache->quiescing_ack, 0); 2179 } 2180 2181 static void wait_for_migrations(struct cache *cache) 2182 { 2183 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations)); 2184 } 2185 2186 static void stop_worker(struct cache *cache) 2187 { 2188 cancel_delayed_work(&cache->waker); 2189 flush_workqueue(cache->wq); 2190 } 2191 2192 static void requeue_deferred_cells(struct cache *cache) 2193 { 2194 unsigned long flags; 2195 struct list_head cells; 2196 struct dm_bio_prison_cell *cell, *tmp; 2197 2198 INIT_LIST_HEAD(&cells); 2199 spin_lock_irqsave(&cache->lock, flags); 2200 list_splice_init(&cache->deferred_cells, &cells); 2201 spin_unlock_irqrestore(&cache->lock, flags); 2202 2203 list_for_each_entry_safe(cell, tmp, &cells, user_list) 2204 cell_requeue(cache, cell); 2205 } 2206 2207 static void requeue_deferred_bios(struct cache *cache) 2208 { 2209 struct bio *bio; 2210 struct bio_list bios; 2211 2212 bio_list_init(&bios); 2213 bio_list_merge(&bios, &cache->deferred_bios); 2214 bio_list_init(&cache->deferred_bios); 2215 2216 while ((bio = bio_list_pop(&bios))) { 2217 bio->bi_error = DM_ENDIO_REQUEUE; 2218 bio_endio(bio); 2219 } 2220 } 2221 2222 static int more_work(struct cache *cache) 2223 { 2224 if (is_quiescing(cache)) 
2225 return !list_empty(&cache->quiesced_migrations) || 2226 !list_empty(&cache->completed_migrations) || 2227 !list_empty(&cache->need_commit_migrations); 2228 else 2229 return !bio_list_empty(&cache->deferred_bios) || 2230 !list_empty(&cache->deferred_cells) || 2231 !bio_list_empty(&cache->deferred_flush_bios) || 2232 !bio_list_empty(&cache->deferred_writethrough_bios) || 2233 !list_empty(&cache->quiesced_migrations) || 2234 !list_empty(&cache->completed_migrations) || 2235 !list_empty(&cache->need_commit_migrations) || 2236 cache->invalidate; 2237 } 2238 2239 static void do_worker(struct work_struct *ws) 2240 { 2241 struct cache *cache = container_of(ws, struct cache, worker); 2242 2243 do { 2244 if (!is_quiescing(cache)) { 2245 writeback_some_dirty_blocks(cache); 2246 process_deferred_writethrough_bios(cache); 2247 process_deferred_bios(cache); 2248 process_deferred_cells(cache); 2249 process_invalidation_requests(cache); 2250 } 2251 2252 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); 2253 process_migrations(cache, &cache->completed_migrations, complete_migration); 2254 2255 if (commit_if_needed(cache)) { 2256 process_deferred_flush_bios(cache, false); 2257 process_migrations(cache, &cache->need_commit_migrations, migration_failure); 2258 } else { 2259 process_deferred_flush_bios(cache, true); 2260 process_migrations(cache, &cache->need_commit_migrations, 2261 migration_success_post_commit); 2262 } 2263 2264 ack_quiescing(cache); 2265 2266 } while (more_work(cache)); 2267 } 2268 2269 /* 2270 * We want to commit periodically so that not too much 2271 * unwritten metadata builds up. 2272 */ 2273 static void do_waker(struct work_struct *ws) 2274 { 2275 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); 2276 policy_tick(cache->policy, true); 2277 wake_worker(cache); 2278 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); 2279 } 2280 2281 /*----------------------------------------------------------------*/ 2282 2283 static int is_congested(struct dm_dev *dev, int bdi_bits) 2284 { 2285 struct request_queue *q = bdev_get_queue(dev->bdev); 2286 return bdi_congested(&q->backing_dev_info, bdi_bits); 2287 } 2288 2289 static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) 2290 { 2291 struct cache *cache = container_of(cb, struct cache, callbacks); 2292 2293 return is_congested(cache->origin_dev, bdi_bits) || 2294 is_congested(cache->cache_dev, bdi_bits); 2295 } 2296 2297 /*---------------------------------------------------------------- 2298 * Target methods 2299 *--------------------------------------------------------------*/ 2300 2301 /* 2302 * This function gets called on the error paths of the constructor, so we 2303 * have to cope with a partially initialised struct. 
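 *
 * For example, if dm_cache_metadata_open() fails in cache_create() then
 * cache->cmd, cache->wq and the bitsets are all still NULL, which is why
 * the fields below are checked (or must tolerate NULL) before teardown.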
2304 */ 2305 static void destroy(struct cache *cache) 2306 { 2307 unsigned i; 2308 2309 mempool_destroy(cache->migration_pool); 2310 2311 if (cache->all_io_ds) 2312 dm_deferred_set_destroy(cache->all_io_ds); 2313 2314 if (cache->prison) 2315 dm_bio_prison_destroy(cache->prison); 2316 2317 if (cache->wq) 2318 destroy_workqueue(cache->wq); 2319 2320 if (cache->dirty_bitset) 2321 free_bitset(cache->dirty_bitset); 2322 2323 if (cache->discard_bitset) 2324 free_bitset(cache->discard_bitset); 2325 2326 if (cache->copier) 2327 dm_kcopyd_client_destroy(cache->copier); 2328 2329 if (cache->cmd) 2330 dm_cache_metadata_close(cache->cmd); 2331 2332 if (cache->metadata_dev) 2333 dm_put_device(cache->ti, cache->metadata_dev); 2334 2335 if (cache->origin_dev) 2336 dm_put_device(cache->ti, cache->origin_dev); 2337 2338 if (cache->cache_dev) 2339 dm_put_device(cache->ti, cache->cache_dev); 2340 2341 if (cache->policy) 2342 dm_cache_policy_destroy(cache->policy); 2343 2344 for (i = 0; i < cache->nr_ctr_args ; i++) 2345 kfree(cache->ctr_args[i]); 2346 kfree(cache->ctr_args); 2347 2348 kfree(cache); 2349 } 2350 2351 static void cache_dtr(struct dm_target *ti) 2352 { 2353 struct cache *cache = ti->private; 2354 2355 destroy(cache); 2356 } 2357 2358 static sector_t get_dev_size(struct dm_dev *dev) 2359 { 2360 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; 2361 } 2362 2363 /*----------------------------------------------------------------*/ 2364 2365 /* 2366 * Construct a cache device mapping. 2367 * 2368 * cache <metadata dev> <cache dev> <origin dev> <block size> 2369 * <#feature args> [<feature arg>]* 2370 * <policy> <#policy args> [<policy arg>]* 2371 * 2372 * metadata dev : fast device holding the persistent metadata 2373 * cache dev : fast device holding cached data blocks 2374 * origin dev : slow device holding original data blocks 2375 * block size : cache unit size in sectors 2376 * 2377 * #feature args : number of feature arguments passed 2378 * feature args : writethrough. (The default is writeback.) 2379 * 2380 * policy : the replacement policy to use 2381 * #policy args : an even number of policy arguments corresponding 2382 * to key/value pairs passed to the policy 2383 * policy args : key/value pairs passed to the policy 2384 * E.g. 'sequential_threshold 1024' 2385 * See cache-policies.txt for details. 2386 * 2387 * Optional feature arguments are: 2388 * writethrough : write through caching that prohibits cache block 2389 * content from being different from origin block content. 2390 * Without this argument, the default behaviour is to write 2391 * back cache block contents later for performance reasons, 2392 * so they may differ from the corresponding origin blocks. 
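 *
 * An illustrative construction (device names and sizes are made up):
 *
 *	dmsetup create my_cache --table \
 *	  '0 41943040 cache /dev/mapper/meta /dev/mapper/ssd /dev/mapper/origin 512 1 writethrough default 0'
 *
 * i.e. 512 sector (256KB) cache blocks, writethrough mode, and the
 * 'default' policy with no policy arguments.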
2393 */ 2394 struct cache_args { 2395 struct dm_target *ti; 2396 2397 struct dm_dev *metadata_dev; 2398 2399 struct dm_dev *cache_dev; 2400 sector_t cache_sectors; 2401 2402 struct dm_dev *origin_dev; 2403 sector_t origin_sectors; 2404 2405 uint32_t block_size; 2406 2407 const char *policy_name; 2408 int policy_argc; 2409 const char **policy_argv; 2410 2411 struct cache_features features; 2412 }; 2413 2414 static void destroy_cache_args(struct cache_args *ca) 2415 { 2416 if (ca->metadata_dev) 2417 dm_put_device(ca->ti, ca->metadata_dev); 2418 2419 if (ca->cache_dev) 2420 dm_put_device(ca->ti, ca->cache_dev); 2421 2422 if (ca->origin_dev) 2423 dm_put_device(ca->ti, ca->origin_dev); 2424 2425 kfree(ca); 2426 } 2427 2428 static bool at_least_one_arg(struct dm_arg_set *as, char **error) 2429 { 2430 if (!as->argc) { 2431 *error = "Insufficient args"; 2432 return false; 2433 } 2434 2435 return true; 2436 } 2437 2438 static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, 2439 char **error) 2440 { 2441 int r; 2442 sector_t metadata_dev_size; 2443 char b[BDEVNAME_SIZE]; 2444 2445 if (!at_least_one_arg(as, error)) 2446 return -EINVAL; 2447 2448 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 2449 &ca->metadata_dev); 2450 if (r) { 2451 *error = "Error opening metadata device"; 2452 return r; 2453 } 2454 2455 metadata_dev_size = get_dev_size(ca->metadata_dev); 2456 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) 2457 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", 2458 bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); 2459 2460 return 0; 2461 } 2462 2463 static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, 2464 char **error) 2465 { 2466 int r; 2467 2468 if (!at_least_one_arg(as, error)) 2469 return -EINVAL; 2470 2471 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 2472 &ca->cache_dev); 2473 if (r) { 2474 *error = "Error opening cache device"; 2475 return r; 2476 } 2477 ca->cache_sectors = get_dev_size(ca->cache_dev); 2478 2479 return 0; 2480 } 2481 2482 static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, 2483 char **error) 2484 { 2485 int r; 2486 2487 if (!at_least_one_arg(as, error)) 2488 return -EINVAL; 2489 2490 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, 2491 &ca->origin_dev); 2492 if (r) { 2493 *error = "Error opening origin device"; 2494 return r; 2495 } 2496 2497 ca->origin_sectors = get_dev_size(ca->origin_dev); 2498 if (ca->ti->len > ca->origin_sectors) { 2499 *error = "Device size larger than cached device"; 2500 return -EINVAL; 2501 } 2502 2503 return 0; 2504 } 2505 2506 static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, 2507 char **error) 2508 { 2509 unsigned long block_size; 2510 2511 if (!at_least_one_arg(as, error)) 2512 return -EINVAL; 2513 2514 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size || 2515 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || 2516 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || 2517 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { 2518 *error = "Invalid data block size"; 2519 return -EINVAL; 2520 } 2521 2522 if (block_size > ca->cache_sectors) { 2523 *error = "Data block size is larger than the cache device"; 2524 return -EINVAL; 2525 } 2526 2527 ca->block_size = block_size; 2528 2529 return 0; 2530 } 2531 2532 static void init_features(struct cache_features *cf) 2533 { 2534 cf->mode = CM_WRITE; 2535 cf->io_mode = 
CM_IO_WRITEBACK; 2536 } 2537 2538 static int parse_features(struct cache_args *ca, struct dm_arg_set *as, 2539 char **error) 2540 { 2541 static struct dm_arg _args[] = { 2542 {0, 1, "Invalid number of cache feature arguments"}, 2543 }; 2544 2545 int r; 2546 unsigned argc; 2547 const char *arg; 2548 struct cache_features *cf = &ca->features; 2549 2550 init_features(cf); 2551 2552 r = dm_read_arg_group(_args, as, &argc, error); 2553 if (r) 2554 return -EINVAL; 2555 2556 while (argc--) { 2557 arg = dm_shift_arg(as); 2558 2559 if (!strcasecmp(arg, "writeback")) 2560 cf->io_mode = CM_IO_WRITEBACK; 2561 2562 else if (!strcasecmp(arg, "writethrough")) 2563 cf->io_mode = CM_IO_WRITETHROUGH; 2564 2565 else if (!strcasecmp(arg, "passthrough")) 2566 cf->io_mode = CM_IO_PASSTHROUGH; 2567 2568 else { 2569 *error = "Unrecognised cache feature requested"; 2570 return -EINVAL; 2571 } 2572 } 2573 2574 return 0; 2575 } 2576 2577 static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, 2578 char **error) 2579 { 2580 static struct dm_arg _args[] = { 2581 {0, 1024, "Invalid number of policy arguments"}, 2582 }; 2583 2584 int r; 2585 2586 if (!at_least_one_arg(as, error)) 2587 return -EINVAL; 2588 2589 ca->policy_name = dm_shift_arg(as); 2590 2591 r = dm_read_arg_group(_args, as, &ca->policy_argc, error); 2592 if (r) 2593 return -EINVAL; 2594 2595 ca->policy_argv = (const char **)as->argv; 2596 dm_consume_args(as, ca->policy_argc); 2597 2598 return 0; 2599 } 2600 2601 static int parse_cache_args(struct cache_args *ca, int argc, char **argv, 2602 char **error) 2603 { 2604 int r; 2605 struct dm_arg_set as; 2606 2607 as.argc = argc; 2608 as.argv = argv; 2609 2610 r = parse_metadata_dev(ca, &as, error); 2611 if (r) 2612 return r; 2613 2614 r = parse_cache_dev(ca, &as, error); 2615 if (r) 2616 return r; 2617 2618 r = parse_origin_dev(ca, &as, error); 2619 if (r) 2620 return r; 2621 2622 r = parse_block_size(ca, &as, error); 2623 if (r) 2624 return r; 2625 2626 r = parse_features(ca, &as, error); 2627 if (r) 2628 return r; 2629 2630 r = parse_policy(ca, &as, error); 2631 if (r) 2632 return r; 2633 2634 return 0; 2635 } 2636 2637 /*----------------------------------------------------------------*/ 2638 2639 static struct kmem_cache *migration_cache; 2640 2641 #define NOT_CORE_OPTION 1 2642 2643 static int process_config_option(struct cache *cache, const char *key, const char *value) 2644 { 2645 unsigned long tmp; 2646 2647 if (!strcasecmp(key, "migration_threshold")) { 2648 if (kstrtoul(value, 10, &tmp)) 2649 return -EINVAL; 2650 2651 cache->migration_threshold = tmp; 2652 return 0; 2653 } 2654 2655 return NOT_CORE_OPTION; 2656 } 2657 2658 static int set_config_value(struct cache *cache, const char *key, const char *value) 2659 { 2660 int r = process_config_option(cache, key, value); 2661 2662 if (r == NOT_CORE_OPTION) 2663 r = policy_set_config_value(cache->policy, key, value); 2664 2665 if (r) 2666 DMWARN("bad config value for %s: %s", key, value); 2667 2668 return r; 2669 } 2670 2671 static int set_config_values(struct cache *cache, int argc, const char **argv) 2672 { 2673 int r = 0; 2674 2675 if (argc & 1) { 2676 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); 2677 return -EINVAL; 2678 } 2679 2680 while (argc) { 2681 r = set_config_value(cache, argv[0], argv[1]); 2682 if (r) 2683 break; 2684 2685 argc -= 2; 2686 argv += 2; 2687 } 2688 2689 return r; 2690 } 2691 2692 static int create_cache_policy(struct cache *cache, struct cache_args *ca, 2693 char **error) 
2694 { 2695 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name, 2696 cache->cache_size, 2697 cache->origin_sectors, 2698 cache->sectors_per_block); 2699 if (IS_ERR(p)) { 2700 *error = "Error creating cache's policy"; 2701 return PTR_ERR(p); 2702 } 2703 cache->policy = p; 2704 2705 return 0; 2706 } 2707 2708 /* 2709 * We want the discard block size to be at least the size of the cache 2710 * block size and have no more than 2^14 discard blocks across the origin. 2711 */ 2712 #define MAX_DISCARD_BLOCKS (1 << 14) 2713 2714 static bool too_many_discard_blocks(sector_t discard_block_size, 2715 sector_t origin_size) 2716 { 2717 (void) sector_div(origin_size, discard_block_size); 2718 2719 return origin_size > MAX_DISCARD_BLOCKS; 2720 } 2721 2722 static sector_t calculate_discard_block_size(sector_t cache_block_size, 2723 sector_t origin_size) 2724 { 2725 sector_t discard_block_size = cache_block_size; 2726 2727 if (origin_size) 2728 while (too_many_discard_blocks(discard_block_size, origin_size)) 2729 discard_block_size *= 2; 2730 2731 return discard_block_size; 2732 } 2733 2734 static void set_cache_size(struct cache *cache, dm_cblock_t size) 2735 { 2736 dm_block_t nr_blocks = from_cblock(size); 2737 2738 if (nr_blocks > (1 << 20) && cache->cache_size != size) 2739 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n" 2740 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n" 2741 "Please consider increasing the cache block size to reduce the overall cache block count.", 2742 (unsigned long long) nr_blocks); 2743 2744 cache->cache_size = size; 2745 } 2746 2747 #define DEFAULT_MIGRATION_THRESHOLD 2048 2748 2749 static int cache_create(struct cache_args *ca, struct cache **result) 2750 { 2751 int r = 0; 2752 char **error = &ca->ti->error; 2753 struct cache *cache; 2754 struct dm_target *ti = ca->ti; 2755 dm_block_t origin_blocks; 2756 struct dm_cache_metadata *cmd; 2757 bool may_format = ca->features.mode == CM_WRITE; 2758 2759 cache = kzalloc(sizeof(*cache), GFP_KERNEL); 2760 if (!cache) 2761 return -ENOMEM; 2762 2763 cache->ti = ca->ti; 2764 ti->private = cache; 2765 ti->num_flush_bios = 2; 2766 ti->flush_supported = true; 2767 2768 ti->num_discard_bios = 1; 2769 ti->discards_supported = true; 2770 ti->discard_zeroes_data_unsupported = true; 2771 ti->split_discard_bios = false; 2772 2773 cache->features = ca->features; 2774 ti->per_bio_data_size = get_per_bio_data_size(cache); 2775 2776 cache->callbacks.congested_fn = cache_is_congested; 2777 dm_table_add_target_callbacks(ti->table, &cache->callbacks); 2778 2779 cache->metadata_dev = ca->metadata_dev; 2780 cache->origin_dev = ca->origin_dev; 2781 cache->cache_dev = ca->cache_dev; 2782 2783 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; 2784 2785 /* FIXME: factor out this whole section */ 2786 origin_blocks = cache->origin_sectors = ca->origin_sectors; 2787 origin_blocks = block_div(origin_blocks, ca->block_size); 2788 cache->origin_blocks = to_oblock(origin_blocks); 2789 2790 cache->sectors_per_block = ca->block_size; 2791 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { 2792 r = -EINVAL; 2793 goto bad; 2794 } 2795 2796 if (ca->block_size & (ca->block_size - 1)) { 2797 dm_block_t cache_size = ca->cache_sectors; 2798 2799 cache->sectors_per_block_shift = -1; 2800 cache_size = block_div(cache_size, ca->block_size); 2801 set_cache_size(cache, to_cblock(cache_size)); 2802 } else { 2803 cache->sectors_per_block_shift = 
__ffs(ca->block_size); 2804 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift)); 2805 } 2806 2807 r = create_cache_policy(cache, ca, error); 2808 if (r) 2809 goto bad; 2810 2811 cache->policy_nr_args = ca->policy_argc; 2812 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; 2813 2814 r = set_config_values(cache, ca->policy_argc, ca->policy_argv); 2815 if (r) { 2816 *error = "Error setting cache policy's config values"; 2817 goto bad; 2818 } 2819 2820 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, 2821 ca->block_size, may_format, 2822 dm_cache_policy_get_hint_size(cache->policy)); 2823 if (IS_ERR(cmd)) { 2824 *error = "Error creating metadata object"; 2825 r = PTR_ERR(cmd); 2826 goto bad; 2827 } 2828 cache->cmd = cmd; 2829 set_cache_mode(cache, CM_WRITE); 2830 if (get_cache_mode(cache) != CM_WRITE) { 2831 *error = "Unable to get write access to metadata, please check/repair metadata."; 2832 r = -EINVAL; 2833 goto bad; 2834 } 2835 2836 if (passthrough_mode(&cache->features)) { 2837 bool all_clean; 2838 2839 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); 2840 if (r) { 2841 *error = "dm_cache_metadata_all_clean() failed"; 2842 goto bad; 2843 } 2844 2845 if (!all_clean) { 2846 *error = "Cannot enter passthrough mode unless all blocks are clean"; 2847 r = -EINVAL; 2848 goto bad; 2849 } 2850 } 2851 2852 spin_lock_init(&cache->lock); 2853 INIT_LIST_HEAD(&cache->deferred_cells); 2854 bio_list_init(&cache->deferred_bios); 2855 bio_list_init(&cache->deferred_flush_bios); 2856 bio_list_init(&cache->deferred_writethrough_bios); 2857 INIT_LIST_HEAD(&cache->quiesced_migrations); 2858 INIT_LIST_HEAD(&cache->completed_migrations); 2859 INIT_LIST_HEAD(&cache->need_commit_migrations); 2860 atomic_set(&cache->nr_allocated_migrations, 0); 2861 atomic_set(&cache->nr_io_migrations, 0); 2862 init_waitqueue_head(&cache->migration_wait); 2863 2864 init_waitqueue_head(&cache->quiescing_wait); 2865 atomic_set(&cache->quiescing, 0); 2866 atomic_set(&cache->quiescing_ack, 0); 2867 2868 r = -ENOMEM; 2869 atomic_set(&cache->nr_dirty, 0); 2870 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); 2871 if (!cache->dirty_bitset) { 2872 *error = "could not allocate dirty bitset"; 2873 goto bad; 2874 } 2875 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); 2876 2877 cache->discard_block_size = 2878 calculate_discard_block_size(cache->sectors_per_block, 2879 cache->origin_sectors); 2880 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors, 2881 cache->discard_block_size)); 2882 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); 2883 if (!cache->discard_bitset) { 2884 *error = "could not allocate discard bitset"; 2885 goto bad; 2886 } 2887 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 2888 2889 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); 2890 if (IS_ERR(cache->copier)) { 2891 *error = "could not create kcopyd client"; 2892 r = PTR_ERR(cache->copier); 2893 goto bad; 2894 } 2895 2896 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 2897 if (!cache->wq) { 2898 *error = "could not create workqueue for metadata object"; 2899 goto bad; 2900 } 2901 INIT_WORK(&cache->worker, do_worker); 2902 INIT_DELAYED_WORK(&cache->waker, do_waker); 2903 cache->last_commit_jiffies = jiffies; 2904 2905 cache->prison = dm_bio_prison_create(); 2906 if (!cache->prison) { 2907 *error = "could not create bio prison"; 2908 goto bad; 2909 } 2910 
2911 cache->all_io_ds = dm_deferred_set_create(); 2912 if (!cache->all_io_ds) { 2913 *error = "could not create all_io deferred set"; 2914 goto bad; 2915 } 2916 2917 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, 2918 migration_cache); 2919 if (!cache->migration_pool) { 2920 *error = "Error creating cache's migration mempool"; 2921 goto bad; 2922 } 2923 2924 cache->need_tick_bio = true; 2925 cache->sized = false; 2926 cache->invalidate = false; 2927 cache->commit_requested = false; 2928 cache->loaded_mappings = false; 2929 cache->loaded_discards = false; 2930 2931 load_stats(cache); 2932 2933 atomic_set(&cache->stats.demotion, 0); 2934 atomic_set(&cache->stats.promotion, 0); 2935 atomic_set(&cache->stats.copies_avoided, 0); 2936 atomic_set(&cache->stats.cache_cell_clash, 0); 2937 atomic_set(&cache->stats.commit_count, 0); 2938 atomic_set(&cache->stats.discard_count, 0); 2939 2940 spin_lock_init(&cache->invalidation_lock); 2941 INIT_LIST_HEAD(&cache->invalidation_requests); 2942 2943 iot_init(&cache->origin_tracker); 2944 2945 *result = cache; 2946 return 0; 2947 2948 bad: 2949 destroy(cache); 2950 return r; 2951 } 2952 2953 static int copy_ctr_args(struct cache *cache, int argc, const char **argv) 2954 { 2955 unsigned i; 2956 const char **copy; 2957 2958 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 2959 if (!copy) 2960 return -ENOMEM; 2961 for (i = 0; i < argc; i++) { 2962 copy[i] = kstrdup(argv[i], GFP_KERNEL); 2963 if (!copy[i]) { 2964 while (i--) 2965 kfree(copy[i]); 2966 kfree(copy); 2967 return -ENOMEM; 2968 } 2969 } 2970 2971 cache->nr_ctr_args = argc; 2972 cache->ctr_args = copy; 2973 2974 return 0; 2975 } 2976 2977 static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) 2978 { 2979 int r = -EINVAL; 2980 struct cache_args *ca; 2981 struct cache *cache = NULL; 2982 2983 ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2984 if (!ca) { 2985 ti->error = "Error allocating memory for cache"; 2986 return -ENOMEM; 2987 } 2988 ca->ti = ti; 2989 2990 r = parse_cache_args(ca, argc, argv, &ti->error); 2991 if (r) 2992 goto out; 2993 2994 r = cache_create(ca, &cache); 2995 if (r) 2996 goto out; 2997 2998 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); 2999 if (r) { 3000 destroy(cache); 3001 goto out; 3002 } 3003 3004 ti->private = cache; 3005 3006 out: 3007 destroy_cache_args(ca); 3008 return r; 3009 } 3010 3011 /*----------------------------------------------------------------*/ 3012 3013 static int cache_map(struct dm_target *ti, struct bio *bio) 3014 { 3015 struct cache *cache = ti->private; 3016 3017 int r; 3018 struct dm_bio_prison_cell *cell = NULL; 3019 dm_oblock_t block = get_bio_block(cache, bio); 3020 size_t pb_data_size = get_per_bio_data_size(cache); 3021 bool can_migrate = false; 3022 bool fast_promotion; 3023 struct policy_result lookup_result; 3024 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); 3025 struct old_oblock_lock ool; 3026 3027 ool.locker.fn = null_locker; 3028 3029 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { 3030 /* 3031 * This can only occur if the io goes to a partial block at 3032 * the end of the origin device. We don't cache these. 3033 * Just remap to the origin and carry on. 3034 */ 3035 remap_to_origin(cache, bio); 3036 accounted_begin(cache, bio); 3037 return DM_MAPIO_REMAPPED; 3038 } 3039 3040 if (discard_or_flush(bio)) { 3041 defer_bio(cache, bio); 3042 return DM_MAPIO_SUBMITTED; 3043 } 3044 3045 /* 3046 * Check to see if that block is currently migrating. 
3047 */ 3048 cell = alloc_prison_cell(cache); 3049 if (!cell) { 3050 defer_bio(cache, bio); 3051 return DM_MAPIO_SUBMITTED; 3052 } 3053 3054 r = bio_detain(cache, block, bio, cell, 3055 (cell_free_fn) free_prison_cell, 3056 cache, &cell); 3057 if (r) { 3058 if (r < 0) 3059 defer_bio(cache, bio); 3060 3061 return DM_MAPIO_SUBMITTED; 3062 } 3063 3064 fast_promotion = is_discarded_oblock(cache, block) || bio_writes_complete_block(cache, bio); 3065 3066 r = policy_map(cache->policy, block, false, can_migrate, fast_promotion, 3067 bio, &ool.locker, &lookup_result); 3068 if (r == -EWOULDBLOCK) { 3069 cell_defer(cache, cell, true); 3070 return DM_MAPIO_SUBMITTED; 3071 3072 } else if (r) { 3073 DMERR_LIMIT("%s: Unexpected return from cache replacement policy: %d", 3074 cache_device_name(cache), r); 3075 cell_defer(cache, cell, false); 3076 bio_io_error(bio); 3077 return DM_MAPIO_SUBMITTED; 3078 } 3079 3080 r = DM_MAPIO_REMAPPED; 3081 switch (lookup_result.op) { 3082 case POLICY_HIT: 3083 if (passthrough_mode(&cache->features)) { 3084 if (bio_data_dir(bio) == WRITE) { 3085 /* 3086 * We need to invalidate this block, so 3087 * defer for the worker thread. 3088 */ 3089 cell_defer(cache, cell, true); 3090 r = DM_MAPIO_SUBMITTED; 3091 3092 } else { 3093 inc_miss_counter(cache, bio); 3094 remap_to_origin_clear_discard(cache, bio, block); 3095 accounted_begin(cache, bio); 3096 inc_ds(cache, bio, cell); 3097 // FIXME: we want to remap hits or misses straight 3098 // away rather than passing over to the worker. 3099 cell_defer(cache, cell, false); 3100 } 3101 3102 } else { 3103 inc_hit_counter(cache, bio); 3104 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 3105 !is_dirty(cache, lookup_result.cblock)) { 3106 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 3107 accounted_begin(cache, bio); 3108 inc_ds(cache, bio, cell); 3109 cell_defer(cache, cell, false); 3110 3111 } else 3112 remap_cell_to_cache_dirty(cache, cell, block, lookup_result.cblock, false); 3113 } 3114 break; 3115 3116 case POLICY_MISS: 3117 inc_miss_counter(cache, bio); 3118 if (pb->req_nr != 0) { 3119 /* 3120 * This is a duplicate writethrough io that is no 3121 * longer needed because the block has been demoted. 
3122 */ 3123 bio_endio(bio); 3124 // FIXME: remap everything as a miss 3125 cell_defer(cache, cell, false); 3126 r = DM_MAPIO_SUBMITTED; 3127 3128 } else 3129 remap_cell_to_origin_clear_discard(cache, cell, block, false); 3130 break; 3131 3132 default: 3133 DMERR_LIMIT("%s: %s: erroring bio: unknown policy op: %u", 3134 cache_device_name(cache), __func__, 3135 (unsigned) lookup_result.op); 3136 cell_defer(cache, cell, false); 3137 bio_io_error(bio); 3138 r = DM_MAPIO_SUBMITTED; 3139 } 3140 3141 return r; 3142 } 3143 3144 static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 3145 { 3146 struct cache *cache = ti->private; 3147 unsigned long flags; 3148 size_t pb_data_size = get_per_bio_data_size(cache); 3149 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); 3150 3151 if (pb->tick) { 3152 policy_tick(cache->policy, false); 3153 3154 spin_lock_irqsave(&cache->lock, flags); 3155 cache->need_tick_bio = true; 3156 spin_unlock_irqrestore(&cache->lock, flags); 3157 } 3158 3159 check_for_quiesced_migrations(cache, pb); 3160 accounted_complete(cache, bio); 3161 3162 return 0; 3163 } 3164 3165 static int write_dirty_bitset(struct cache *cache) 3166 { 3167 unsigned i, r; 3168 3169 if (get_cache_mode(cache) >= CM_READ_ONLY) 3170 return -EINVAL; 3171 3172 for (i = 0; i < from_cblock(cache->cache_size); i++) { 3173 r = dm_cache_set_dirty(cache->cmd, to_cblock(i), 3174 is_dirty(cache, to_cblock(i))); 3175 if (r) { 3176 metadata_operation_failed(cache, "dm_cache_set_dirty", r); 3177 return r; 3178 } 3179 } 3180 3181 return 0; 3182 } 3183 3184 static int write_discard_bitset(struct cache *cache) 3185 { 3186 unsigned i, r; 3187 3188 if (get_cache_mode(cache) >= CM_READ_ONLY) 3189 return -EINVAL; 3190 3191 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, 3192 cache->discard_nr_blocks); 3193 if (r) { 3194 DMERR("%s: could not resize on-disk discard bitset", cache_device_name(cache)); 3195 metadata_operation_failed(cache, "dm_cache_discard_bitset_resize", r); 3196 return r; 3197 } 3198 3199 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { 3200 r = dm_cache_set_discard(cache->cmd, to_dblock(i), 3201 is_discarded(cache, to_dblock(i))); 3202 if (r) { 3203 metadata_operation_failed(cache, "dm_cache_set_discard", r); 3204 return r; 3205 } 3206 } 3207 3208 return 0; 3209 } 3210 3211 static int write_hints(struct cache *cache) 3212 { 3213 int r; 3214 3215 if (get_cache_mode(cache) >= CM_READ_ONLY) 3216 return -EINVAL; 3217 3218 r = dm_cache_write_hints(cache->cmd, cache->policy); 3219 if (r) { 3220 metadata_operation_failed(cache, "dm_cache_write_hints", r); 3221 return r; 3222 } 3223 3224 return 0; 3225 } 3226 3227 /* 3228 * returns true on success 3229 */ 3230 static bool sync_metadata(struct cache *cache) 3231 { 3232 int r1, r2, r3, r4; 3233 3234 r1 = write_dirty_bitset(cache); 3235 if (r1) 3236 DMERR("%s: could not write dirty bitset", cache_device_name(cache)); 3237 3238 r2 = write_discard_bitset(cache); 3239 if (r2) 3240 DMERR("%s: could not write discard bitset", cache_device_name(cache)); 3241 3242 save_stats(cache); 3243 3244 r3 = write_hints(cache); 3245 if (r3) 3246 DMERR("%s: could not write hints", cache_device_name(cache)); 3247 3248 /* 3249 * If writing the above metadata failed, we still commit, but don't 3250 * set the clean shutdown flag. This will effectively force every 3251 * dirty bit to be set on reload. 
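	 *
	 * E.g. if write_dirty_bitset() failed above we still call
	 * commit(cache, false); on the next activation the missing clean
	 * shutdown flag makes every cached block look dirty, so a block that
	 * really was dirty can never be silently treated as clean.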
3252 */ 3253 r4 = commit(cache, !r1 && !r2 && !r3); 3254 if (r4) 3255 DMERR("%s: could not write cache metadata", cache_device_name(cache)); 3256 3257 return !r1 && !r2 && !r3 && !r4; 3258 } 3259 3260 static void cache_postsuspend(struct dm_target *ti) 3261 { 3262 struct cache *cache = ti->private; 3263 3264 start_quiescing(cache); 3265 wait_for_migrations(cache); 3266 stop_worker(cache); 3267 requeue_deferred_bios(cache); 3268 requeue_deferred_cells(cache); 3269 stop_quiescing(cache); 3270 3271 if (get_cache_mode(cache) == CM_WRITE) 3272 (void) sync_metadata(cache); 3273 } 3274 3275 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 3276 bool dirty, uint32_t hint, bool hint_valid) 3277 { 3278 int r; 3279 struct cache *cache = context; 3280 3281 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); 3282 if (r) 3283 return r; 3284 3285 if (dirty) 3286 set_dirty(cache, oblock, cblock); 3287 else 3288 clear_dirty(cache, oblock, cblock); 3289 3290 return 0; 3291 } 3292 3293 /* 3294 * The discard block size in the on disk metadata is not 3295 * neccessarily the same as we're currently using. So we have to 3296 * be careful to only set the discarded attribute if we know it 3297 * covers a complete block of the new size. 3298 */ 3299 struct discard_load_info { 3300 struct cache *cache; 3301 3302 /* 3303 * These blocks are sized using the on disk dblock size, rather 3304 * than the current one. 3305 */ 3306 dm_block_t block_size; 3307 dm_block_t discard_begin, discard_end; 3308 }; 3309 3310 static void discard_load_info_init(struct cache *cache, 3311 struct discard_load_info *li) 3312 { 3313 li->cache = cache; 3314 li->discard_begin = li->discard_end = 0; 3315 } 3316 3317 static void set_discard_range(struct discard_load_info *li) 3318 { 3319 sector_t b, e; 3320 3321 if (li->discard_begin == li->discard_end) 3322 return; 3323 3324 /* 3325 * Convert to sectors. 3326 */ 3327 b = li->discard_begin * li->block_size; 3328 e = li->discard_end * li->block_size; 3329 3330 /* 3331 * Then convert back to the current dblock size. 3332 */ 3333 b = dm_sector_div_up(b, li->cache->discard_block_size); 3334 sector_div(e, li->cache->discard_block_size); 3335 3336 /* 3337 * The origin may have shrunk, so we need to check we're still in 3338 * bounds. 3339 */ 3340 if (e > from_dblock(li->cache->discard_nr_blocks)) 3341 e = from_dblock(li->cache->discard_nr_blocks); 3342 3343 for (; b < e; b++) 3344 set_discard(li->cache, to_dblock(b)); 3345 } 3346 3347 static int load_discard(void *context, sector_t discard_block_size, 3348 dm_dblock_t dblock, bool discard) 3349 { 3350 struct discard_load_info *li = context; 3351 3352 li->block_size = discard_block_size; 3353 3354 if (discard) { 3355 if (from_dblock(dblock) == li->discard_end) 3356 /* 3357 * We're already in a discard range, just extend it. 3358 */ 3359 li->discard_end = li->discard_end + 1ULL; 3360 3361 else { 3362 /* 3363 * Emit the old range and start a new one. 
3364 */ 3365 set_discard_range(li); 3366 li->discard_begin = from_dblock(dblock); 3367 li->discard_end = li->discard_begin + 1ULL; 3368 } 3369 } else { 3370 set_discard_range(li); 3371 li->discard_begin = li->discard_end = 0; 3372 } 3373 3374 return 0; 3375 } 3376 3377 static dm_cblock_t get_cache_dev_size(struct cache *cache) 3378 { 3379 sector_t size = get_dev_size(cache->cache_dev); 3380 (void) sector_div(size, cache->sectors_per_block); 3381 return to_cblock(size); 3382 } 3383 3384 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 3385 { 3386 if (from_cblock(new_size) > from_cblock(cache->cache_size)) 3387 return true; 3388 3389 /* 3390 * We can't drop a dirty block when shrinking the cache. 3391 */ 3392 while (from_cblock(new_size) < from_cblock(cache->cache_size)) { 3393 new_size = to_cblock(from_cblock(new_size) + 1); 3394 if (is_dirty(cache, new_size)) { 3395 DMERR("%s: unable to shrink cache; cache block %llu is dirty", 3396 cache_device_name(cache), 3397 (unsigned long long) from_cblock(new_size)); 3398 return false; 3399 } 3400 } 3401 3402 return true; 3403 } 3404 3405 static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) 3406 { 3407 int r; 3408 3409 r = dm_cache_resize(cache->cmd, new_size); 3410 if (r) { 3411 DMERR("%s: could not resize cache metadata", cache_device_name(cache)); 3412 metadata_operation_failed(cache, "dm_cache_resize", r); 3413 return r; 3414 } 3415 3416 set_cache_size(cache, new_size); 3417 3418 return 0; 3419 } 3420 3421 static int cache_preresume(struct dm_target *ti) 3422 { 3423 int r = 0; 3424 struct cache *cache = ti->private; 3425 dm_cblock_t csize = get_cache_dev_size(cache); 3426 3427 /* 3428 * Check to see if the cache has resized. 3429 */ 3430 if (!cache->sized) { 3431 r = resize_cache_dev(cache, csize); 3432 if (r) 3433 return r; 3434 3435 cache->sized = true; 3436 3437 } else if (csize != cache->cache_size) { 3438 if (!can_resize(cache, csize)) 3439 return -EINVAL; 3440 3441 r = resize_cache_dev(cache, csize); 3442 if (r) 3443 return r; 3444 } 3445 3446 if (!cache->loaded_mappings) { 3447 r = dm_cache_load_mappings(cache->cmd, cache->policy, 3448 load_mapping, cache); 3449 if (r) { 3450 DMERR("%s: could not load cache mappings", cache_device_name(cache)); 3451 metadata_operation_failed(cache, "dm_cache_load_mappings", r); 3452 return r; 3453 } 3454 3455 cache->loaded_mappings = true; 3456 } 3457 3458 if (!cache->loaded_discards) { 3459 struct discard_load_info li; 3460 3461 /* 3462 * The discard bitset could have been resized, or the 3463 * discard block size changed. To be safe we start by 3464 * setting every dblock to not discarded. 
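		 *
		 * load_discard() below then rebuilds the set, with
		 * set_discard_range() converting from the on-disk dblock size
		 * to the current one.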
3465 */ 3466 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); 3467 3468 discard_load_info_init(cache, &li); 3469 r = dm_cache_load_discards(cache->cmd, load_discard, &li); 3470 if (r) { 3471 DMERR("%s: could not load origin discards", cache_device_name(cache)); 3472 metadata_operation_failed(cache, "dm_cache_load_discards", r); 3473 return r; 3474 } 3475 set_discard_range(&li); 3476 3477 cache->loaded_discards = true; 3478 } 3479 3480 return r; 3481 } 3482 3483 static void cache_resume(struct dm_target *ti) 3484 { 3485 struct cache *cache = ti->private; 3486 3487 cache->need_tick_bio = true; 3488 do_waker(&cache->waker.work); 3489 } 3490 3491 /* 3492 * Status format: 3493 * 3494 * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 3495 * <cache block size> <#used cache blocks>/<#total cache blocks> 3496 * <#read hits> <#read misses> <#write hits> <#write misses> 3497 * <#demotions> <#promotions> <#dirty> 3498 * <#features> <features>* 3499 * <#core args> <core args> 3500 * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check> 3501 */ 3502 static void cache_status(struct dm_target *ti, status_type_t type, 3503 unsigned status_flags, char *result, unsigned maxlen) 3504 { 3505 int r = 0; 3506 unsigned i; 3507 ssize_t sz = 0; 3508 dm_block_t nr_free_blocks_metadata = 0; 3509 dm_block_t nr_blocks_metadata = 0; 3510 char buf[BDEVNAME_SIZE]; 3511 struct cache *cache = ti->private; 3512 dm_cblock_t residency; 3513 3514 switch (type) { 3515 case STATUSTYPE_INFO: 3516 if (get_cache_mode(cache) == CM_FAIL) { 3517 DMEMIT("Fail"); 3518 break; 3519 } 3520 3521 /* Commit to ensure statistics aren't out-of-date */ 3522 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) 3523 (void) commit(cache, false); 3524 3525 r = dm_cache_get_free_metadata_block_count(cache->cmd, &nr_free_blocks_metadata); 3526 if (r) { 3527 DMERR("%s: dm_cache_get_free_metadata_block_count returned %d", 3528 cache_device_name(cache), r); 3529 goto err; 3530 } 3531 3532 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); 3533 if (r) { 3534 DMERR("%s: dm_cache_get_metadata_dev_size returned %d", 3535 cache_device_name(cache), r); 3536 goto err; 3537 } 3538 3539 residency = policy_residency(cache->policy); 3540 3541 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", 3542 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE, 3543 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 3544 (unsigned long long)nr_blocks_metadata, 3545 cache->sectors_per_block, 3546 (unsigned long long) from_cblock(residency), 3547 (unsigned long long) from_cblock(cache->cache_size), 3548 (unsigned) atomic_read(&cache->stats.read_hit), 3549 (unsigned) atomic_read(&cache->stats.read_miss), 3550 (unsigned) atomic_read(&cache->stats.write_hit), 3551 (unsigned) atomic_read(&cache->stats.write_miss), 3552 (unsigned) atomic_read(&cache->stats.demotion), 3553 (unsigned) atomic_read(&cache->stats.promotion), 3554 (unsigned long) atomic_read(&cache->nr_dirty)); 3555 3556 if (writethrough_mode(&cache->features)) 3557 DMEMIT("1 writethrough "); 3558 3559 else if (passthrough_mode(&cache->features)) 3560 DMEMIT("1 passthrough "); 3561 3562 else if (writeback_mode(&cache->features)) 3563 DMEMIT("1 writeback "); 3564 3565 else { 3566 DMERR("%s: internal error: unknown io mode: %d", 3567 cache_device_name(cache), (int) cache->features.io_mode); 3568 goto err; 3569 } 3570 3571 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); 
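		/*
		 * Remaining fields: <policy name> <#policy args> <policy args>*
		 * <rw|ro> <needs_check|->, matching the format comment above.
		 */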
3572 3573 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy)); 3574 if (sz < maxlen) { 3575 r = policy_emit_config_values(cache->policy, result, maxlen, &sz); 3576 if (r) 3577 DMERR("%s: policy_emit_config_values returned %d", 3578 cache_device_name(cache), r); 3579 } 3580 3581 if (get_cache_mode(cache) == CM_READ_ONLY) 3582 DMEMIT("ro "); 3583 else 3584 DMEMIT("rw "); 3585 3586 if (dm_cache_metadata_needs_check(cache->cmd)) 3587 DMEMIT("needs_check "); 3588 else 3589 DMEMIT("- "); 3590 3591 break; 3592 3593 case STATUSTYPE_TABLE: 3594 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); 3595 DMEMIT("%s ", buf); 3596 format_dev_t(buf, cache->cache_dev->bdev->bd_dev); 3597 DMEMIT("%s ", buf); 3598 format_dev_t(buf, cache->origin_dev->bdev->bd_dev); 3599 DMEMIT("%s", buf); 3600 3601 for (i = 0; i < cache->nr_ctr_args - 1; i++) 3602 DMEMIT(" %s", cache->ctr_args[i]); 3603 if (cache->nr_ctr_args) 3604 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); 3605 } 3606 3607 return; 3608 3609 err: 3610 DMEMIT("Error"); 3611 } 3612 3613 /* 3614 * A cache block range can take two forms: 3615 * 3616 * i) A single cblock, eg. '3456' 3617 * ii) A begin and end cblock with dots between, eg. 123-234 3618 */ 3619 static int parse_cblock_range(struct cache *cache, const char *str, 3620 struct cblock_range *result) 3621 { 3622 char dummy; 3623 uint64_t b, e; 3624 int r; 3625 3626 /* 3627 * Try and parse form (ii) first. 3628 */ 3629 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy); 3630 if (r < 0) 3631 return r; 3632 3633 if (r == 2) { 3634 result->begin = to_cblock(b); 3635 result->end = to_cblock(e); 3636 return 0; 3637 } 3638 3639 /* 3640 * That didn't work, try form (i). 3641 */ 3642 r = sscanf(str, "%llu%c", &b, &dummy); 3643 if (r < 0) 3644 return r; 3645 3646 if (r == 1) { 3647 result->begin = to_cblock(b); 3648 result->end = to_cblock(from_cblock(result->begin) + 1u); 3649 return 0; 3650 } 3651 3652 DMERR("%s: invalid cblock range '%s'", cache_device_name(cache), str); 3653 return -EINVAL; 3654 } 3655 3656 static int validate_cblock_range(struct cache *cache, struct cblock_range *range) 3657 { 3658 uint64_t b = from_cblock(range->begin); 3659 uint64_t e = from_cblock(range->end); 3660 uint64_t n = from_cblock(cache->cache_size); 3661 3662 if (b >= n) { 3663 DMERR("%s: begin cblock out of range: %llu >= %llu", 3664 cache_device_name(cache), b, n); 3665 return -EINVAL; 3666 } 3667 3668 if (e > n) { 3669 DMERR("%s: end cblock out of range: %llu > %llu", 3670 cache_device_name(cache), e, n); 3671 return -EINVAL; 3672 } 3673 3674 if (b >= e) { 3675 DMERR("%s: invalid cblock range: %llu >= %llu", 3676 cache_device_name(cache), b, e); 3677 return -EINVAL; 3678 } 3679 3680 return 0; 3681 } 3682 3683 static int request_invalidation(struct cache *cache, struct cblock_range *range) 3684 { 3685 struct invalidation_request req; 3686 3687 INIT_LIST_HEAD(&req.list); 3688 req.cblocks = range; 3689 atomic_set(&req.complete, 0); 3690 req.err = 0; 3691 init_waitqueue_head(&req.result_wait); 3692 3693 spin_lock(&cache->invalidation_lock); 3694 list_add(&req.list, &cache->invalidation_requests); 3695 spin_unlock(&cache->invalidation_lock); 3696 wake_worker(cache); 3697 3698 wait_event(req.result_wait, atomic_read(&req.complete)); 3699 return req.err; 3700 } 3701 3702 static int process_invalidate_cblocks_message(struct cache *cache, unsigned count, 3703 const char **cblock_ranges) 3704 { 3705 int r = 0; 3706 unsigned i; 3707 struct cblock_range range; 3708 3709 if (!passthrough_mode(&cache->features)) { 3710 
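		/*
		 * Invalidation drops blocks without writing them back, so it
		 * is only permitted in passthrough mode, where no cache block
		 * is ever dirty.
		 */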
DMERR("%s: cache has to be in passthrough mode for invalidation", 3711 cache_device_name(cache)); 3712 return -EPERM; 3713 } 3714 3715 for (i = 0; i < count; i++) { 3716 r = parse_cblock_range(cache, cblock_ranges[i], &range); 3717 if (r) 3718 break; 3719 3720 r = validate_cblock_range(cache, &range); 3721 if (r) 3722 break; 3723 3724 /* 3725 * Pass begin and end origin blocks to the worker and wake it. 3726 */ 3727 r = request_invalidation(cache, &range); 3728 if (r) 3729 break; 3730 } 3731 3732 return r; 3733 } 3734 3735 /* 3736 * Supports 3737 * "<key> <value>" 3738 * and 3739 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]* 3740 * 3741 * The key migration_threshold is supported by the cache target core. 3742 */ 3743 static int cache_message(struct dm_target *ti, unsigned argc, char **argv) 3744 { 3745 struct cache *cache = ti->private; 3746 3747 if (!argc) 3748 return -EINVAL; 3749 3750 if (get_cache_mode(cache) >= CM_READ_ONLY) { 3751 DMERR("%s: unable to service cache target messages in READ_ONLY or FAIL mode", 3752 cache_device_name(cache)); 3753 return -EOPNOTSUPP; 3754 } 3755 3756 if (!strcasecmp(argv[0], "invalidate_cblocks")) 3757 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1); 3758 3759 if (argc != 2) 3760 return -EINVAL; 3761 3762 return set_config_value(cache, argv[0], argv[1]); 3763 } 3764 3765 static int cache_iterate_devices(struct dm_target *ti, 3766 iterate_devices_callout_fn fn, void *data) 3767 { 3768 int r = 0; 3769 struct cache *cache = ti->private; 3770 3771 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); 3772 if (!r) 3773 r = fn(ti, cache->origin_dev, 0, ti->len, data); 3774 3775 return r; 3776 } 3777 3778 static void set_discard_limits(struct cache *cache, struct queue_limits *limits) 3779 { 3780 /* 3781 * FIXME: these limits may be incompatible with the cache device 3782 */ 3783 limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, 3784 cache->origin_sectors); 3785 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; 3786 } 3787 3788 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) 3789 { 3790 struct cache *cache = ti->private; 3791 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 3792 3793 /* 3794 * If the system-determined stacked limits are compatible with the 3795 * cache's blocksize (io_opt is a factor) do not override them. 
3796 */ 3797 if (io_opt_sectors < cache->sectors_per_block || 3798 do_div(io_opt_sectors, cache->sectors_per_block)) { 3799 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT); 3800 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3801 } 3802 set_discard_limits(cache, limits); 3803 } 3804 3805 /*----------------------------------------------------------------*/ 3806 3807 static struct target_type cache_target = { 3808 .name = "cache", 3809 .version = {1, 8, 0}, 3810 .module = THIS_MODULE, 3811 .ctr = cache_ctr, 3812 .dtr = cache_dtr, 3813 .map = cache_map, 3814 .end_io = cache_end_io, 3815 .postsuspend = cache_postsuspend, 3816 .preresume = cache_preresume, 3817 .resume = cache_resume, 3818 .status = cache_status, 3819 .message = cache_message, 3820 .iterate_devices = cache_iterate_devices, 3821 .io_hints = cache_io_hints, 3822 }; 3823 3824 static int __init dm_cache_init(void) 3825 { 3826 int r; 3827 3828 r = dm_register_target(&cache_target); 3829 if (r) { 3830 DMERR("cache target registration failed: %d", r); 3831 return r; 3832 } 3833 3834 migration_cache = KMEM_CACHE(dm_cache_migration, 0); 3835 if (!migration_cache) { 3836 dm_unregister_target(&cache_target); 3837 return -ENOMEM; 3838 } 3839 3840 return 0; 3841 } 3842 3843 static void __exit dm_cache_exit(void) 3844 { 3845 dm_unregister_target(&cache_target); 3846 kmem_cache_destroy(migration_cache); 3847 } 3848 3849 module_init(dm_cache_init); 3850 module_exit(dm_cache_exit); 3851 3852 MODULE_DESCRIPTION(DM_NAME " cache target"); 3853 MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 3854 MODULE_LICENSE("GPL"); 3855
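
/*
 * Illustrative run-time tuning from user space (device name made up).
 * migration_threshold is handled by process_config_option() above; any
 * other key/value pair is passed through to the policy:
 *
 *	dmsetup message my_cache 0 migration_threshold 204800
 *	dmsetup message my_cache 0 sequential_threshold 1024
 */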