/*
 * dm-snapshot.c
 *
 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include <linux/blkdev.h>
#include <linux/device-mapper.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/list.h>
#include <linux/list_bl.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
#include <linux/semaphore.h>

#include "dm.h"

#include "dm-exception-store.h"

#define DM_MSG_PREFIX "snapshots"

static const char dm_snapshot_merge_target_name[] = "snapshot-merge";

#define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)

/*
 * The size of the mempool used to track chunks in use.
 */
#define MIN_IOS 256

#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))

struct dm_exception_table {
	uint32_t hash_mask;
	unsigned hash_shift;
	struct hlist_bl_head *table;
};

struct dm_snapshot {
	struct rw_semaphore lock;

	struct dm_dev *origin;
	struct dm_dev *cow;

	struct dm_target *ti;

	/* List of snapshots per Origin */
	struct list_head list;

	/*
	 * You can't use a snapshot if this is 0 (e.g. if full).
	 * A snapshot-merge target never clears this.
	 */
	int valid;

	/*
	 * The snapshot overflowed because of a write to the snapshot device.
	 * We don't have to invalidate the snapshot in this case, but we need
	 * to prevent further writes.
	 */
	int snapshot_overflowed;

	/* Origin writes don't trigger exceptions until this is set */
	int active;

	atomic_t pending_exceptions_count;

	spinlock_t pe_allocation_lock;

	/* Protected by "pe_allocation_lock" */
	sector_t exception_start_sequence;

	/* Protected by kcopyd single-threaded callback */
	sector_t exception_complete_sequence;

	/*
	 * A list of pending exceptions that completed out of order.
	 * Protected by kcopyd single-threaded callback.
	 */
	struct rb_root out_of_order_tree;

	mempool_t pending_pool;

	struct dm_exception_table pending;
	struct dm_exception_table complete;

	/*
	 * pe_lock protects all pending_exception operations and access
	 * as well as the snapshot_bios list.
	 */
	spinlock_t pe_lock;

	/* Chunks with outstanding reads */
	spinlock_t tracked_chunk_lock;
	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];

	/* The on disk metadata handler */
	struct dm_exception_store *store;

	/* Maximum number of in-flight COW jobs. */
	struct semaphore cow_count;

	struct dm_kcopyd_client *kcopyd_client;

	/* Wait for events based on state_bits */
	unsigned long state_bits;

	/* Range of chunks currently being merged. */
	chunk_t first_merging_chunk;
	int num_merging_chunks;

	/*
	 * The merge operation failed if this flag is set.
	 * Failure modes are handled as follows:
	 * - I/O error reading the header
	 *   => don't load the target; abort.
	 * - Header does not have "valid" flag set
	 *   => use the origin; forget about the snapshot.
	 * - I/O error when reading exceptions
	 *   => don't load the target; abort.
	 *   (We can't use the intermediate origin state.)
	 * - I/O error while merging
	 *   => stop merging; set merge_failed; process I/O normally.
	 */
	int merge_failed;

	/*
	 * Incoming bios that overlap with chunks being merged must wait
	 * for them to be committed.
	 */
	struct bio_list bios_queued_during_merge;
};

/*
 * state_bits:
 *   RUNNING_MERGE  - Merge operation is in progress.
 *   SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
 *                    cleared afterwards.
 */
#define RUNNING_MERGE		0
#define SHUTDOWN_MERGE		1

/*
 * Maximum number of chunks being copied on write.
 *
 * The value was decided experimentally as a trade-off between memory
 * consumption, stalling the kernel's workqueues and maintaining a high enough
 * throughput.
 */
#define DEFAULT_COW_THRESHOLD 2048

static int cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");

struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
{
	return s->origin;
}
EXPORT_SYMBOL(dm_snap_origin);

struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
{
	return s->cow;
}
EXPORT_SYMBOL(dm_snap_cow);

static sector_t chunk_to_sector(struct dm_exception_store *store,
				chunk_t chunk)
{
	return chunk << store->chunk_shift;
}

static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * There is only ever one instance of a particular block
	 * device so we can compare pointers safely.
	 */
	return lhs == rhs;
}

struct dm_snap_pending_exception {
	struct dm_exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a bio list
	 */
	struct bio_list origin_bios;
	struct bio_list snapshot_bios;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;

	/* There was a copying error. */
	int copy_error;

	/* A sequence number, it is used for in-order completion. */
	sector_t exception_sequence;

	struct rb_node out_of_order_node;

	/*
	 * For writing a complete chunk, bypassing the copy.
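	 *
	 * (Note: when an incoming write covers a whole chunk, snapshot_map()
	 * hands it to start_full_bio() so the data is written straight to the
	 * COW device and no kcopyd copy is needed; these fields remember the
	 * original bio and its end_io so pending_complete() can restore them.)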
	 */
	struct bio *full_bio;
	bio_end_io_t *full_bio_end_io;
};

/*
 * Hash table mapping origin volumes to lists of snapshots and
 * a lock to protect it
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;

struct dm_snap_tracked_chunk {
	struct hlist_node node;
	chunk_t chunk;
};

static void init_tracked_chunk(struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	INIT_HLIST_NODE(&c->node);
}

static bool is_bio_tracked(struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	return !hlist_unhashed(&c->node);
}

static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));

	c->chunk = chunk;

	spin_lock_irq(&s->tracked_chunk_lock);
	hlist_add_head(&c->node,
		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
	spin_unlock_irq(&s->tracked_chunk_lock);
}

static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
{
	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
	unsigned long flags;

	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
	hlist_del(&c->node);
	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
}

static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_snap_tracked_chunk *c;
	int found = 0;

	spin_lock_irq(&s->tracked_chunk_lock);

	hlist_for_each_entry(c,
	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
		if (c->chunk == chunk) {
			found = 1;
			break;
		}
	}

	spin_unlock_irq(&s->tracked_chunk_lock);

	return found;
}

/*
 * This conflicting I/O is extremely improbable in the caller,
 * so msleep(1) is sufficient and there is no need for a wait queue.
 */
static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
{
	while (__chunk_is_tracked(s, chunk))
		msleep(1);
}

/*
 * One of these per registered origin, held in the snapshot_origins hash
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	struct list_head hash_list;

	/* List of snapshots for this origin */
	struct list_head snapshots;
};

/*
 * This structure is allocated for each origin target
 */
struct dm_origin {
	struct dm_dev *dev;
	struct dm_target *ti;
	unsigned split_boundary;
	struct list_head hash_list;
};

/*
 * Size of the hash table for origin volumes.
 * If we make this the size of the minors list then it should be nearly perfect
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      0xFF
static struct list_head *_origins;
static struct list_head *_dm_origins;
static struct rw_semaphore _origins_lock;

static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
static uint64_t _pending_exceptions_done_count;

static int init_origin_hash(void)
{
	int i;

	_origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
				 GFP_KERNEL);
	if (!_origins) {
		DMERR("unable to allocate memory for _origins");
		return -ENOMEM;
	}
	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_origins + i);

	_dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
				    sizeof(struct list_head),
				    GFP_KERNEL);
	if (!_dm_origins) {
		DMERR("unable to allocate memory for _dm_origins");
		kfree(_origins);
		return -ENOMEM;
	}
	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
		INIT_LIST_HEAD(_dm_origins + i);

	init_rwsem(&_origins_lock);

	return 0;
}

static void exit_origin_hash(void)
{
	kfree(_origins);
	kfree(_dm_origins);
}

static unsigned origin_hash(struct block_device *bdev)
{
	return bdev->bd_dev & ORIGIN_MASK;
}

static struct origin *__lookup_origin(struct block_device *origin)
{
	struct list_head *ol;
	struct origin *o;

	ol = &_origins[origin_hash(origin)];
	list_for_each_entry (o, ol, hash_list)
		if (bdev_equal(o->bdev, origin))
			return o;

	return NULL;
}

static void __insert_origin(struct origin *o)
{
	struct list_head *sl = &_origins[origin_hash(o->bdev)];
	list_add_tail(&o->hash_list, sl);
}

static struct dm_origin *__lookup_dm_origin(struct block_device *origin)
{
	struct list_head *ol;
	struct dm_origin *o;

	ol = &_dm_origins[origin_hash(origin)];
	list_for_each_entry (o, ol, hash_list)
		if (bdev_equal(o->dev->bdev, origin))
			return o;

	return NULL;
}

static void __insert_dm_origin(struct dm_origin *o)
{
	struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
	list_add_tail(&o->hash_list, sl);
}

static void __remove_dm_origin(struct dm_origin *o)
{
	list_del(&o->hash_list);
}

/*
 * _origins_lock must be held when calling this function.
 * Returns number of snapshots registered using the supplied cow device, plus:
 * snap_src - a snapshot suitable for use as a source of exception handover
 * snap_dest - a snapshot capable of receiving exception handover.
 * snap_merge - an existing snapshot-merge target linked to the same origin.
 *   There can be at most one snapshot-merge target. The parameter is optional.
 *
 * Possible return values and states of snap_src and snap_dest.
 *   0: NULL, NULL      - first new snapshot
 *   1: snap_src, NULL  - normal snapshot
 *   2: snap_src, snap_dest  - waiting for handover
 *   2: snap_src, NULL  - handed over, waiting for old to be deleted
 *   1: NULL, snap_dest - source got destroyed without handover
 */
static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
					struct dm_snapshot **snap_src,
					struct dm_snapshot **snap_dest,
					struct dm_snapshot **snap_merge)
{
	struct dm_snapshot *s;
	struct origin *o;
	int count = 0;
	int active;

	o = __lookup_origin(snap->origin->bdev);
	if (!o)
		goto out;

	list_for_each_entry(s, &o->snapshots, list) {
		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
			*snap_merge = s;
		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
			continue;

		down_read(&s->lock);
		active = s->active;
		up_read(&s->lock);

		if (active) {
			if (snap_src)
				*snap_src = s;
		} else if (snap_dest)
			*snap_dest = s;

		count++;
	}

out:
	return count;
}

/*
 * On success, returns 1 if this snapshot is a handover destination,
 * otherwise returns 0.
 */
static int __validate_exception_handover(struct dm_snapshot *snap)
{
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
	struct dm_snapshot *snap_merge = NULL;

	/* Does snapshot need exceptions handed over to it? */
	if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
					  &snap_merge) == 2) ||
	    snap_dest) {
		snap->ti->error = "Snapshot cow pairing for exception "
				  "table handover failed";
		return -EINVAL;
	}

	/*
	 * If no snap_src was found, snap cannot become a handover
	 * destination.
	 */
	if (!snap_src)
		return 0;

	/*
	 * Non-snapshot-merge handover?
	 */
	if (!dm_target_is_snapshot_merge(snap->ti))
		return 1;

	/*
	 * Do not allow more than one merging snapshot.
	 */
	if (snap_merge) {
		snap->ti->error = "A snapshot is already merging.";
		return -EINVAL;
	}

	if (!snap_src->store->type->prepare_merge ||
	    !snap_src->store->type->commit_merge) {
		snap->ti->error = "Snapshot exception store does not "
				  "support snapshot-merge.";
		return -EINVAL;
	}

	return 1;
}

static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
{
	struct dm_snapshot *l;

	/* Sort the list according to chunk size, largest-first smallest-last */
	list_for_each_entry(l, &o->snapshots, list)
		if (l->store->chunk_size < s->store->chunk_size)
			break;
	list_add_tail(&s->list, &l->list);
}

/*
 * Make a note of the snapshot and its origin so we can look it
 * up when the origin has a write on it.
 *
 * Also validate snapshot exception store handovers.
 * On success, returns 1 if this registration is a handover destination,
 * otherwise returns 0.
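 *
 * A positive return value also tells snapshot_ctr() to skip reading the
 * exception-store metadata, since the complete exception table will arrive
 * via __handover_exceptions() when the destination is resumed.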
 */
static int register_snapshot(struct dm_snapshot *snap)
{
	struct origin *o, *new_o = NULL;
	struct block_device *bdev = snap->origin->bdev;
	int r = 0;

	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
	if (!new_o)
		return -ENOMEM;

	down_write(&_origins_lock);

	r = __validate_exception_handover(snap);
	if (r < 0) {
		kfree(new_o);
		goto out;
	}

	o = __lookup_origin(bdev);
	if (o)
		kfree(new_o);
	else {
		/* New origin */
		o = new_o;

		/* Initialise the struct */
		INIT_LIST_HEAD(&o->snapshots);
		o->bdev = bdev;

		__insert_origin(o);
	}

	__insert_snapshot(o, snap);

out:
	up_write(&_origins_lock);

	return r;
}

/*
 * Move snapshot to correct place in list according to chunk size.
 */
static void reregister_snapshot(struct dm_snapshot *s)
{
	struct block_device *bdev = s->origin->bdev;

	down_write(&_origins_lock);

	list_del(&s->list);
	__insert_snapshot(__lookup_origin(bdev), s);

	up_write(&_origins_lock);
}

static void unregister_snapshot(struct dm_snapshot *s)
{
	struct origin *o;

	down_write(&_origins_lock);
	o = __lookup_origin(s->origin->bdev);

	list_del(&s->list);
	if (o && list_empty(&o->snapshots)) {
		list_del(&o->hash_list);
		kfree(o);
	}

	up_write(&_origins_lock);
}

/*
 * Implementation of the exception hash tables.
 * The lowest hash_shift bits of the chunk number are ignored, allowing
 * some consecutive chunks to be grouped together.
 */
static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);

/*
 * Lock to protect access to the completed and pending exception hash tables.
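 *
 * dm_exception_table_lock() always takes the completed-table slot lock
 * before the pending-table slot lock, and dm_exception_table_unlock()
 * releases them in the reverse order.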
 */
struct dm_exception_table_lock {
	struct hlist_bl_head *complete_slot;
	struct hlist_bl_head *pending_slot;
};

static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
					 struct dm_exception_table_lock *lock)
{
	struct dm_exception_table *complete = &s->complete;
	struct dm_exception_table *pending = &s->pending;

	lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
	lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
}

static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
{
	hlist_bl_lock(lock->complete_slot);
	hlist_bl_lock(lock->pending_slot);
}

static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
{
	hlist_bl_unlock(lock->pending_slot);
	hlist_bl_unlock(lock->complete_slot);
}

static int dm_exception_table_init(struct dm_exception_table *et,
				   uint32_t size, unsigned hash_shift)
{
	unsigned int i;

	et->hash_shift = hash_shift;
	et->hash_mask = size - 1;
	et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head));
	if (!et->table)
		return -ENOMEM;

	for (i = 0; i < size; i++)
		INIT_HLIST_BL_HEAD(et->table + i);

	return 0;
}

static void dm_exception_table_exit(struct dm_exception_table *et,
				    struct kmem_cache *mem)
{
	struct hlist_bl_head *slot;
	struct dm_exception *ex;
	struct hlist_bl_node *pos, *n;
	int i, size;

	size = et->hash_mask + 1;
	for (i = 0; i < size; i++) {
		slot = et->table + i;

		hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
			kmem_cache_free(mem, ex);
	}

	vfree(et->table);
}

static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
{
	return (chunk >> et->hash_shift) & et->hash_mask;
}

static void dm_remove_exception(struct dm_exception *e)
{
	hlist_bl_del(&e->hash_list);
}

/*
 * Return the exception data for a sector, or NULL if not
 * remapped.
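 *
 * Because completed exceptions for adjacent chunks are coalesced, a single
 * entry can describe a run of consecutive chunks, so the lookup matches any
 * chunk in [old_chunk, old_chunk + dm_consecutive_chunk_count()].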
 */
static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
						chunk_t chunk)
{
	struct hlist_bl_head *slot;
	struct hlist_bl_node *pos;
	struct dm_exception *e;

	slot = &et->table[exception_hash(et, chunk)];
	hlist_bl_for_each_entry(e, pos, slot, hash_list)
		if (chunk >= e->old_chunk &&
		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
			return e;

	return NULL;
}

static struct dm_exception *alloc_completed_exception(gfp_t gfp)
{
	struct dm_exception *e;

	e = kmem_cache_alloc(exception_cache, gfp);
	if (!e && gfp == GFP_NOIO)
		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);

	return e;
}

static void free_completed_exception(struct dm_exception *e)
{
	kmem_cache_free(exception_cache, e);
}

static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
{
	struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
							     GFP_NOIO);

	atomic_inc(&s->pending_exceptions_count);
	pe->snap = s;

	return pe;
}

static void free_pending_exception(struct dm_snap_pending_exception *pe)
{
	struct dm_snapshot *s = pe->snap;

	mempool_free(pe, &s->pending_pool);
	smp_mb__before_atomic();
	atomic_dec(&s->pending_exceptions_count);
}

static void dm_insert_exception(struct dm_exception_table *eh,
				struct dm_exception *new_e)
{
	struct hlist_bl_head *l;
	struct hlist_bl_node *pos;
	struct dm_exception *e = NULL;

	l = &eh->table[exception_hash(eh, new_e->old_chunk)];

	/* Add immediately if this table doesn't support consecutive chunks */
	if (!eh->hash_shift)
		goto out;

	/* List is ordered by old_chunk */
	hlist_bl_for_each_entry(e, pos, l, hash_list) {
		/* Insert after an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk +
					 dm_consecutive_chunk_count(e) + 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
					 dm_consecutive_chunk_count(e) + 1)) {
			dm_consecutive_chunk_count_inc(e);
			free_completed_exception(new_e);
			return;
		}

		/* Insert before an existing chunk? */
		if (new_e->old_chunk == (e->old_chunk - 1) &&
		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
			dm_consecutive_chunk_count_inc(e);
			e->old_chunk--;
			e->new_chunk--;
			free_completed_exception(new_e);
			return;
		}

		if (new_e->old_chunk < e->old_chunk)
			break;
	}

out:
	if (!e) {
		/*
		 * Either the table doesn't support consecutive chunks or slot
		 * l is empty.
		 */
		hlist_bl_add_head(&new_e->hash_list, l);
	} else if (new_e->old_chunk < e->old_chunk) {
		/* Add before an existing exception */
		hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
	} else {
		/* Add to l's tail: e is the last exception in this slot */
		hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
	}
}

/*
 * Callback used by the exception stores to load exceptions when
 * initialising.
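 * (It is passed to store->type->read_metadata() from snapshot_ctr().)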
 */
static int dm_add_exception(void *context, chunk_t old, chunk_t new)
{
	struct dm_exception_table_lock lock;
	struct dm_snapshot *s = context;
	struct dm_exception *e;

	e = alloc_completed_exception(GFP_KERNEL);
	if (!e)
		return -ENOMEM;

	e->old_chunk = old;

	/* Consecutive_count is implicitly initialised to zero */
	e->new_chunk = new;

	/*
	 * Although there is no need to lock access to the exception tables
	 * here, if we don't then hlist_bl_add_head(), called by
	 * dm_insert_exception(), will complain about accessing the
	 * corresponding list without locking it first.
	 */
	dm_exception_table_lock_init(s, old, &lock);

	dm_exception_table_lock(&lock);
	dm_insert_exception(&s->complete, e);
	dm_exception_table_unlock(&lock);

	return 0;
}

/*
 * Return a minimum chunk size of all snapshots that have the specified origin.
 * Return zero if the origin has no snapshots.
 */
static uint32_t __minimum_chunk_size(struct origin *o)
{
	struct dm_snapshot *snap;
	unsigned chunk_size = 0;

	if (o)
		list_for_each_entry(snap, &o->snapshots, list)
			chunk_size = min_not_zero(chunk_size,
						  snap->store->chunk_size);

	return (uint32_t) chunk_size;
}

/*
 * Hard coded magic.
 */
static int calc_max_buckets(void)
{
	/* use a fixed size of 2MB */
	unsigned long mem = 2 * 1024 * 1024;
	mem /= sizeof(struct hlist_bl_head);

	return mem;
}

/*
 * Allocate room for a suitable hash table.
 */
static int init_hash_tables(struct dm_snapshot *s)
{
	sector_t hash_size, cow_dev_size, max_buckets;

	/*
	 * Calculate based on the size of the original volume or
	 * the COW volume...
	 */
	cow_dev_size = get_dev_size(s->cow->bdev);
	max_buckets = calc_max_buckets();

	hash_size = cow_dev_size >> s->store->chunk_shift;
	hash_size = min(hash_size, max_buckets);

	if (hash_size < 64)
		hash_size = 64;
	hash_size = rounddown_pow_of_two(hash_size);
	if (dm_exception_table_init(&s->complete, hash_size,
				    DM_CHUNK_CONSECUTIVE_BITS))
		return -ENOMEM;

	/*
	 * Allocate hash table for in-flight exceptions
	 * Make this smaller than the real hash table
	 */
	hash_size >>= 3;
	if (hash_size < 64)
		hash_size = 64;

	if (dm_exception_table_init(&s->pending, hash_size, 0)) {
		dm_exception_table_exit(&s->complete, exception_cache);
		return -ENOMEM;
	}

	return 0;
}

static void merge_shutdown(struct dm_snapshot *s)
{
	clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
	smp_mb__after_atomic();
	wake_up_bit(&s->state_bits, RUNNING_MERGE);
}

static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
{
	s->first_merging_chunk = 0;
	s->num_merging_chunks = 0;

	return bio_list_get(&s->bios_queued_during_merge);
}

/*
 * Remove one chunk from the index of completed exceptions.
 */
static int __remove_single_exception_chunk(struct dm_snapshot *s,
					   chunk_t old_chunk)
{
	struct dm_exception *e;

	e = dm_lookup_exception(&s->complete, old_chunk);
	if (!e) {
		DMERR("Corruption detected: exception for block %llu is "
		      "on disk but not in memory",
		      (unsigned long long)old_chunk);
		return -EINVAL;
	}

	/*
	 * If this is the only chunk using this exception, remove exception.
	 */
	if (!dm_consecutive_chunk_count(e)) {
		dm_remove_exception(e);
		free_completed_exception(e);
		return 0;
	}

	/*
	 * The chunk may be either at the beginning or the end of a
	 * group of consecutive chunks - never in the middle. We are
	 * removing chunks in the opposite order to that in which they
	 * were added, so this should always be true.
	 * Decrement the consecutive chunk counter and adjust the
	 * starting point if necessary.
	 */
	if (old_chunk == e->old_chunk) {
		e->old_chunk++;
		e->new_chunk++;
	} else if (old_chunk != e->old_chunk +
		   dm_consecutive_chunk_count(e)) {
		DMERR("Attempt to merge block %llu from the "
		      "middle of a chunk range [%llu - %llu]",
		      (unsigned long long)old_chunk,
		      (unsigned long long)e->old_chunk,
		      (unsigned long long)
		      e->old_chunk + dm_consecutive_chunk_count(e));
		return -EINVAL;
	}

	dm_consecutive_chunk_count_dec(e);

	return 0;
}

static void flush_bios(struct bio *bio);

static int remove_single_exception_chunk(struct dm_snapshot *s)
{
	struct bio *b = NULL;
	int r;
	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;

	down_write(&s->lock);

	/*
	 * Process chunks (and associated exceptions) in reverse order
	 * so that dm_consecutive_chunk_count_dec() accounting works.
	 */
	do {
		r = __remove_single_exception_chunk(s, old_chunk);
		if (r)
			goto out;
	} while (old_chunk-- > s->first_merging_chunk);

	b = __release_queued_bios_after_merge(s);

out:
	up_write(&s->lock);
	if (b)
		flush_bios(b);

	return r;
}

static int origin_write_extent(struct dm_snapshot *merging_snap,
			       sector_t sector, unsigned chunk_size);

static void merge_callback(int read_err, unsigned long write_err,
			   void *context);

static uint64_t read_pending_exceptions_done_count(void)
{
	uint64_t pending_exceptions_done;

	spin_lock(&_pending_exceptions_done_spinlock);
	pending_exceptions_done = _pending_exceptions_done_count;
	spin_unlock(&_pending_exceptions_done_spinlock);

	return pending_exceptions_done;
}

static void increment_pending_exceptions_done_count(void)
{
	spin_lock(&_pending_exceptions_done_spinlock);
	_pending_exceptions_done_count++;
	spin_unlock(&_pending_exceptions_done_spinlock);

	wake_up_all(&_pending_exceptions_done);
}

static void snapshot_merge_next_chunks(struct dm_snapshot *s)
{
	int i, linear_chunks;
	chunk_t old_chunk, new_chunk;
	struct dm_io_region src, dest;
	sector_t io_size;
	uint64_t previous_count;

	BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
	if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
		goto shut;

	/*
	 * valid flag never changes during merge, so no lock required.
	 */
	if (!s->valid) {
		DMERR("Snapshot is invalid: can't merge");
		goto shut;
	}

	linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
						      &new_chunk);
	if (linear_chunks <= 0) {
		if (linear_chunks < 0) {
			DMERR("Read error in exception store: "
			      "shutting down merge");
			down_write(&s->lock);
			s->merge_failed = 1;
			up_write(&s->lock);
		}
		goto shut;
	}

	/* Adjust old_chunk and new_chunk to reflect start of linear region */
	old_chunk = old_chunk + 1 - linear_chunks;
	new_chunk = new_chunk + 1 - linear_chunks;

	/*
	 * Use one (potentially large) I/O to copy all 'linear_chunks'
	 * from the exception store to the origin
	 */
	io_size = linear_chunks * s->store->chunk_size;

	dest.bdev = s->origin->bdev;
	dest.sector = chunk_to_sector(s->store, old_chunk);
	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);

	src.bdev = s->cow->bdev;
	src.sector = chunk_to_sector(s->store, new_chunk);
	src.count = dest.count;

	/*
	 * Reallocate any exceptions needed in other snapshots then
	 * wait for the pending exceptions to complete.
	 * Each time any pending exception (globally on the system)
	 * completes we are woken and repeat the process to find out
	 * if we can proceed. While this may not seem a particularly
	 * efficient algorithm, it is not expected to have any
	 * significant impact on performance.
	 */
	previous_count = read_pending_exceptions_done_count();
	while (origin_write_extent(s, dest.sector, io_size)) {
		wait_event(_pending_exceptions_done,
			   (read_pending_exceptions_done_count() !=
			    previous_count));
		/* Retry after the wait, until all exceptions are done. */
		previous_count = read_pending_exceptions_done_count();
	}

	down_write(&s->lock);
	s->first_merging_chunk = old_chunk;
	s->num_merging_chunks = linear_chunks;
	up_write(&s->lock);

	/* Wait until writes to all 'linear_chunks' drain */
	for (i = 0; i < linear_chunks; i++)
		__check_for_conflicting_io(s, old_chunk + i);

	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
	return;

shut:
	merge_shutdown(s);
}

static void error_bios(struct bio *bio);

static void merge_callback(int read_err, unsigned long write_err, void *context)
{
	struct dm_snapshot *s = context;
	struct bio *b = NULL;

	if (read_err || write_err) {
		if (read_err)
			DMERR("Read error: shutting down merge.");
		else
			DMERR("Write error: shutting down merge.");
		goto shut;
	}

	if (s->store->type->commit_merge(s->store,
					 s->num_merging_chunks) < 0) {
		DMERR("Write error in exception store: shutting down merge");
		goto shut;
	}

	if (remove_single_exception_chunk(s) < 0)
		goto shut;

	snapshot_merge_next_chunks(s);

	return;

shut:
	down_write(&s->lock);
	s->merge_failed = 1;
	b = __release_queued_bios_after_merge(s);
	up_write(&s->lock);
	error_bios(b);

	merge_shutdown(s);
}

static void start_merge(struct dm_snapshot *s)
{
	if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
		snapshot_merge_next_chunks(s);
}

/*
 * Stop the merging process and wait until it finishes.
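 *
 * Setting SHUTDOWN_MERGE makes snapshot_merge_next_chunks() stop issuing
 * new kcopyd jobs; wait_on_bit() then sleeps until merge_shutdown() clears
 * RUNNING_MERGE.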
 */
static void stop_merge(struct dm_snapshot *s)
{
	set_bit(SHUTDOWN_MERGE, &s->state_bits);
	wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
	clear_bit(SHUTDOWN_MERGE, &s->state_bits);
}

/*
 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
 */
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dm_snapshot *s;
	int i;
	int r = -EINVAL;
	char *origin_path, *cow_path;
	dev_t origin_dev, cow_dev;
	unsigned args_used, num_flush_bios = 1;
	fmode_t origin_mode = FMODE_READ;

	if (argc != 4) {
		ti->error = "requires exactly 4 arguments";
		r = -EINVAL;
		goto bad;
	}

	if (dm_target_is_snapshot_merge(ti)) {
		num_flush_bios = 2;
		origin_mode = FMODE_WRITE;
	}

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s) {
		ti->error = "Cannot allocate private snapshot structure";
		r = -ENOMEM;
		goto bad;
	}

	origin_path = argv[0];
	argv++;
	argc--;

	r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
	if (r) {
		ti->error = "Cannot get origin device";
		goto bad_origin;
	}
	origin_dev = s->origin->bdev->bd_dev;

	cow_path = argv[0];
	argv++;
	argc--;

	cow_dev = dm_get_dev_t(cow_path);
	if (cow_dev && cow_dev == origin_dev) {
		ti->error = "COW device cannot be the same as origin device";
		r = -EINVAL;
		goto bad_cow;
	}

	r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
	if (r) {
		ti->error = "Cannot get COW device";
		goto bad_cow;
	}

	r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
	if (r) {
		ti->error = "Couldn't create exception store";
		r = -EINVAL;
		goto bad_store;
	}

	argv += args_used;
	argc -= args_used;

	s->ti = ti;
	s->valid = 1;
	s->snapshot_overflowed = 0;
	s->active = 0;
	atomic_set(&s->pending_exceptions_count, 0);
	spin_lock_init(&s->pe_allocation_lock);
	s->exception_start_sequence = 0;
	s->exception_complete_sequence = 0;
	s->out_of_order_tree = RB_ROOT;
	init_rwsem(&s->lock);
	INIT_LIST_HEAD(&s->list);
	spin_lock_init(&s->pe_lock);
	s->state_bits = 0;
	s->merge_failed = 0;
	s->first_merging_chunk = 0;
	s->num_merging_chunks = 0;
	bio_list_init(&s->bios_queued_during_merge);

	/* Allocate hash table for COW data */
	if (init_hash_tables(s)) {
		ti->error = "Unable to allocate hash table space";
		r = -ENOMEM;
		goto bad_hash_tables;
	}

	sema_init(&s->cow_count, (cow_threshold > 0) ?
		  cow_threshold : INT_MAX);

	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
	if (IS_ERR(s->kcopyd_client)) {
		r = PTR_ERR(s->kcopyd_client);
		ti->error = "Could not create kcopyd client";
		goto bad_kcopyd;
	}

	r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
	if (r) {
		ti->error = "Could not allocate mempool for pending exceptions";
		goto bad_pending_pool;
	}

	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);

	spin_lock_init(&s->tracked_chunk_lock);

	ti->private = s;
	ti->num_flush_bios = num_flush_bios;
	ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);

	/* Add snapshot to the list of snapshots for this origin */
	/* Exceptions aren't triggered till snapshot_resume() is called */
	r = register_snapshot(s);
	if (r == -ENOMEM) {
		ti->error = "Snapshot origin struct allocation failed";
		goto bad_load_and_register;
	} else if (r < 0) {
		/* invalid handover, register_snapshot has set ti->error */
		goto bad_load_and_register;
	}

	/*
	 * Metadata must only be loaded into one table at once, so skip this
	 * if metadata will be handed over during resume.
	 * Chunk size will be set during the handover - set it to zero to
	 * ensure it's ignored.
	 */
	if (r > 0) {
		s->store->chunk_size = 0;
		return 0;
	}

	r = s->store->type->read_metadata(s->store, dm_add_exception,
					  (void *)s);
	if (r < 0) {
		ti->error = "Failed to read snapshot metadata";
		goto bad_read_metadata;
	} else if (r > 0) {
		s->valid = 0;
		DMWARN("Snapshot is marked invalid.");
	}

	if (!s->store->chunk_size) {
		ti->error = "Chunk size not set";
		goto bad_read_metadata;
	}

	r = dm_set_target_max_io_len(ti, s->store->chunk_size);
	if (r)
		goto bad_read_metadata;

	return 0;

bad_read_metadata:
	unregister_snapshot(s);

bad_load_and_register:
	mempool_exit(&s->pending_pool);

bad_pending_pool:
	dm_kcopyd_client_destroy(s->kcopyd_client);

bad_kcopyd:
	dm_exception_table_exit(&s->pending, pending_cache);
	dm_exception_table_exit(&s->complete, exception_cache);

bad_hash_tables:
	dm_exception_store_destroy(s->store);

bad_store:
	dm_put_device(ti, s->cow);

bad_cow:
	dm_put_device(ti, s->origin);

bad_origin:
	kfree(s);

bad:
	return r;
}

static void __free_exceptions(struct dm_snapshot *s)
{
	dm_kcopyd_client_destroy(s->kcopyd_client);
	s->kcopyd_client = NULL;

	dm_exception_table_exit(&s->pending, pending_cache);
	dm_exception_table_exit(&s->complete, exception_cache);
}

static void __handover_exceptions(struct dm_snapshot *snap_src,
				  struct dm_snapshot *snap_dest)
{
	union {
		struct dm_exception_table table_swap;
		struct dm_exception_store *store_swap;
	} u;

	/*
	 * Swap all snapshot context information between the two instances.
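	 * The caller (snapshot_resume) holds both snapshots' locks while
	 * this runs, so no I/O can observe a half-swapped state.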
	 */
	u.table_swap = snap_dest->complete;
	snap_dest->complete = snap_src->complete;
	snap_src->complete = u.table_swap;

	u.store_swap = snap_dest->store;
	snap_dest->store = snap_src->store;
	snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
	snap_src->store = u.store_swap;

	snap_dest->store->snap = snap_dest;
	snap_src->store->snap = snap_src;

	snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
	snap_dest->valid = snap_src->valid;
	snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;

	/*
	 * Set source invalid to ensure it receives no further I/O.
	 */
	snap_src->valid = 0;
}

static void snapshot_dtr(struct dm_target *ti)
{
#ifdef CONFIG_DM_DEBUG
	int i;
#endif
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	down_read(&_origins_lock);
	/* Check whether exception handover must be cancelled */
	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
	if (snap_src && snap_dest && (s == snap_src)) {
		down_write(&snap_dest->lock);
		snap_dest->valid = 0;
		up_write(&snap_dest->lock);
		DMERR("Cancelling snapshot handover.");
	}
	up_read(&_origins_lock);

	if (dm_target_is_snapshot_merge(ti))
		stop_merge(s);

	/* Prevent further origin writes from using this snapshot. */
	/* After this returns there can be no new kcopyd jobs. */
	unregister_snapshot(s);

	while (atomic_read(&s->pending_exceptions_count))
		msleep(1);
	/*
	 * Ensure instructions in mempool_exit aren't reordered
	 * before atomic_read.
	 */
	smp_mb();

#ifdef CONFIG_DM_DEBUG
	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
		BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
#endif

	__free_exceptions(s);

	mempool_exit(&s->pending_pool);

	dm_exception_store_destroy(s->store);

	dm_put_device(ti, s->cow);

	dm_put_device(ti, s->origin);

	kfree(s);
}

/*
 * Flush a list of buffers.
 */
static void flush_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		generic_make_request(bio);
		bio = n;
	}
}

static int do_origin(struct dm_dev *origin, struct bio *bio);

/*
 * Flush a list of buffers.
 */
static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
{
	struct bio *n;
	int r;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		r = do_origin(s->origin, bio);
		if (r == DM_MAPIO_REMAPPED)
			generic_make_request(bio);
		bio = n;
	}
}

/*
 * Error a list of buffers.
 */
static void error_bios(struct bio *bio)
{
	struct bio *n;

	while (bio) {
		n = bio->bi_next;
		bio->bi_next = NULL;
		bio_io_error(bio);
		bio = n;
	}
}

static void __invalidate_snapshot(struct dm_snapshot *s, int err)
{
	if (!s->valid)
		return;

	if (err == -EIO)
		DMERR("Invalidating snapshot: Error reading/writing.");
	else if (err == -ENOMEM)
		DMERR("Invalidating snapshot: Unable to allocate exception.");

	if (s->store->type->drop_snapshot)
		s->store->type->drop_snapshot(s->store);

	s->valid = 0;

	dm_table_event(s->ti->table);
}

static void invalidate_snapshot(struct dm_snapshot *s, int err)
{
	down_write(&s->lock);
	__invalidate_snapshot(s, err);
	up_write(&s->lock);
}

static void pending_complete(void *context, int success)
{
	struct dm_snap_pending_exception *pe = context;
	struct dm_exception *e;
	struct dm_snapshot *s = pe->snap;
	struct bio *origin_bios = NULL;
	struct bio *snapshot_bios = NULL;
	struct bio *full_bio = NULL;
	struct dm_exception_table_lock lock;
	int error = 0;

	dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);

	if (!success) {
		/* Read/write error - snapshot is unusable */
		invalidate_snapshot(s, -EIO);
		error = 1;

		dm_exception_table_lock(&lock);
		goto out;
	}

	e = alloc_completed_exception(GFP_NOIO);
	if (!e) {
		invalidate_snapshot(s, -ENOMEM);
		error = 1;

		dm_exception_table_lock(&lock);
		goto out;
	}
	*e = pe->e;

	down_read(&s->lock);
	dm_exception_table_lock(&lock);
	if (!s->valid) {
		up_read(&s->lock);
		free_completed_exception(e);
		error = 1;

		goto out;
	}

	/*
	 * Add a proper exception. After inserting the completed exception all
	 * subsequent snapshot reads to this chunk will be redirected to the
	 * COW device. This ensures that we do not starve. Moreover, as long
	 * as the pending exception exists, neither origin writes nor snapshot
	 * merging can overwrite the chunk in origin.
	 */
	dm_insert_exception(&s->complete, e);
	up_read(&s->lock);

	/* Wait for conflicting reads to drain */
	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
		dm_exception_table_unlock(&lock);
		__check_for_conflicting_io(s, pe->e.old_chunk);
		dm_exception_table_lock(&lock);
	}

out:
	/* Remove the in-flight exception from the list */
	dm_remove_exception(&pe->e);

	dm_exception_table_unlock(&lock);

	snapshot_bios = bio_list_get(&pe->snapshot_bios);
	origin_bios = bio_list_get(&pe->origin_bios);
	full_bio = pe->full_bio;
	if (full_bio)
		full_bio->bi_end_io = pe->full_bio_end_io;
	increment_pending_exceptions_done_count();

	/* Submit any pending write bios */
	if (error) {
		if (full_bio)
			bio_io_error(full_bio);
		error_bios(snapshot_bios);
	} else {
		if (full_bio)
			bio_endio(full_bio);
		flush_bios(snapshot_bios);
	}

	retry_origin_bios(s, origin_bios);

	free_pending_exception(pe);
}

static void complete_exception(struct dm_snap_pending_exception *pe)
{
	struct dm_snapshot *s = pe->snap;

	/* Update the metadata if we are persistent */
	s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
					 pending_complete, pe);
}

/*
 * Called when the copy I/O has finished. kcopyd actually runs
 * this code so don't block.
 */
static void copy_callback(int read_err, unsigned long write_err, void *context)
{
	struct dm_snap_pending_exception *pe = context;
	struct dm_snapshot *s = pe->snap;

	pe->copy_error = read_err || write_err;

	if (pe->exception_sequence == s->exception_complete_sequence) {
		struct rb_node *next;

		s->exception_complete_sequence++;
		complete_exception(pe);

		next = rb_first(&s->out_of_order_tree);
		while (next) {
			pe = rb_entry(next, struct dm_snap_pending_exception,
				      out_of_order_node);
			if (pe->exception_sequence != s->exception_complete_sequence)
				break;
			next = rb_next(next);
			s->exception_complete_sequence++;
			rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
			complete_exception(pe);
			cond_resched();
		}
	} else {
		struct rb_node *parent = NULL;
		struct rb_node **p = &s->out_of_order_tree.rb_node;
		struct dm_snap_pending_exception *pe2;

		while (*p) {
			pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
			parent = *p;

			BUG_ON(pe->exception_sequence == pe2->exception_sequence);
			if (pe->exception_sequence < pe2->exception_sequence)
				p = &((*p)->rb_left);
			else
				p = &((*p)->rb_right);
		}

		rb_link_node(&pe->out_of_order_node, parent, p);
		rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
	}
	up(&s->cow_count);
}

/*
 * Dispatches the copy operation to kcopyd.
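 *
 * The cow_count semaphore (sized from the snapshot_cow_threshold module
 * parameter) bounds the number of COW jobs in flight; copy_callback()
 * releases it when the copy completes.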
 */
static void start_copy(struct dm_snap_pending_exception *pe)
{
	struct dm_snapshot *s = pe->snap;
	struct dm_io_region src, dest;
	struct block_device *bdev = s->origin->bdev;
	sector_t dev_size;

	dev_size = get_dev_size(bdev);

	src.bdev = bdev;
	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);

	dest.bdev = s->cow->bdev;
	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
	dest.count = src.count;

	/* Hand over to kcopyd */
	down(&s->cow_count);
	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}

static void full_bio_end_io(struct bio *bio)
{
	void *callback_data = bio->bi_private;

	dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
}

static void start_full_bio(struct dm_snap_pending_exception *pe,
			   struct bio *bio)
{
	struct dm_snapshot *s = pe->snap;
	void *callback_data;

	pe->full_bio = bio;
	pe->full_bio_end_io = bio->bi_end_io;

	down(&s->cow_count);
	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
						   copy_callback, pe);

	bio->bi_end_io = full_bio_end_io;
	bio->bi_private = callback_data;

	generic_make_request(bio);
}

static struct dm_snap_pending_exception *
__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
{
	struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);

	if (!e)
		return NULL;

	return container_of(e, struct dm_snap_pending_exception, e);
}

/*
 * Inserts a pending exception into the pending table.
 *
 * NOTE: a write lock must be held on the chunk's pending exception table slot
 * before calling this.
 */
static struct dm_snap_pending_exception *
__insert_pending_exception(struct dm_snapshot *s,
			   struct dm_snap_pending_exception *pe, chunk_t chunk)
{
	pe->e.old_chunk = chunk;
	bio_list_init(&pe->origin_bios);
	bio_list_init(&pe->snapshot_bios);
	pe->started = 0;
	pe->full_bio = NULL;

	spin_lock(&s->pe_allocation_lock);
	if (s->store->type->prepare_exception(s->store, &pe->e)) {
		spin_unlock(&s->pe_allocation_lock);
		free_pending_exception(pe);
		return NULL;
	}

	pe->exception_sequence = s->exception_start_sequence++;
	spin_unlock(&s->pe_allocation_lock);

	dm_insert_exception(&s->pending, &pe->e);

	return pe;
}

/*
 * Looks to see if this snapshot already has a pending exception
 * for this chunk, otherwise it allocates a new one and inserts
 * it into the pending table.
 *
 * NOTE: a write lock must be held on the chunk's pending exception table slot
 * before calling this.
 */
static struct dm_snap_pending_exception *
__find_pending_exception(struct dm_snapshot *s,
			 struct dm_snap_pending_exception *pe, chunk_t chunk)
{
	struct dm_snap_pending_exception *pe2;

	pe2 = __lookup_pending_exception(s, chunk);
	if (pe2) {
		free_pending_exception(pe);
		return pe2;
	}

	return __insert_pending_exception(s, pe, chunk);
}

static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
			    struct bio *bio, chunk_t chunk)
{
	bio_set_dev(bio, s->cow->bdev);
	bio->bi_iter.bi_sector =
		chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
				(chunk - e->old_chunk)) +
		(bio->bi_iter.bi_sector & s->store->chunk_mask);
}

static int snapshot_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_exception *e;
	struct dm_snapshot *s = ti->private;
	int r = DM_MAPIO_REMAPPED;
	chunk_t chunk;
	struct dm_snap_pending_exception *pe = NULL;
	struct dm_exception_table_lock lock;

	init_tracked_chunk(bio);

	if (bio->bi_opf & REQ_PREFLUSH) {
		bio_set_dev(bio, s->cow->bdev);
		return DM_MAPIO_REMAPPED;
	}

	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
	dm_exception_table_lock_init(s, chunk, &lock);

	/* Full snapshots are not usable */
	/* To get here the table must be live so s->active is always set. */
	if (!s->valid)
		return DM_MAPIO_KILL;

	down_read(&s->lock);
	dm_exception_table_lock(&lock);

	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
	    bio_data_dir(bio) == WRITE)) {
		r = DM_MAPIO_KILL;
		goto out_unlock;
	}

	/* If the block is already remapped - use that, else remap it */
	e = dm_lookup_exception(&s->complete, chunk);
	if (e) {
		remap_exception(s, e, bio, chunk);
		goto out_unlock;
	}

	/*
	 * Write to snapshot - higher level takes care of RW/RO
	 * flags so we should only get this if we are
	 * writeable.
	 */
	if (bio_data_dir(bio) == WRITE) {
		pe = __lookup_pending_exception(s, chunk);
		if (!pe) {
			dm_exception_table_unlock(&lock);
			pe = alloc_pending_exception(s);
			dm_exception_table_lock(&lock);

			e = dm_lookup_exception(&s->complete, chunk);
			if (e) {
				free_pending_exception(pe);
				remap_exception(s, e, bio, chunk);
				goto out_unlock;
			}

			pe = __find_pending_exception(s, pe, chunk);
			if (!pe) {
				dm_exception_table_unlock(&lock);
				up_read(&s->lock);

				down_write(&s->lock);

				if (s->store->userspace_supports_overflow) {
					if (s->valid && !s->snapshot_overflowed) {
						s->snapshot_overflowed = 1;
						DMERR("Snapshot overflowed: Unable to allocate exception.");
					}
				} else
					__invalidate_snapshot(s, -ENOMEM);
				up_write(&s->lock);

				r = DM_MAPIO_KILL;
				goto out;
			}
		}

		remap_exception(s, &pe->e, bio, chunk);

		r = DM_MAPIO_SUBMITTED;

		if (!pe->started &&
		    bio->bi_iter.bi_size ==
		    (s->store->chunk_size << SECTOR_SHIFT)) {
			pe->started = 1;

			dm_exception_table_unlock(&lock);
			up_read(&s->lock);

			start_full_bio(pe, bio);
			goto out;
		}

		bio_list_add(&pe->snapshot_bios, bio);

		if (!pe->started) {
			/* this is protected by the exception table lock */
			pe->started = 1;

			dm_exception_table_unlock(&lock);
			up_read(&s->lock);

			start_copy(pe);
			goto out;
		}
	} else {
		bio_set_dev(bio, s->origin->bdev);
		track_chunk(s, bio, chunk);
	}

out_unlock:
	dm_exception_table_unlock(&lock);
	up_read(&s->lock);
out:
	return r;
}

/*
 * A snapshot-merge target behaves like a combination of a snapshot
 * target and a snapshot-origin target. It only generates new
 * exceptions in other snapshots and not in the one that is being
 * merged.
 *
 * For each chunk, if there is an existing exception, it is used to
 * redirect I/O to the cow device. Otherwise I/O is sent to the origin,
 * which in turn might generate exceptions in other snapshots.
 * If merging is currently taking place on the chunk in question, the
 * I/O is deferred by adding it to s->bios_queued_during_merge.
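 *
 * For illustration only (device names and sizes below are hypothetical,
 * not part of this driver), such a target is typically loaded with a
 * table line of the form:
 *
 *   0 <num_sectors> snapshot-merge <origin_dev> <cow_dev> P <chunk_sectors>
 *
 * e.g. "0 2097152 snapshot-merge /dev/vg0/base /dev/vg0/snap-cow P 8".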
 */
static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_exception *e;
	struct dm_snapshot *s = ti->private;
	int r = DM_MAPIO_REMAPPED;
	chunk_t chunk;

	init_tracked_chunk(bio);

	if (bio->bi_opf & REQ_PREFLUSH) {
		if (!dm_bio_get_target_bio_nr(bio))
			bio_set_dev(bio, s->origin->bdev);
		else
			bio_set_dev(bio, s->cow->bdev);
		return DM_MAPIO_REMAPPED;
	}

	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);

	down_write(&s->lock);

	/* Full merging snapshots are redirected to the origin */
	if (!s->valid)
		goto redirect_to_origin;

	/* If the block is already remapped - use that */
	e = dm_lookup_exception(&s->complete, chunk);
	if (e) {
		/* Queue writes overlapping with chunks being merged */
		if (bio_data_dir(bio) == WRITE &&
		    chunk >= s->first_merging_chunk &&
		    chunk < (s->first_merging_chunk +
			     s->num_merging_chunks)) {
			bio_set_dev(bio, s->origin->bdev);
			bio_list_add(&s->bios_queued_during_merge, bio);
			r = DM_MAPIO_SUBMITTED;
			goto out_unlock;
		}

		remap_exception(s, e, bio, chunk);

		if (bio_data_dir(bio) == WRITE)
			track_chunk(s, bio, chunk);
		goto out_unlock;
	}

redirect_to_origin:
	bio_set_dev(bio, s->origin->bdev);

	if (bio_data_dir(bio) == WRITE) {
		up_write(&s->lock);
		return do_origin(s->origin, bio);
	}

out_unlock:
	up_write(&s->lock);

	return r;
}

static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
			   blk_status_t *error)
{
	struct dm_snapshot *s = ti->private;

	if (is_bio_tracked(bio))
		stop_tracking_chunk(s, bio);

	return DM_ENDIO_DONE;
}

static void snapshot_merge_presuspend(struct dm_target *ti)
{
	struct dm_snapshot *s = ti->private;

	stop_merge(s);
}

static int snapshot_preresume(struct dm_target *ti)
{
	int r = 0;
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;

	down_read(&_origins_lock);
	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
	if (snap_src && snap_dest) {
		down_read(&snap_src->lock);
		if (s == snap_src) {
			DMERR("Unable to resume snapshot source until "
			      "handover completes.");
			r = -EINVAL;
		} else if (!dm_suspended(snap_src->ti)) {
			DMERR("Unable to perform snapshot handover until "
			      "source is suspended.");
			r = -EINVAL;
		}
		up_read(&snap_src->lock);
	}
	up_read(&_origins_lock);

	return r;
}

static void snapshot_resume(struct dm_target *ti)
{
	struct dm_snapshot *s = ti->private;
	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
	struct dm_origin *o;
	struct mapped_device *origin_md = NULL;
	bool must_restart_merging = false;

	down_read(&_origins_lock);

	o = __lookup_dm_origin(s->origin->bdev);
	if (o)
		origin_md = dm_table_get_md(o->ti->table);
	if (!origin_md) {
		(void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
		if (snap_merging)
			origin_md = dm_table_get_md(snap_merging->ti->table);
	}
	if (origin_md == dm_table_get_md(ti->table))
		origin_md = NULL;
	if (origin_md) {
		if (dm_hold(origin_md))
			origin_md = NULL;
	}

	up_read(&_origins_lock);

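	/*
	 * If another device (a snapshot-origin or merging snapshot target)
	 * is driving I/O to this origin, briefly suspend it - pausing any
	 * running merge - so that the exception handover below cannot race
	 * with origin writes.
	 */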
	if (origin_md) {
		dm_internal_suspend_fast(origin_md);
		if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
			must_restart_merging = true;
			stop_merge(snap_merging);
		}
	}

	down_read(&_origins_lock);

	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
	if (snap_src && snap_dest) {
		down_write(&snap_src->lock);
		down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
		__handover_exceptions(snap_src, snap_dest);
		up_write(&snap_dest->lock);
		up_write(&snap_src->lock);
	}

	up_read(&_origins_lock);

	if (origin_md) {
		if (must_restart_merging)
			start_merge(snap_merging);
		dm_internal_resume_fast(origin_md);
		dm_put(origin_md);
	}

	/* Now we have correct chunk size, reregister */
	reregister_snapshot(s);

	down_write(&s->lock);
	s->active = 1;
	up_write(&s->lock);
}

static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
{
	uint32_t min_chunksize;

	down_read(&_origins_lock);
	min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
	up_read(&_origins_lock);

	return min_chunksize;
}

static void snapshot_merge_resume(struct dm_target *ti)
{
	struct dm_snapshot *s = ti->private;

	/*
	 * Handover exceptions from existing snapshot.
	 */
	snapshot_resume(ti);

	/*
	 * snapshot-merge acts as an origin, so set ti->max_io_len
	 */
	ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);

	start_merge(s);
}

static void snapshot_status(struct dm_target *ti, status_type_t type,
			    unsigned status_flags, char *result, unsigned maxlen)
{
	unsigned sz = 0;
	struct dm_snapshot *snap = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:

		down_write(&snap->lock);

		if (!snap->valid)
			DMEMIT("Invalid");
		else if (snap->merge_failed)
			DMEMIT("Merge failed");
		else if (snap->snapshot_overflowed)
			DMEMIT("Overflow");
		else {
			if (snap->store->type->usage) {
				sector_t total_sectors, sectors_allocated,
					 metadata_sectors;
				snap->store->type->usage(snap->store,
							 &total_sectors,
							 &sectors_allocated,
							 &metadata_sectors);
				DMEMIT("%llu/%llu %llu",
				       (unsigned long long)sectors_allocated,
				       (unsigned long long)total_sectors,
				       (unsigned long long)metadata_sectors);
			}
			else
				DMEMIT("Unknown");
		}

		up_write(&snap->lock);

		break;

	case STATUSTYPE_TABLE:
		/*
		 * kdevname returns a static pointer so we need
		 * to make private copies if the output is to
		 * make sense.
		 */
		DMEMIT("%s %s", snap->origin->name, snap->cow->name);
		snap->store->type->status(snap->store, type, result + sz,
					  maxlen - sz);
		break;
	}
}

static int snapshot_iterate_devices(struct dm_target *ti,
				    iterate_devices_callout_fn fn, void *data)
{
	struct dm_snapshot *snap = ti->private;
	int r;

	r = fn(ti, snap->origin, 0, ti->len, data);

	if (!r)
		r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);

	return r;
}


/*-----------------------------------------------------------------
 * Origin methods
 *---------------------------------------------------------------*/

/*
 * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
 * supplied bio was ignored. The caller may submit it immediately.
 * (No remapping actually occurs as the origin is always a direct linear
 * map.)
 *
 * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
 * and any supplied bio is added to a list to be submitted once all
 * the necessary exceptions exist.
 */
static int __origin_write(struct list_head *snapshots, sector_t sector,
			  struct bio *bio)
{
	int r = DM_MAPIO_REMAPPED;
	struct dm_snapshot *snap;
	struct dm_exception *e;
	struct dm_snap_pending_exception *pe, *pe2;
	struct dm_snap_pending_exception *pe_to_start_now = NULL;
	struct dm_snap_pending_exception *pe_to_start_last = NULL;
	struct dm_exception_table_lock lock;
	chunk_t chunk;

	/* Do all the snapshots on this origin */
	list_for_each_entry (snap, snapshots, list) {
		/*
		 * Don't make new exceptions in a merging snapshot
		 * because it has effectively been deleted
		 */
		if (dm_target_is_snapshot_merge(snap->ti))
			continue;

		/* Nothing to do if writing beyond end of snapshot */
		if (sector >= dm_table_get_size(snap->ti->table))
			continue;

		/*
		 * Remember, different snapshots can have
		 * different chunk sizes.
		 */
		chunk = sector_to_chunk(snap->store, sector);
		dm_exception_table_lock_init(snap, chunk, &lock);

		down_read(&snap->lock);
		dm_exception_table_lock(&lock);

		/* Only deal with valid and active snapshots */
		if (!snap->valid || !snap->active)
			goto next_snapshot;

		pe = __lookup_pending_exception(snap, chunk);
		if (!pe) {
			/*
			 * Check exception table to see if block is already
			 * remapped in this snapshot and trigger an exception
			 * if not.
			 */
			e = dm_lookup_exception(&snap->complete, chunk);
			if (e)
				goto next_snapshot;

			dm_exception_table_unlock(&lock);
			pe = alloc_pending_exception(snap);
			dm_exception_table_lock(&lock);

			pe2 = __lookup_pending_exception(snap, chunk);

			if (!pe2) {
				e = dm_lookup_exception(&snap->complete, chunk);
				if (e) {
					free_pending_exception(pe);
					goto next_snapshot;
				}

				pe = __insert_pending_exception(snap, pe, chunk);
				if (!pe) {
					dm_exception_table_unlock(&lock);
					up_read(&snap->lock);

					invalidate_snapshot(snap, -ENOMEM);
					continue;
				}
			} else {
				free_pending_exception(pe);
				pe = pe2;
			}
		}

		r = DM_MAPIO_SUBMITTED;

		/*
		 * If an origin bio was supplied, queue it to wait for the
		 * completion of this exception, and start this one last,
		 * at the end of the function.
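		 *
		 * (The bio is queued against a single pending exception
		 * only; it is set to NULL below so the remaining snapshots
		 * simply schedule their copies without queueing it again.)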
		 */
		if (bio) {
			bio_list_add(&pe->origin_bios, bio);
			bio = NULL;

			if (!pe->started) {
				pe->started = 1;
				pe_to_start_last = pe;
			}
		}

		if (!pe->started) {
			pe->started = 1;
			pe_to_start_now = pe;
		}

next_snapshot:
		dm_exception_table_unlock(&lock);
		up_read(&snap->lock);

		if (pe_to_start_now) {
			start_copy(pe_to_start_now);
			pe_to_start_now = NULL;
		}
	}

	/*
	 * Submit the exception against which the bio is queued last,
	 * to give the other exceptions a head start.
	 */
	if (pe_to_start_last)
		start_copy(pe_to_start_last);

	return r;
}

/*
 * Called on a write from the origin driver.
 */
static int do_origin(struct dm_dev *origin, struct bio *bio)
{
	struct origin *o;
	int r = DM_MAPIO_REMAPPED;

	down_read(&_origins_lock);
	o = __lookup_origin(origin->bdev);
	if (o)
		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
	up_read(&_origins_lock);

	return r;
}

/*
 * Trigger exceptions in all non-merging snapshots.
 *
 * The chunk size of the merging snapshot may be larger than the chunk
 * size of some other snapshot so we may need to reallocate multiple
 * chunks in other snapshots.
 *
 * We scan all the overlapping exceptions in the other snapshots.
 * Returns 1 if anything was reallocated and must be waited for,
 * otherwise returns 0.
 *
 * size must be a multiple of merging_snap's chunk_size.
 */
static int origin_write_extent(struct dm_snapshot *merging_snap,
			       sector_t sector, unsigned size)
{
	int must_wait = 0;
	sector_t n;
	struct origin *o;

	/*
	 * The origin's __minimum_chunk_size() got stored in max_io_len
	 * by snapshot_merge_resume().
	 */
	down_read(&_origins_lock);
	o = __lookup_origin(merging_snap->origin->bdev);
	for (n = 0; n < size; n += merging_snap->ti->max_io_len)
		if (__origin_write(&o->snapshots, sector + n, NULL) ==
		    DM_MAPIO_SUBMITTED)
			must_wait = 1;
	up_read(&_origins_lock);

	return must_wait;
}

/*
 * Origin: maps a linear range of a device, with hooks for snapshotting.
 */

/*
 * Construct an origin mapping: <dev_path>
 * The context for an origin is merely a 'struct dm_dev *'
 * pointing to the real device.
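 *
 * An illustrative table line (the device path and length are examples
 * only):
 *   0 <origin_sectors> snapshot-origin /dev/vg/base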
 */
static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int r;
	struct dm_origin *o;

	if (argc != 1) {
		ti->error = "origin: incorrect number of arguments";
		return -EINVAL;
	}

	o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
	if (!o) {
		ti->error = "Cannot allocate private origin structure";
		r = -ENOMEM;
		goto bad_alloc;
	}

	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
	if (r) {
		ti->error = "Cannot get target device";
		goto bad_open;
	}

	o->ti = ti;
	ti->private = o;
	ti->num_flush_bios = 1;

	return 0;

bad_open:
	kfree(o);
bad_alloc:
	return r;
}

static void origin_dtr(struct dm_target *ti)
{
	struct dm_origin *o = ti->private;

	dm_put_device(ti, o->dev);
	kfree(o);
}

static int origin_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_origin *o = ti->private;
	unsigned available_sectors;

	bio_set_dev(bio, o->dev->bdev);

	if (unlikely(bio->bi_opf & REQ_PREFLUSH))
		return DM_MAPIO_REMAPPED;

	if (bio_data_dir(bio) != WRITE)
		return DM_MAPIO_REMAPPED;

	available_sectors = o->split_boundary -
		((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));

	if (bio_sectors(bio) > available_sectors)
		dm_accept_partial_bio(bio, available_sectors);

	/* Only tell snapshots if this is a write */
	return do_origin(o->dev, bio);
}

/*
 * Set the target "max_io_len" field to the minimum of all the snapshots'
 * chunk sizes.
 */
static void origin_resume(struct dm_target *ti)
{
	struct dm_origin *o = ti->private;

	o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);

	down_write(&_origins_lock);
	__insert_dm_origin(o);
	up_write(&_origins_lock);
}

static void origin_postsuspend(struct dm_target *ti)
{
	struct dm_origin *o = ti->private;

	down_write(&_origins_lock);
	__remove_dm_origin(o);
	up_write(&_origins_lock);
}

static void origin_status(struct dm_target *ti, status_type_t type,
			  unsigned status_flags, char *result, unsigned maxlen)
{
	struct dm_origin *o = ti->private;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		snprintf(result, maxlen, "%s", o->dev->name);
		break;
	}
}

static int origin_iterate_devices(struct dm_target *ti,
				  iterate_devices_callout_fn fn, void *data)
{
	struct dm_origin *o = ti->private;

	return fn(ti, o->dev, 0, ti->len, data);
}

static struct target_type origin_target = {
	.name = "snapshot-origin",
	.version = {1, 9, 0},
	.module = THIS_MODULE,
	.ctr = origin_ctr,
	.dtr = origin_dtr,
	.map = origin_map,
	.resume = origin_resume,
	.postsuspend = origin_postsuspend,
	.status = origin_status,
	.iterate_devices = origin_iterate_devices,
};

static struct target_type snapshot_target = {
	.name = "snapshot",
	.version = {1, 15, 0},
	.module = THIS_MODULE,
	.ctr = snapshot_ctr,
	.dtr = snapshot_dtr,
	.map = snapshot_map,
	.end_io = snapshot_end_io,
	.preresume = snapshot_preresume,
	.resume = snapshot_resume,
	.status = snapshot_status,
	.iterate_devices = snapshot_iterate_devices,
};

static struct target_type merge_target = {
	.name = dm_snapshot_merge_target_name,
	.version = {1, 4, 0},
	.module = THIS_MODULE,
	.ctr = snapshot_ctr,
	.dtr = snapshot_dtr,
	.map = snapshot_merge_map,
	.end_io = snapshot_end_io,
	.presuspend = snapshot_merge_presuspend,
	.preresume = snapshot_preresume,
	.resume = snapshot_merge_resume,
	.status = snapshot_status,
	.iterate_devices = snapshot_iterate_devices,
};

static int __init dm_snapshot_init(void)
{
	int r;

	r = dm_exception_store_init();
	if (r) {
		DMERR("Failed to initialize exception stores");
		return r;
	}

	r = init_origin_hash();
	if (r) {
		DMERR("init_origin_hash failed.");
		goto bad_origin_hash;
	}

	exception_cache = KMEM_CACHE(dm_exception, 0);
	if (!exception_cache) {
		DMERR("Couldn't create exception cache.");
		r = -ENOMEM;
		goto bad_exception_cache;
	}

	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
	if (!pending_cache) {
		DMERR("Couldn't create pending cache.");
		r = -ENOMEM;
		goto bad_pending_cache;
	}

	r = dm_register_target(&snapshot_target);
	if (r < 0) {
		DMERR("snapshot target register failed %d", r);
		goto bad_register_snapshot_target;
	}

	r = dm_register_target(&origin_target);
	if (r < 0) {
		DMERR("Origin target register failed %d", r);
		goto bad_register_origin_target;
	}

	r = dm_register_target(&merge_target);
	if (r < 0) {
		DMERR("Merge target register failed %d", r);
		goto bad_register_merge_target;
	}

	return 0;

bad_register_merge_target:
	dm_unregister_target(&origin_target);
bad_register_origin_target:
	dm_unregister_target(&snapshot_target);
bad_register_snapshot_target:
	kmem_cache_destroy(pending_cache);
bad_pending_cache:
	kmem_cache_destroy(exception_cache);
bad_exception_cache:
	exit_origin_hash();
bad_origin_hash:
	dm_exception_store_exit();

	return r;
}

static void __exit dm_snapshot_exit(void)
{
	dm_unregister_target(&snapshot_target);
	dm_unregister_target(&origin_target);
	dm_unregister_target(&merge_target);

	exit_origin_hash();
	kmem_cache_destroy(pending_cache);
	kmem_cache_destroy(exception_cache);

	dm_exception_store_exit();
}

/* Module hooks */
module_init(dm_snapshot_init);
module_exit(dm_snapshot_exit);

MODULE_DESCRIPTION(DM_NAME " snapshot target");
MODULE_AUTHOR("Joe Thornber");
MODULE_LICENSE("GPL");
MODULE_ALIAS("dm-snapshot-origin");
MODULE_ALIAS("dm-snapshot-merge");
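
/*
 * Illustrative table lines for the snapshot targets registered above
 * (device paths and sizes are examples only; the trailing arguments
 * select the exception store type, P for persistent or N for transient,
 * and the chunk size in sectors):
 *
 *   0 <origin_sectors> snapshot       /dev/vg/base /dev/vg/cow P 16
 *   0 <origin_sectors> snapshot-merge /dev/vg/base /dev/vg/cow P 16
 */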