// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/bitops.h>
#include <linux/bitmap.h>
#include <linux/device-mapper.h>

#include "persistent-data/dm-bitset.h"
#include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-block-manager.h"
#include "persistent-data/dm-transaction-manager.h"

#include "dm-clone-metadata.h"

#define DM_MSG_PREFIX "clone metadata"

/* Block of the metadata device that holds the superblock. */
#define SUPERBLOCK_LOCATION 0
/* Value stored in, and expected from, superblock_disk.magic. */
#define SUPERBLOCK_MAGIC 0x8af27f64
/* Constant handed to dm_bm_checksum() when checksumming the superblock. */
#define SUPERBLOCK_CSUM_XOR 257649492

/* Passed to dm_block_manager_create() as its last argument. */
#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

/* Size in bytes of superblock_disk.uuid. */
#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1

/*
 * On-disk metadata layout
 *
 * Multi-byte fields are stored little-endian. csum must remain the first
 * field: sb_prepare_for_write() and sb_check() checksum the block starting at
 * flags and covering the block size minus sizeof(__le32).
 */
struct superblock_disk {
	/* Checksum of the rest of the block; set by sb_prepare_for_write() */
	__le32 csum;
	/* Always written as 0 by __prepare_superblock() */
	__le32 flags;
	/* Location of this block; set by sb_prepare_for_write() */
	__le64 blocknr;

	/* Currently unused; zeroed by __prepare_superblock() */
	__u8 uuid[UUID_LEN];
	/* SUPERBLOCK_MAGIC */
	__le64 magic;
	/* Metadata version; sb_check() accepts the MIN..MAX range above */
	__le32 version;

	/* Copy of the metadata space map root */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * Region and target size the metadata was created with.
	 * __open_metadata() refuses to open the metadata if they differ from
	 * the values the caller supplies.
	 */
	__le64 region_size;
	__le64 target_size;

	/* Root of the on-disk bitset (see dm_clone_metadata.bitset_root) */
	__le64 bitset_root;
} __packed;

/*
 * Region and Dirty bitmaps.
 *
 * dm-clone logically splits the source and destination devices in regions of
 * fixed size. The destination device's regions are gradually hydrated, i.e.,
 * we copy (clone) the source's regions to the destination device. Eventually,
 * all regions will get hydrated and all I/O will be served from the
 * destination device.
 *
 * We maintain an on-disk bitmap which tracks the state of each of the
 * destination device's regions, i.e., whether they are hydrated or not.
 *
 * To save constantly doing look ups on disk we keep an in core copy of the
 * on-disk bitmap, the region_map.
 *
 * In order to track which regions are hydrated during a metadata transaction,
 * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
 * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
 * tracks the regions that got hydrated during the current metadata
 * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
 * the dirty_regions bitmap.
 *
 * This allows us to precisely track the regions that were hydrated during the
 * current metadata transaction and update the metadata accordingly, when we
 * commit the current transaction. This is important because dm-clone should
 * only commit the metadata of regions that were properly flushed to the
 * destination device beforehand. Otherwise, in case of a crash, we could end
 * up with a corrupted dm-clone device.
 *
 * When a region finishes hydrating dm-clone calls
 * dm_clone_set_region_hydrated(), or for discard requests
 * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
 * and dmap.
 *
 * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
 * and update the on-disk metadata accordingly. Thus, we don't have to flush to
 * disk the whole region_map. We can just flush the dirty region_map bits.
 *
 * We use the helper dmap->dirty_words bitmap, which is smaller than the
 * original region_map, to reduce the amount of memory accesses during a
 * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
 * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
 * accesses.
 *
 * We could update the on-disk bitmap directly, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path.
* Also, as these two functions don't block, we can call them in interrupt
 * context, e.g., in a hooked overwrite bio's completion routine, and further
 * reduce the I/O completion latency.
 *
 * We maintain two dirty bitmap sets. During a metadata commit we atomically
 * swap the currently used dmap with the unused one. This allows the metadata
 * update functions to run concurrently with an ongoing commit.
 */
struct dirty_map {
	/*
	 * One bit per long of dirty_regions (cmd->nr_words bits). Bit w is
	 * set when any region in word w of dirty_regions has been marked.
	 */
	unsigned long *dirty_words;
	/* One bit per region (cmd->nr_regions bits) */
	unsigned long *dirty_regions;
	/*
	 * Set to 1 whenever a region is recorded in this map; reset to 0 by
	 * __flush_dmap() after a successful commit. Accessed under
	 * cmd->bitmap_lock.
	 */
	unsigned int changed;
};

struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Sizes supplied by the caller of dm_clone_metadata_open() */
	sector_t target_size;
	sector_t region_size;
	/* dm_sector_div_up(target_size, region_size) */
	unsigned long nr_regions;
	/* BITS_TO_LONGS(nr_regions) */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/*
	 * The dmap that dm_clone_set_region_hydrated() and
	 * dm_clone_cond_set_range() record into. Swapped under bitmap_lock by
	 * dm_clone_metadata_pre_commit().
	 */
	struct dirty_map *current_dmap;

	/*
	 * Protected by lock
	 *
	 * The dmap handed over by dm_clone_metadata_pre_commit(); reset to
	 * NULL by dm_clone_metadata_commit() once it has been flushed.
	 */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	/* Persistent-data handles: block manager, space map, transaction manager */
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	/*
	 * Taken for writing by the commit, abort, reload and mode-change
	 * paths, and for reading by the space map queries.
	 */
	struct rw_semaphore lock;

	/* On-disk bitset handle and its current root */
	struct dm_disk_bitset bitset_info;
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set once bitmap_full() reports every region in region_map hydrated */
	bool hydration_done:1;
	/*
	 * Set by dm_clone_metadata_abort() when the persistent data structures
	 * could not be re-created; metadata operations then fail early.
	 */
	bool fail_io:1;
};

/*---------------------------------------------------------------------------*/

/*
 * Superblock validation.
165 */ 166 static void sb_prepare_for_write(struct dm_block_validator *v, 167 struct dm_block *b, size_t sb_block_size) 168 { 169 struct superblock_disk *sb; 170 u32 csum; 171 172 sb = dm_block_data(b); 173 sb->blocknr = cpu_to_le64(dm_block_location(b)); 174 175 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 176 SUPERBLOCK_CSUM_XOR); 177 sb->csum = cpu_to_le32(csum); 178 } 179 180 static int sb_check(struct dm_block_validator *v, struct dm_block *b, 181 size_t sb_block_size) 182 { 183 struct superblock_disk *sb; 184 u32 csum, metadata_version; 185 186 sb = dm_block_data(b); 187 188 if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) { 189 DMERR("Superblock check failed: blocknr %llu, expected %llu", 190 le64_to_cpu(sb->blocknr), 191 (unsigned long long)dm_block_location(b)); 192 return -ENOTBLK; 193 } 194 195 if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) { 196 DMERR("Superblock check failed: magic %llu, expected %llu", 197 le64_to_cpu(sb->magic), 198 (unsigned long long)SUPERBLOCK_MAGIC); 199 return -EILSEQ; 200 } 201 202 csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32), 203 SUPERBLOCK_CSUM_XOR); 204 if (sb->csum != cpu_to_le32(csum)) { 205 DMERR("Superblock check failed: checksum %u, expected %u", 206 csum, le32_to_cpu(sb->csum)); 207 return -EILSEQ; 208 } 209 210 /* Check metadata version */ 211 metadata_version = le32_to_cpu(sb->version); 212 if (metadata_version < DM_CLONE_MIN_METADATA_VERSION || 213 metadata_version > DM_CLONE_MAX_METADATA_VERSION) { 214 DMERR("Clone metadata version %u found, but only versions between %u and %u supported.", 215 metadata_version, DM_CLONE_MIN_METADATA_VERSION, 216 DM_CLONE_MAX_METADATA_VERSION); 217 return -EINVAL; 218 } 219 220 return 0; 221 } 222 223 static struct dm_block_validator sb_validator = { 224 .name = "superblock", 225 .prepare_for_write = sb_prepare_for_write, 226 .check = sb_check 227 }; 228 229 /* 230 * Check if the superblock is formatted or not. 
We consider the superblock to 231 * be formatted in case we find non-zero bytes in it. 232 */ 233 static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted) 234 { 235 int r; 236 unsigned int i, nr_words; 237 struct dm_block *sblock; 238 __le64 *data_le, zero = cpu_to_le64(0); 239 240 /* 241 * We don't use a validator here because the superblock could be all 242 * zeroes. 243 */ 244 r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock); 245 if (r) { 246 DMERR("Failed to read_lock superblock"); 247 return r; 248 } 249 250 data_le = dm_block_data(sblock); 251 *formatted = false; 252 253 /* This assumes that the block size is a multiple of 8 bytes */ 254 BUG_ON(dm_bm_block_size(bm) % sizeof(__le64)); 255 nr_words = dm_bm_block_size(bm) / sizeof(__le64); 256 for (i = 0; i < nr_words; i++) { 257 if (data_le[i] != zero) { 258 *formatted = true; 259 break; 260 } 261 } 262 263 dm_bm_unlock(sblock); 264 265 return 0; 266 } 267 268 /*---------------------------------------------------------------------------*/ 269 270 /* 271 * Low-level metadata handling. 
*/

/*
 * Take a read lock on the superblock block, handing sb_validator to the block
 * manager. On success *sblock refers to the locked block. Returns the result
 * of dm_bm_read_lock().
 */
static inline int superblock_read_lock(struct dm_clone_metadata *cmd,
				       struct dm_block **sblock)
{
	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}

/* Same as superblock_read_lock(), but through dm_bm_write_lock(). */
static inline int superblock_write_lock(struct dm_clone_metadata *cmd,
					struct dm_block **sblock)
{
	return dm_bm_write_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}

/*
 * Write-lock the superblock through dm_bm_write_lock_zero().
 *
 * NOTE(review): presumably the block manager hands back a zero-filled buffer
 * instead of reading the block from disk - confirm against dm-block-manager.
 */
static inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
					     struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
}

/*
 * Copy the root of the metadata space map into cmd->metadata_space_map_root,
 * from where __prepare_superblock() later copies it into the superblock.
 *
 * Returns 0 on success, or the error reported by dm_sm_root_size() or
 * dm_sm_copy_root().
 */
static int __copy_sm_root(struct dm_clone_metadata *cmd)
{
	int r;
	size_t root_size;

	r = dm_sm_root_size(cmd->sm, &root_size);
	if (r)
		return r;

	return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
}

/*
 * Save dm-clone metadata in superblock
 *
 * Fills in every field of @sb except csum and blocknr, which
 * sb_prepare_for_write() sets.
 */
static void __prepare_superblock(struct dm_clone_metadata *cmd,
				 struct superblock_disk *sb)
{
	sb->flags = cpu_to_le32(0UL);

	/* FIXME: UUID is currently unused */
	memset(sb->uuid, 0, sizeof(sb->uuid));

	sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
	sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);

	/* Save the metadata space_map root */
	memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
	       sizeof(cmd->metadata_space_map_root));

	sb->region_size = cpu_to_le64(cmd->region_size);
	sb->target_size = cpu_to_le64(cmd->target_size);
	sb->bitset_root = cpu_to_le64(cmd->bitset_root);
}

/*
 * Open already formatted metadata.
 *
 * Read-locks the superblock, checks that the stored region and target sizes
 * match the ones in @cmd, opens the transaction manager and space map from the
 * stored space map root and loads the bitset root. The superblock is unlocked
 * again on every path that managed to lock it.
 *
 * Returns 0 on success, -EINVAL on a size mismatch, or the error from
 * superblock_read_lock() or dm_tm_open_with_sm().
 */
static int __open_metadata(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	r = superblock_read_lock(cmd, &sblock);

	if (r) {
		DMERR("Failed to read_lock superblock");
		return r;
	}

	sb = dm_block_data(sblock);

	/* Verify that target_size and region_size haven't changed. */
	if (cmd->region_size != le64_to_cpu(sb->region_size) ||
	    cmd->target_size != le64_to_cpu(sb->target_size)) {
		DMERR("Region and/or target size don't match the ones in metadata");
		r = -EINVAL;
		goto out_with_lock;
	}

	r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
			       sb->metadata_space_map_root,
			       sizeof(sb->metadata_space_map_root),
			       &cmd->tm, &cmd->sm);

	if (r) {
		DMERR("dm_tm_open_with_sm failed");
		goto out_with_lock;
	}

	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
	cmd->bitset_root = le64_to_cpu(sb->bitset_root);

out_with_lock:
	dm_bm_unlock(sblock);

	return r;
}

/*
 * Format the metadata device.
 *
 * Creates the transaction manager and space map, creates an on-disk bitset
 * and resizes it to cmd->nr_regions entries (new entries get the value false),
 * then writes the first superblock: pre-commit, copy the space map root, lock
 * the superblock, fill it in and commit.
 *
 * Returns 0 on success. On any failure after the transaction manager was
 * created, the space map and transaction manager are destroyed before the
 * error is returned.
 */
static int __format_metadata(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
	if (r) {
		DMERR("Failed to create transaction manager");
		return r;
	}

	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);

	r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
	if (r) {
		DMERR("Failed to create empty on-disk bitset");
		goto err_with_tm;
	}

	r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
			     cmd->nr_regions, false, &cmd->bitset_root);
	if (r) {
		DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
		goto err_with_tm;
	}

	/* Flush to disk all blocks, except the superblock */
	r = dm_tm_pre_commit(cmd->tm);
	if (r) {
		DMERR("dm_tm_pre_commit failed");
		goto err_with_tm;
	}

	r = __copy_sm_root(cmd);
	if (r) {
		DMERR("__copy_sm_root failed");
		goto err_with_tm;
	}

	r = superblock_write_lock_zero(cmd, &sblock);
	if (r) {
		DMERR("Failed to write_lock superblock");
		goto err_with_tm;
	}

	sb = dm_block_data(sblock);
	__prepare_superblock(cmd, sb);
	r = dm_tm_commit(cmd->tm, sblock);
	if (r) {
		DMERR("Failed to commit superblock");
		goto err_with_tm;
	}

	return 0;

err_with_tm:
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);

	return r;
}

/*
 * Open the metadata if the superblock contains any non-zero data, otherwise
 * format it. When the superblock is all zeroes and @may_format_device is
 * false, -EPERM is returned instead of formatting.
 */
static int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
{
	int r;
	bool formatted = false;

	r = __superblock_all_zeroes(cmd->bm, &formatted);
	if (r)
		return r;

	if (!formatted)
		return may_format_device ? __format_metadata(cmd) : -EPERM;

	return __open_metadata(cmd);
}

/*
 * Create the block manager on cmd->bdev and then open or format the metadata.
 *
 * Returns 0 on success. If opening/formatting fails the block manager is
 * destroyed again before the error is returned.
 */
static int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
					       bool may_format_device)
{
	int r;

	/* Create block manager */
	cmd->bm = dm_block_manager_create(cmd->bdev,
					 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
					 DM_CLONE_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(cmd->bm)) {
		DMERR("Failed to create block manager");
		return PTR_ERR(cmd->bm);
	}

	r = __open_or_format_metadata(cmd, may_format_device);
	if (r)
		dm_block_manager_destroy(cmd->bm);

	return r;
}

/* Tear down the space map, the transaction manager and the block manager. */
static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
{
	dm_sm_destroy(cmd->sm);
	dm_tm_destroy(cmd->tm);
	dm_block_manager_destroy(cmd->bm);
}

/*---------------------------------------------------------------------------*/

/* Number of bytes needed for a bitmap of @nr_bits bits, in whole longs. */
static size_t bitmap_size(unsigned long nr_bits)
{
	return BITS_TO_LONGS(nr_bits) * sizeof(long);
}

/*
 * Allocate the two zeroed bitmaps of @dmap: @nr_words bits for dirty_words and
 * @nr_regions bits for dirty_regions.
 *
 * Returns 0 on success or -ENOMEM, in which case nothing stays allocated.
 */
static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
			    unsigned long nr_regions)
{
	dmap->changed = 0;

	dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
	if (!dmap->dirty_words)
		return -ENOMEM;

	dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
	if (!dmap->dirty_regions) {
		kvfree(dmap->dirty_words);
		return -ENOMEM;
	}

	return 0;
}

/* Free the bitmaps allocated by __dirty_map_init(). */
static void __dirty_map_exit(struct dirty_map *dmap)
{
	kvfree(dmap->dirty_words);
	kvfree(dmap->dirty_regions);
}

/*
 * Allocate both dirty maps of @cmd, make dmap[0] the current one and mark that
 * no commit is in progress.
 *
 * Returns 0 on success or -ENOMEM, in which case nothing stays allocated.
 */
static int dirty_map_init(struct dm_clone_metadata *cmd)
{
	if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
		DMERR("Failed to allocate dirty bitmap");
		return -ENOMEM;
	}

	if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
		DMERR("Failed to allocate dirty bitmap");
		__dirty_map_exit(&cmd->dmap[0]);
		return -ENOMEM;
	}

	cmd->current_dmap = &cmd->dmap[0];
	cmd->committing_dmap = NULL;

	return 0;
}

/* Free both dirty maps of @cmd. */
static void dirty_map_exit(struct dm_clone_metadata *cmd)
{
	__dirty_map_exit(&cmd->dmap[0]);
	__dirty_map_exit(&cmd->dmap[1]);
}

/*
 * Copy the on-disk bitset into cmd->region_map.
 *
 * Every bit in [0, cmd->nr_regions) of region_map is written, either set or
 * cleared, so the map does not have to be initialized beforehand. region_map
 * is modified without taking cmd->bitmap_lock.
 *
 * Returns 0 on success or the error from dm_bitset_flush(),
 * dm_bitset_cursor_begin() or dm_bitset_cursor_next().
 */
static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
{
	int r;
	unsigned long i;
	struct dm_bitset_cursor c;

	/* Flush bitset cache */
	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
	if (r)
		return r;

	r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
	if (r)
		return r;

	for (i = 0; ; i++) {
		if (dm_bitset_cursor_get_value(&c))
			__set_bit(i, cmd->region_map);
		else
			__clear_bit(i, cmd->region_map);

		/* Stop here so the cursor is not advanced past the last entry */
		if (i >= (cmd->nr_regions - 1))
			break;

		r = dm_bitset_cursor_next(&c);

		if (r)
			break;
	}

	dm_bitset_cursor_end(&c);

	return r;
}

/*
 * Allocate and initialize the dm-clone metadata handle for @bdev.
 *
 * The metadata is opened if the superblock contains data and formatted
 * otherwise. The on-disk bitset is then loaded into the in-core region_map
 * and the dirty maps are allocated. hydration_done is set right away if all
 * regions are already hydrated.
 *
 * Returns the new handle, or an ERR_PTR() on failure, in which case everything
 * allocated so far has been released.
 */
struct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
						 sector_t target_size,
						 sector_t region_size)
{
	int r;
	struct dm_clone_metadata *cmd;

	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
	if (!cmd) {
		DMERR("Failed to allocate memory for dm-clone metadata");
		return ERR_PTR(-ENOMEM);
	}

	cmd->bdev = bdev;
	cmd->target_size = target_size;
	cmd->region_size = region_size;
	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);

	init_rwsem(&cmd->lock);
	spin_lock_init(&cmd->bitmap_lock);
	cmd->read_only = 0;
	cmd->fail_io = false;
	cmd->hydration_done = false;

	/* Not zeroed: __load_bitset_in_core() below writes every region bit */
	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
	if (!cmd->region_map) {
		DMERR("Failed to allocate memory for region bitmap");
		r = -ENOMEM;
		goto out_with_md;
	}

	r = __create_persistent_data_structures(cmd, true);
	if (r)
		goto out_with_region_map;

	r = __load_bitset_in_core(cmd);
	if (r) {
		DMERR("Failed to load on-disk region map");
		goto out_with_pds;
	}

	r = dirty_map_init(cmd);
	if (r)
		goto out_with_pds;

	if (bitmap_full(cmd->region_map, cmd->nr_regions))
		cmd->hydration_done = true;

	return cmd;

out_with_pds:
	__destroy_persistent_data_structures(cmd);

out_with_region_map:
	kvfree(cmd->region_map);

out_with_md:
	kfree(cmd);

	return ERR_PTR(r);
}

/*
 * Release everything owned by @cmd, including @cmd itself.
 *
 * The persistent data structures are skipped when fail_io is set: that flag is
 * only set by dm_clone_metadata_abort() after it destroyed them and failed to
 * re-create them.
 */
void dm_clone_metadata_close(struct dm_clone_metadata *cmd)
{
	if (!cmd->fail_io)
		__destroy_persistent_data_structures(cmd);

	dirty_map_exit(cmd);
	kvfree(cmd->region_map);
	kfree(cmd);
}

/* Returns true once all regions have been found hydrated. */
bool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
{
	return cmd->hydration_done;
}

/*
 * Returns true if region @region_nr is hydrated, i.e., hydration is done or
 * the region's bit is set in region_map. No lock is taken.
 */
bool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
{
	return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
}

/*
 * Returns true if hydration is done or the first zero bit of region_map at or
 * after @start lies at or beyond @start + @nr_regions.
 *
 * NOTE(review): the range is not validated here; presumably callers pass a
 * range that lies within cmd->nr_regions - confirm against callers.
 */
bool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
				unsigned long start, unsigned long nr_regions)
{
	unsigned long bit;

	if (dm_clone_is_hydration_done(cmd))
		return true;

	bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);

	return (bit >= (start + nr_regions));
}

/* Returns the number of bits set in region_map. */
unsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
{
	return bitmap_weight(cmd->region_map, cmd->nr_regions);
}

/*
 * Returns the result of find_next_zero_bit() on region_map, searching from
 * @start with cmd->nr_regions as the bitmap size.
 */
unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
						   unsigned long start)
{
	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
}

/*
 * Write the dirty regions of one word of @dirty_regions to the on-disk bitset.
 *
 * For every set bit in word @word (limited to cmd->nr_regions) the
 * corresponding on-disk bit is set and the dirty bit is cleared afterwards.
 * On error the function returns immediately; the failing bit and the ones not
 * yet visited stay set in @dirty_regions.
 */
static int __update_metadata_word(struct dm_clone_metadata *cmd,
				  unsigned long *dirty_regions,
				  unsigned long word)
{
	int r;
	unsigned long index = word * BITS_PER_LONG;
	unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);

	while (index < max_index) {
		if (test_bit(index, dirty_regions)) {
			r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
					      index, &cmd->bitset_root);
			if (r) {
				DMERR("dm_bitset_set_bit failed");
				return r;
			}
			__clear_bit(index, dirty_regions);
		}
		index++;
	}

	return 0;
}

/*
 * Commit the current metadata transaction.
 *
 * The steps run in a fixed order: flush the bitset, pre-commit, copy the space
 * map root, lock and fill in the superblock, commit. After a successful commit
 * hydration_done is set if every region is hydrated.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static int __metadata_commit(struct dm_clone_metadata *cmd)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *sb;

	/* Flush bitset cache */
	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
	if (r) {
		DMERR("dm_bitset_flush failed");
		return r;
	}

	/* Flush to disk all blocks, except the superblock */
	r = dm_tm_pre_commit(cmd->tm);
	if (r) {
		DMERR("dm_tm_pre_commit failed");
		return r;
	}

	/* Save the space map root in cmd->metadata_space_map_root */
	r = __copy_sm_root(cmd);
	if (r) {
		DMERR("__copy_sm_root failed");
		return r;
	}

	/* Lock the superblock */
	r = superblock_write_lock_zero(cmd, &sblock);
	if (r) {
		DMERR("Failed to write_lock superblock");
		return r;
	}

	/* Save the metadata in superblock */
	sb = dm_block_data(sblock);
	__prepare_superblock(cmd, sb);

	/* Unlock superblock and commit it to disk */
	r = dm_tm_commit(cmd->tm, sblock);
	if (r) {
		DMERR("Failed to commit superblock");
		return r;
	}

	/*
	 * FIXME: Find a more efficient way to check if the hydration is done.
	 */
	if (bitmap_full(cmd->region_map, cmd->nr_regions))
		cmd->hydration_done = true;

	return 0;
}

/*
 * Write all regions recorded in @dmap to the on-disk bitset and commit.
 *
 * Only the words flagged in dmap->dirty_words are visited. A word's flag is
 * cleared once all of its regions have been written. dmap->changed is reset,
 * under bitmap_lock, only after the commit succeeded.
 *
 * Returns 0 on success or the first error encountered.
 */
static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
{
	int r;
	unsigned long word;

	word = 0;
	do {
		word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);

		/* No dirty word left */
		if (word == cmd->nr_words)
			break;

		r = __update_metadata_word(cmd, dmap->dirty_regions, word);

		if (r)
			return r;

		__clear_bit(word, dmap->dirty_words);
		word++;
	} while (word < cmd->nr_words);

	r = __metadata_commit(cmd);

	if (r)
		return r;

	/* Update the changed flag */
	spin_lock_irq(&cmd->bitmap_lock);
	dmap->changed = 0;
	spin_unlock_irq(&cmd->bitmap_lock);

	return 0;
}

/*
 * First phase of a metadata commit: swap the dirty maps.
 *
 * The other dmap becomes cmd->current_dmap and the previously current one is
 * remembered in cmd->committing_dmap for dm_clone_metadata_commit().
 *
 * Returns 0 on success, -EPERM if fail_io is set or the block manager is
 * read-only, and -EINVAL (with a warning) if the other dmap still has
 * uncommitted changes or a commit is already pending.
 */
int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
{
	int r = 0;
	struct dirty_map *dmap, *next_dmap;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
		r = -EPERM;
		goto out;
	}

	/* Get current dirty bitmap */
	dmap = cmd->current_dmap;

	/* Get next dirty bitmap */
	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];

	/*
	 * The last commit failed, so we don't have a clean dirty-bitmap to
	 * use.
	 */
	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
		r = -EINVAL;
		goto out;
	}

	/* Swap dirty bitmaps */
	spin_lock_irq(&cmd->bitmap_lock);
	cmd->current_dmap = next_dmap;
	spin_unlock_irq(&cmd->bitmap_lock);

	/* Set old dirty bitmap as currently committing */
	cmd->committing_dmap = dmap;
out:
	up_write(&cmd->lock);

	return r;
}

/*
 * Second phase of a metadata commit: flush cmd->committing_dmap to disk.
 *
 * Returns 0 on success, -EPERM if fail_io is set or the block manager is
 * read-only, -EINVAL (with a warning) if dm_clone_metadata_pre_commit() did
 * not set up a committing dmap, or the error from __flush_dmap(). On a flush
 * error committing_dmap is left set.
 */
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
{
	int r = -EPERM;

	down_write(&cmd->lock);

	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
		goto out;

	if (WARN_ON(!cmd->committing_dmap)) {
		r = -EINVAL;
		goto out;
	}

	r = __flush_dmap(cmd, cmd->committing_dmap);
	if (!r) {
		/* Clear committing dmap */
		cmd->committing_dmap = NULL;
	}
out:
	up_write(&cmd->lock);

	return r;
}

/*
 * Mark region @region_nr as hydrated in region_map and record it in the
 * current dirty map.
 *
 * The bitmap lock is taken with spin_lock_irqsave(), so the caller's interrupt
 * state is preserved.
 *
 * Returns 0 on success, -ERANGE if @region_nr is out of range and -EPERM if
 * the metadata is in read-only mode.
 */
int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
{
	int r = 0;
	struct dirty_map *dmap;
	unsigned long word, flags;

	if (unlikely(region_nr >= cmd->nr_regions)) {
		DMERR("Region %lu out of range (total number of regions %lu)",
		      region_nr, cmd->nr_regions);
		return -ERANGE;
	}

	word = region_nr / BITS_PER_LONG;

	spin_lock_irqsave(&cmd->bitmap_lock, flags);

	if (cmd->read_only) {
		r = -EPERM;
		goto out;
	}

	dmap = cmd->current_dmap;

	__set_bit(word, dmap->dirty_words);
	__set_bit(region_nr, dmap->dirty_regions);
	__set_bit(region_nr, cmd->region_map);
	dmap->changed = 1;

out:
	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);

	return r;
}

/*
 * Mark the regions in [@start, @start + @nr_regions) that are not hydrated yet
 * as hydrated, recording each of them in the current dirty map. Regions that
 * are already set in region_map are left alone and not recorded.
 *
 * The bitmap lock is taken with spin_lock_irq().
 * NOTE(review): this unconditionally re-enables interrupts on unlock, so
 * presumably callers run with interrupts enabled - confirm against callers.
 *
 * Returns 0 on success, -ERANGE if the range is invalid (out of bounds or
 * wrapping around) and -EPERM if the metadata is in read-only mode.
 */
int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
			    unsigned long nr_regions)
{
	int r = 0;
	struct dirty_map *dmap;
	unsigned long word, region_nr;

	if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
		     (start + nr_regions) > cmd->nr_regions)) {
		DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
		      start, nr_regions, cmd->nr_regions);
		return -ERANGE;
	}

	spin_lock_irq(&cmd->bitmap_lock);

	if (cmd->read_only) {
		r = -EPERM;
		goto out;
	}

	dmap = cmd->current_dmap;
	for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
		if (!test_bit(region_nr, cmd->region_map)) {
			word = region_nr / BITS_PER_LONG;
			__set_bit(word, dmap->dirty_words);
			__set_bit(region_nr, dmap->dirty_regions);
			__set_bit(region_nr, cmd->region_map);
			dmap->changed = 1;
		}
	}
out:
	spin_unlock_irq(&cmd->bitmap_lock);

	return r;
}

/*
 * WARNING: This must not be called concurrently with either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
 * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
 * exception is after setting the metadata to read-only mode, using
 * dm_clone_metadata_set_read_only().
 *
 * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
 * may block.
927 */ 928 int dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd) 929 { 930 int r = -EINVAL; 931 932 down_write(&cmd->lock); 933 934 if (cmd->fail_io) 935 goto out; 936 937 r = __load_bitset_in_core(cmd); 938 out: 939 up_write(&cmd->lock); 940 941 return r; 942 } 943 944 bool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd) 945 { 946 bool r; 947 unsigned long flags; 948 949 spin_lock_irqsave(&cmd->bitmap_lock, flags); 950 r = cmd->dmap[0].changed || cmd->dmap[1].changed; 951 spin_unlock_irqrestore(&cmd->bitmap_lock, flags); 952 953 return r; 954 } 955 956 int dm_clone_metadata_abort(struct dm_clone_metadata *cmd) 957 { 958 int r = -EPERM; 959 960 down_write(&cmd->lock); 961 962 if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) 963 goto out; 964 965 __destroy_persistent_data_structures(cmd); 966 967 r = __create_persistent_data_structures(cmd, false); 968 if (r) { 969 /* If something went wrong we can neither write nor read the metadata */ 970 cmd->fail_io = true; 971 } 972 out: 973 up_write(&cmd->lock); 974 975 return r; 976 } 977 978 void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd) 979 { 980 down_write(&cmd->lock); 981 982 spin_lock_irq(&cmd->bitmap_lock); 983 cmd->read_only = 1; 984 spin_unlock_irq(&cmd->bitmap_lock); 985 986 if (!cmd->fail_io) 987 dm_bm_set_read_only(cmd->bm); 988 989 up_write(&cmd->lock); 990 } 991 992 void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd) 993 { 994 down_write(&cmd->lock); 995 996 spin_lock_irq(&cmd->bitmap_lock); 997 cmd->read_only = 0; 998 spin_unlock_irq(&cmd->bitmap_lock); 999 1000 if (!cmd->fail_io) 1001 dm_bm_set_read_write(cmd->bm); 1002 1003 up_write(&cmd->lock); 1004 } 1005 1006 int dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd, 1007 dm_block_t *result) 1008 { 1009 int r = -EINVAL; 1010 1011 down_read(&cmd->lock); 1012 1013 if (!cmd->fail_io) 1014 r = dm_sm_get_nr_free(cmd->sm, result); 1015 1016 up_read(&cmd->lock); 1017 1018 return r; 
1019 } 1020 1021 int dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd, 1022 dm_block_t *result) 1023 { 1024 int r = -EINVAL; 1025 1026 down_read(&cmd->lock); 1027 1028 if (!cmd->fail_io) 1029 r = dm_sm_get_nr_blocks(cmd->sm, result); 1030 1031 up_read(&cmd->lock); 1032 1033 return r; 1034 } 1035