// SPDX-License-Identifier: GPL-2.0-only
#include "dm.h"
#include "persistent-data/dm-transaction-manager.h"
#include "persistent-data/dm-bitset.h"
#include "persistent-data/dm-space-map.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "era"

#define SUPERBLOCK_LOCATION 0
#define SUPERBLOCK_MAGIC 2126579579
#define SUPERBLOCK_CSUM_XOR 146538381
#define MIN_ERA_VERSION 1
#define MAX_ERA_VERSION 1
#define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION
#define MIN_BLOCK_SIZE 8

/*----------------------------------------------------------------
 * Writeset
 *--------------------------------------------------------------*/
struct writeset_metadata {
	uint32_t nr_bits;
	dm_block_t root;
};

struct writeset {
	struct writeset_metadata md;

	/*
	 * An in core copy of the bits to save constantly doing look ups on
	 * disk.
	 */
	unsigned long *bits;
};

/*
 * This does not free off the on disk bitset as this will normally be done
 * after digesting into the era array.
 */
static void writeset_free(struct writeset *ws)
{
	vfree(ws->bits);
}

static int setup_on_disk_bitset(struct dm_disk_bitset *info,
				unsigned nr_bits, dm_block_t *root)
{
	int r;

	r = dm_bitset_empty(info, root);
	if (r)
		return r;

	return dm_bitset_resize(info, *root, 0, nr_bits, false, root);
}

static size_t bitset_size(unsigned nr_bits)
{
	return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG);
}

/*
 * Allocates memory for the in core bitset.
 */
static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks)
{
	ws->md.nr_bits = nr_blocks;
	ws->md.root = INVALID_WRITESET_ROOT;
	ws->bits = vzalloc(bitset_size(nr_blocks));
	if (!ws->bits) {
		DMERR("%s: couldn't allocate in memory bitset", __func__);
		return -ENOMEM;
	}

	return 0;
}

/*
 * Wipes the in-core bitset, and creates a new on disk bitset.
 */
static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws)
{
	int r;

	memset(ws->bits, 0, bitset_size(ws->md.nr_bits));

	r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root);
	if (r) {
		DMERR("%s: setup_on_disk_bitset failed", __func__);
		return r;
	}

	return 0;
}

static bool writeset_marked(struct writeset *ws, dm_block_t block)
{
	return test_bit(block, ws->bits);
}

static int writeset_marked_on_disk(struct dm_disk_bitset *info,
				   struct writeset_metadata *m, dm_block_t block,
				   bool *result)
{
	dm_block_t old = m->root;

	/*
	 * The bitset was flushed when it was archived, so we know there'll
	 * be no change to the root.
	 */
	int r = dm_bitset_test_bit(info, m->root, block, &m->root, result);
	if (r) {
		DMERR("%s: dm_bitset_test_bit failed", __func__);
		return r;
	}

	BUG_ON(m->root != old);

	return r;
}

/*
 * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was.
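 * A 0 return tells the caller (process_deferred_bios()) that the metadata
 * needs committing before the deferred bio can be issued.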
 */
static int writeset_test_and_set(struct dm_disk_bitset *info,
				 struct writeset *ws, uint32_t block)
{
	int r;

	if (!test_and_set_bit(block, ws->bits)) {
		r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root);
		if (r) {
			/* FIXME: fail mode */
			return r;
		}

		return 0;
	}

	return 1;
}

/*----------------------------------------------------------------
 * On disk metadata layout
 *--------------------------------------------------------------*/
#define SPACE_MAP_ROOT_SIZE 128
#define UUID_LEN 16

struct writeset_disk {
	__le32 nr_bits;
	__le64 root;
} __packed;

struct superblock_disk {
	__le32 csum;
	__le32 flags;
	__le64 blocknr;

	__u8 uuid[UUID_LEN];
	__le64 magic;
	__le32 version;

	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	__le32 data_block_size;
	__le32 metadata_block_size;
	__le32 nr_blocks;

	__le32 current_era;
	struct writeset_disk current_writeset;

	/*
	 * Only these two fields are valid within the metadata snapshot.
	 */
	__le64 writeset_tree_root;
	__le64 era_array_root;

	__le64 metadata_snap;
} __packed;

/*----------------------------------------------------------------
 * Superblock validation
 *--------------------------------------------------------------*/
static void sb_prepare_for_write(struct dm_block_validator *v,
				 struct dm_block *b,
				 size_t sb_block_size)
{
	struct superblock_disk *disk = dm_block_data(b);

	disk->blocknr = cpu_to_le64(dm_block_location(b));
	disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags,
						sb_block_size - sizeof(__le32),
						SUPERBLOCK_CSUM_XOR));
}

static int check_metadata_version(struct superblock_disk *disk)
{
	uint32_t metadata_version = le32_to_cpu(disk->version);

	if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) {
		DMERR("Era metadata version %u found, but only versions between %u and %u supported.",
		      metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION);
		return -EINVAL;
	}

	return 0;
}

static int sb_check(struct dm_block_validator *v,
		    struct dm_block *b,
		    size_t sb_block_size)
{
	struct superblock_disk *disk = dm_block_data(b);
	__le32 csum_le;

	if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) {
		DMERR("sb_check failed: blocknr %llu: wanted %llu",
		      le64_to_cpu(disk->blocknr),
		      (unsigned long long)dm_block_location(b));
		return -ENOTBLK;
	}

	if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) {
		DMERR("sb_check failed: magic %llu: wanted %llu",
		      le64_to_cpu(disk->magic),
		      (unsigned long long) SUPERBLOCK_MAGIC);
		return -EILSEQ;
	}

	csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags,
					     sb_block_size - sizeof(__le32),
					     SUPERBLOCK_CSUM_XOR));
	if (csum_le != disk->csum) {
		DMERR("sb_check failed: csum %u: wanted %u",
		      le32_to_cpu(csum_le), le32_to_cpu(disk->csum));
		return -EILSEQ;
	}

	return check_metadata_version(disk);
}

static struct dm_block_validator sb_validator = {
	.name = "superblock",
	.prepare_for_write = sb_prepare_for_write,
	.check = sb_check
};

/*----------------------------------------------------------------
 * Low level metadata handling
 *--------------------------------------------------------------*/
#define DM_ERA_METADATA_BLOCK_SIZE 4096
#define ERA_MAX_CONCURRENT_LOCKS 5

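/*
 * In-core representation of the era metadata.  The io path reads
 * current_writeset (and its in-core bits) under RCU; everything else is
 * only touched from the single worker thread, or before the worker has
 * been started (construction / preresume).
 */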
struct era_metadata {
	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	dm_block_t block_size;
	uint32_t nr_blocks;

	uint32_t current_era;

	/*
	 * We preallocate 2 writesets.  When an era rolls over we
	 * switch between them. This means the allocation is done at
	 * preresume time, rather than on the io path.
	 */
	struct writeset writesets[2];
	struct writeset *current_writeset;

	dm_block_t writeset_tree_root;
	dm_block_t era_array_root;

	struct dm_disk_bitset bitset_info;
	struct dm_btree_info writeset_tree_info;
	struct dm_array_info era_array_info;

	dm_block_t metadata_snap;

	/*
	 * A flag that is set whenever a writeset has been archived.
	 */
	bool archived_writesets;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
};

static int superblock_read_lock(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION,
			       &sb_validator, sblock);
}

static int superblock_lock_zero(struct era_metadata *md,
				struct dm_block **sblock)
{
	return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION,
				     &sb_validator, sblock);
}

static int superblock_lock(struct era_metadata *md,
			   struct dm_block **sblock)
{
	return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION,
				&sb_validator, sblock);
}

/* FIXME: duplication with cache and thin */
static int superblock_all_zeroes(struct dm_block_manager *bm, bool *result)
{
	int r;
	unsigned i;
	struct dm_block *b;
	__le64 *data_le, zero = cpu_to_le64(0);
	unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);

	/*
	 * We can't use a validator here - it may be all zeroes.
	 */
	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b);
	if (r)
		return r;

	data_le = dm_block_data(b);
	*result = true;
	for (i = 0; i < sb_block_size; i++) {
		if (data_le[i] != zero) {
			*result = false;
			break;
		}
	}

	dm_bm_unlock(b);

	return 0;
}

/*----------------------------------------------------------------*/

static void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk)
{
	disk->nr_bits = cpu_to_le32(core->nr_bits);
	disk->root = cpu_to_le64(core->root);
}

static void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core)
{
	core->nr_bits = le32_to_cpu(disk->nr_bits);
	core->root = le64_to_cpu(disk->root);
}

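/*
 * Value type callbacks for the writeset tree.  Whenever the btree shares
 * or drops a writeset_disk value, the reference on the underlying on-disk
 * bitset has to follow.
 */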
static void ws_inc(void *context, const void *value)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;

	memcpy(&ws_d, value, sizeof(ws_d));
	b = le64_to_cpu(ws_d.root);

	dm_tm_inc(md->tm, b);
}

static void ws_dec(void *context, const void *value)
{
	struct era_metadata *md = context;
	struct writeset_disk ws_d;
	dm_block_t b;

	memcpy(&ws_d, value, sizeof(ws_d));
	b = le64_to_cpu(ws_d.root);

	dm_bitset_del(&md->bitset_info, b);
}

static int ws_eq(void *context, const void *value1, const void *value2)
{
	/* the values held in the tree are packed writeset_disk structs */
	return !memcmp(value1, value2, sizeof(struct writeset_disk));
}

/*----------------------------------------------------------------*/

static void setup_writeset_tree_info(struct era_metadata *md)
{
	struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type;

	md->writeset_tree_info.tm = md->tm;
	md->writeset_tree_info.levels = 1;
	vt->context = md;
	vt->size = sizeof(struct writeset_disk);
	vt->inc = ws_inc;
	vt->dec = ws_dec;
	vt->equal = ws_eq;
}

static void setup_era_array_info(struct era_metadata *md)
{
	struct dm_btree_value_type vt;

	vt.context = NULL;
	vt.size = sizeof(__le32);
	vt.inc = NULL;
	vt.dec = NULL;
	vt.equal = NULL;

	dm_array_info_init(&md->era_array_info, md->tm, &vt);
}

static void setup_infos(struct era_metadata *md)
{
	dm_disk_bitset_init(md->tm, &md->bitset_info);
	setup_writeset_tree_info(md);
	setup_era_array_info(md);
}

/*----------------------------------------------------------------*/

static int create_fresh_metadata(struct era_metadata *md)
{
	int r;

	r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION,
				 &md->tm, &md->sm);
	if (r < 0) {
		DMERR("dm_tm_create_with_sm failed");
		return r;
	}

	setup_infos(md);

	r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root);
	if (r) {
		DMERR("couldn't create new writeset tree");
		goto bad;
	}

	r = dm_array_empty(&md->era_array_info, &md->era_array_root);
	if (r) {
		DMERR("couldn't create era array");
		goto bad;
	}

	return 0;

bad:
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);

	return r;
}

static int save_sm_root(struct era_metadata *md)
{
	int r;
	size_t metadata_len;

	r = dm_sm_root_size(md->sm, &metadata_len);
	if (r < 0)
		return r;

	return dm_sm_copy_root(md->sm, &md->metadata_space_map_root,
			       metadata_len);
}

static void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk)
{
	memcpy(&disk->metadata_space_map_root,
	       &md->metadata_space_map_root,
	       sizeof(md->metadata_space_map_root));
}

/*
 * Writes a superblock, including the static fields that don't get updated
 * with every commit (possible optimisation here).  'md' should be fully
 * constructed when this is called.
 */
static void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk)
{
	disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
	disk->flags = cpu_to_le32(0ul);

	/* FIXME: can't keep blanking the uuid (uuid is currently unused though) */
	memset(disk->uuid, 0, sizeof(disk->uuid));
	disk->version = cpu_to_le32(MAX_ERA_VERSION);

	copy_sm_root(md, disk);

	disk->data_block_size = cpu_to_le32(md->block_size);
	disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT);
	disk->nr_blocks = cpu_to_le32(md->nr_blocks);
	disk->current_era = cpu_to_le32(md->current_era);

	ws_pack(&md->current_writeset->md, &disk->current_writeset);
	disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root);
	disk->era_array_root = cpu_to_le64(md->era_array_root);
	disk->metadata_snap = cpu_to_le64(md->metadata_snap);
}

static int write_superblock(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *disk;

	r = save_sm_root(md);
	if (r) {
		DMERR("%s: save_sm_root failed", __func__);
		return r;
	}

	r = superblock_lock_zero(md, &sblock);
	if (r)
		return r;

	disk = dm_block_data(sblock);
	prepare_superblock(md, disk);

	return dm_tm_commit(md->tm, sblock);
}

/*
 * Assumes block_size and the infos are set.
 */
static int format_metadata(struct era_metadata *md)
{
	int r;

	r = create_fresh_metadata(md);
	if (r)
		return r;

	r = write_superblock(md);
	if (r) {
		dm_sm_destroy(md->sm);
		dm_tm_destroy(md->tm);
		return r;
	}

	return 0;
}

static int open_metadata(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;
	struct superblock_disk *disk;

	r = superblock_read_lock(md, &sblock);
	if (r) {
		DMERR("couldn't read_lock superblock");
		return r;
	}

	disk = dm_block_data(sblock);
	r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION,
			       disk->metadata_space_map_root,
			       sizeof(disk->metadata_space_map_root),
			       &md->tm, &md->sm);
	if (r) {
		DMERR("dm_tm_open_with_sm failed");
		goto bad;
	}

	setup_infos(md);

	md->block_size = le32_to_cpu(disk->data_block_size);
	md->nr_blocks = le32_to_cpu(disk->nr_blocks);
	md->current_era = le32_to_cpu(disk->current_era);

	md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root);
	md->era_array_root = le64_to_cpu(disk->era_array_root);
	md->metadata_snap = le64_to_cpu(disk->metadata_snap);
	md->archived_writesets = true;

	dm_bm_unlock(sblock);

	return 0;

bad:
	dm_bm_unlock(sblock);
	return r;
}

static int open_or_format_metadata(struct era_metadata *md,
				   bool may_format)
{
	int r;
	bool unformatted = false;

	r = superblock_all_zeroes(md->bm, &unformatted);
	if (r)
		return r;

	if (unformatted)
		return may_format ? format_metadata(md) : -EPERM;

	return open_metadata(md);
}

static int create_persistent_data_objects(struct era_metadata *md,
					   bool may_format)
{
	int r;

	md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE,
					 ERA_MAX_CONCURRENT_LOCKS);
	if (IS_ERR(md->bm)) {
		DMERR("could not create block manager");
		return PTR_ERR(md->bm);
	}

	r = open_or_format_metadata(md, may_format);
	if (r)
		dm_block_manager_destroy(md->bm);

	return r;
}

static void destroy_persistent_data_objects(struct era_metadata *md)
{
	dm_sm_destroy(md->sm);
	dm_tm_destroy(md->tm);
	dm_block_manager_destroy(md->bm);
}

/*
 * This waits until all era_map threads have picked up the new filter.
 */
static void swap_writeset(struct era_metadata *md, struct writeset *new_writeset)
{
	rcu_assign_pointer(md->current_writeset, new_writeset);
	synchronize_rcu();
}

/*----------------------------------------------------------------
 * Writesets get 'digested' into the main era array.
 *
 * We're using a coroutine here so the worker thread can do the digestion,
 * thus avoiding synchronisation of the metadata.  Digesting a whole
 * writeset in one go would cause too much latency.
 *--------------------------------------------------------------*/
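/*
 * The digest advances through three step functions: lookup the oldest
 * archived writeset, transcribe up to INSERTS_PER_STEP of its marked bits
 * into the era array per invocation, then remove the exhausted writeset
 * from the tree.
 */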
struct digest {
	uint32_t era;
	unsigned nr_bits, current_bit;
	struct writeset_metadata writeset;
	__le32 value;
	struct dm_disk_bitset info;

	int (*step)(struct era_metadata *, struct digest *);
};

static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d);

static int metadata_digest_remove_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key = d->era;

	r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root,
			    &key, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: dm_btree_remove failed", __func__);
		return r;
	}

	d->step = metadata_digest_lookup_writeset;
	return 0;
}

#define INSERTS_PER_STEP 100

static int metadata_digest_transcribe_writeset(struct era_metadata *md,
					       struct digest *d)
{
	int r;
	bool marked;
	unsigned b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits);

	for (b = d->current_bit; b < e; b++) {
		r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked);
		if (r) {
			DMERR("%s: writeset_marked_on_disk failed", __func__);
			return r;
		}

		if (!marked)
			continue;

		__dm_bless_for_disk(&d->value);
		r = dm_array_set_value(&md->era_array_info, md->era_array_root,
				       b, &d->value, &md->era_array_root);
		if (r) {
			DMERR("%s: dm_array_set_value failed", __func__);
			return r;
		}
	}

	if (b == d->nr_bits)
		d->step = metadata_digest_remove_writeset;
	else
		d->current_bit = b;

	return 0;
}

static int metadata_digest_lookup_writeset(struct era_metadata *md,
					   struct digest *d)
{
	int r;
	uint64_t key;
	struct writeset_disk disk;

	r = dm_btree_find_lowest_key(&md->writeset_tree_info,
				     md->writeset_tree_root, &key);
	if (r < 0)
		return r;

	d->era = key;

	r = dm_btree_lookup(&md->writeset_tree_info,
			    md->writeset_tree_root, &key, &disk);
	if (r) {
		if (r == -ENODATA) {
			d->step = NULL;
			return 0;
		}

		DMERR("%s: dm_btree_lookup failed", __func__);
		return r;
	}

	ws_unpack(&disk, &d->writeset);
	d->value = cpu_to_le32(key);

	d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks);
	d->current_bit = 0;
	d->step = metadata_digest_transcribe_writeset;

	return 0;
}

static int metadata_digest_start(struct era_metadata *md, struct digest *d)
{
	if (d->step)
		return 0;

	memset(d, 0, sizeof(*d));

	/*
	 * We initialise another bitset info to avoid any caching side
	 * effects with the previous one.
	 */
	dm_disk_bitset_init(md->tm, &d->info);
	d->step = metadata_digest_lookup_writeset;

	return 0;
}

/*----------------------------------------------------------------
 * High level metadata interface.  Target methods should use these, and not
 * the lower level ones.
 *--------------------------------------------------------------*/
static struct era_metadata *metadata_open(struct block_device *bdev,
					  sector_t block_size,
					  bool may_format)
{
	int r;
	struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL);

	if (!md)
		return NULL;

	md->bdev = bdev;
	md->block_size = block_size;

	md->writesets[0].md.root = INVALID_WRITESET_ROOT;
	md->writesets[1].md.root = INVALID_WRITESET_ROOT;
	md->current_writeset = &md->writesets[0];

	r = create_persistent_data_objects(md, may_format);
	if (r) {
		kfree(md);
		return ERR_PTR(r);
	}

	return md;
}

static void metadata_close(struct era_metadata *md)
{
	destroy_persistent_data_objects(md);
	kfree(md);
}

static bool valid_nr_blocks(dm_block_t n)
{
	/*
	 * dm_bitset restricts us to 2^32.  test_bit & co. restrict us
	 * further to 2^31 - 1
	 */
	return n < (1ull << 31);
}

static int metadata_resize(struct era_metadata *md, void *arg)
{
	int r;
	dm_block_t *new_size = arg;
	__le32 value;

	if (!valid_nr_blocks(*new_size)) {
		DMERR("Invalid number of origin blocks %llu",
		      (unsigned long long) *new_size);
		return -EINVAL;
	}

	writeset_free(&md->writesets[0]);
	writeset_free(&md->writesets[1]);

	r = writeset_alloc(&md->writesets[0], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 0", __func__);
		return r;
	}

	r = writeset_alloc(&md->writesets[1], *new_size);
	if (r) {
		DMERR("%s: writeset_alloc failed for writeset 1", __func__);
		return r;
	}

	value = cpu_to_le32(0u);
	__dm_bless_for_disk(&value);
	r = dm_array_resize(&md->era_array_info, md->era_array_root,
			    md->nr_blocks, *new_size,
			    &value, &md->era_array_root);
	if (r) {
		DMERR("%s: dm_array_resize failed", __func__);
		return r;
	}

	md->nr_blocks = *new_size;
	return 0;
}

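/*
 * Flushes the current writeset's on-disk bitset and inserts it into the
 * writeset tree, keyed by the era that is ending.  The in-core root is
 * then invalidated so a fresh bitset will be created for the next era.
 */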
static int metadata_era_archive(struct era_metadata *md)
{
	int r;
	uint64_t keys[1];
	struct writeset_disk value;

	r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
			    &md->current_writeset->md.root);
	if (r) {
		DMERR("%s: dm_bitset_flush failed", __func__);
		return r;
	}

	ws_pack(&md->current_writeset->md, &value);
	md->current_writeset->md.root = INVALID_WRITESET_ROOT;

	keys[0] = md->current_era;
	__dm_bless_for_disk(&value);
	r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root,
			    keys, &value, &md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't insert writeset into btree", __func__);
		/* FIXME: fail mode */
		return r;
	}

	md->archived_writesets = true;

	return 0;
}

static struct writeset *next_writeset(struct era_metadata *md)
{
	return (md->current_writeset == &md->writesets[0]) ?
		&md->writesets[1] : &md->writesets[0];
}

static int metadata_new_era(struct era_metadata *md)
{
	int r;
	struct writeset *new_writeset = next_writeset(md);

	r = writeset_init(&md->bitset_info, new_writeset);
	if (r) {
		DMERR("%s: writeset_init failed", __func__);
		return r;
	}

	swap_writeset(md, new_writeset);
	md->current_era++;

	return 0;
}

static int metadata_era_rollover(struct era_metadata *md)
{
	int r;

	if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) {
		r = metadata_era_archive(md);
		if (r) {
			DMERR("%s: metadata_era_archive failed", __func__);
			/* FIXME: fail mode? */
			return r;
		}
	}

	r = metadata_new_era(md);
	if (r) {
		DMERR("%s: new era failed", __func__);
		/* FIXME: fail mode */
		return r;
	}

	return 0;
}

static bool metadata_current_marked(struct era_metadata *md, dm_block_t block)
{
	bool r;
	struct writeset *ws;

	rcu_read_lock();
	ws = rcu_dereference(md->current_writeset);
	r = writeset_marked(ws, block);
	rcu_read_unlock();

	return r;
}

static int metadata_commit(struct era_metadata *md)
{
	int r;
	struct dm_block *sblock;

	if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) {
		r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root,
				    &md->current_writeset->md.root);
		if (r) {
			DMERR("%s: bitset flush failed", __func__);
			return r;
		}
	}

	r = dm_tm_pre_commit(md->tm);
	if (r) {
		DMERR("%s: pre commit failed", __func__);
		return r;
	}

	r = save_sm_root(md);
	if (r) {
		DMERR("%s: save_sm_root failed", __func__);
		return r;
	}

	r = superblock_lock(md, &sblock);
	if (r) {
		DMERR("%s: superblock lock failed", __func__);
		return r;
	}

	prepare_superblock(md, dm_block_data(sblock));

	return dm_tm_commit(md->tm, sblock);
}

static int metadata_checkpoint(struct era_metadata *md)
{
	/*
	 * For now we just rollover, but later I want to put a check in to
	 * avoid this if the filter is still pretty fresh.
	 */
	return metadata_era_rollover(md);
}

/*
 * Metadata snapshots allow userland to access era data.
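 *
 * Typically a snapshot is taken so that userspace tools (such as era_dump
 * or era_invalidate from thin-provisioning-tools) can read a consistent
 * copy of the metadata while the target stays live.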
 */
static int metadata_take_snap(struct era_metadata *md)
{
	int r, inc;
	struct dm_block *clone;

	if (md->metadata_snap != SUPERBLOCK_LOCATION) {
		DMERR("%s: metadata snapshot already exists", __func__);
		return -EINVAL;
	}

	r = metadata_era_rollover(md);
	if (r) {
		DMERR("%s: era rollover failed", __func__);
		return r;
	}

	r = metadata_commit(md);
	if (r) {
		DMERR("%s: commit failed", __func__);
		return r;
	}

	r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION);
	if (r) {
		DMERR("%s: couldn't increment superblock", __func__);
		return r;
	}

	r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION,
			       &sb_validator, &clone, &inc);
	if (r) {
		DMERR("%s: couldn't shadow superblock", __func__);
		dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION);
		return r;
	}
	BUG_ON(!inc);

	r = dm_sm_inc_block(md->sm, md->writeset_tree_root);
	if (r) {
		DMERR("%s: couldn't inc writeset tree root", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_sm_inc_block(md->sm, md->era_array_root);
	if (r) {
		DMERR("%s: couldn't inc era tree root", __func__);
		dm_sm_dec_block(md->sm, md->writeset_tree_root);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	md->metadata_snap = dm_block_location(clone);

	dm_tm_unlock(md->tm, clone);

	return 0;
}

static int metadata_drop_snap(struct era_metadata *md)
{
	int r;
	dm_block_t location;
	struct dm_block *clone;
	struct superblock_disk *disk;

	if (md->metadata_snap == SUPERBLOCK_LOCATION) {
		DMERR("%s: no snap to drop", __func__);
		return -EINVAL;
	}

	r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone);
	if (r) {
		DMERR("%s: couldn't read lock superblock clone", __func__);
		return r;
	}

	/*
	 * Whatever happens now we'll commit with no record of the metadata
	 * snap.
	 */
	md->metadata_snap = SUPERBLOCK_LOCATION;

	disk = dm_block_data(clone);
	r = dm_btree_del(&md->writeset_tree_info,
			 le64_to_cpu(disk->writeset_tree_root));
	if (r) {
		DMERR("%s: error deleting writeset tree clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root));
	if (r) {
		DMERR("%s: error deleting era array clone", __func__);
		dm_tm_unlock(md->tm, clone);
		return r;
	}

	location = dm_block_location(clone);
	dm_tm_unlock(md->tm, clone);

	return dm_sm_dec_block(md->sm, location);
}

struct metadata_stats {
	dm_block_t used;
	dm_block_t total;
	dm_block_t snap;
	uint32_t era;
};

static int metadata_get_stats(struct era_metadata *md, void *ptr)
{
	int r;
	struct metadata_stats *s = ptr;
	dm_block_t nr_free, nr_total;

	r = dm_sm_get_nr_free(md->sm, &nr_free);
	if (r) {
		DMERR("dm_sm_get_nr_free returned %d", r);
		return r;
	}

	r = dm_sm_get_nr_blocks(md->sm, &nr_total);
	if (r) {
		DMERR("dm_sm_get_nr_blocks returned %d", r);
		return r;
	}

	s->used = nr_total - nr_free;
	s->total = nr_total;
	s->snap = md->metadata_snap;
	s->era = md->current_era;

	return 0;
}

/*----------------------------------------------------------------*/

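/*
 * The era target instance.  Write bios whose block isn't yet marked in the
 * current era are deferred to the worker thread, which also performs all
 * metadata updates on behalf of the target methods (via the rpc calls
 * below).
 */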
struct era {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_dev *metadata_dev;
	struct dm_dev *origin_dev;

	dm_block_t nr_blocks;
	uint32_t sectors_per_block;
	int sectors_per_block_shift;
	struct era_metadata *md;

	struct workqueue_struct *wq;
	struct work_struct worker;

	spinlock_t deferred_lock;
	struct bio_list deferred_bios;

	spinlock_t rpc_lock;
	struct list_head rpc_calls;

	struct digest digest;
	atomic_t suspended;
};

struct rpc {
	struct list_head list;

	int (*fn0)(struct era_metadata *);
	int (*fn1)(struct era_metadata *, void *);
	void *arg;
	int result;

	struct completion complete;
};

/*----------------------------------------------------------------
 * Remapping.
 *---------------------------------------------------------------*/
static bool block_size_is_power_of_two(struct era *era)
{
	return era->sectors_per_block_shift >= 0;
}

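/*
 * e.g. with a block size of 128 sectors (a power of two, so
 * sectors_per_block_shift is 7), a bio starting at sector 300 maps to
 * block 300 >> 7 = 2.
 */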
static dm_block_t get_block(struct era *era, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(era))
		(void) sector_div(block_nr, era->sectors_per_block);
	else
		block_nr >>= era->sectors_per_block_shift;

	return block_nr;
}

static void remap_to_origin(struct era *era, struct bio *bio)
{
	bio_set_dev(bio, era->origin_dev->bdev);
}

/*----------------------------------------------------------------
 * Worker thread
 *--------------------------------------------------------------*/
static void wake_worker(struct era *era)
{
	if (!atomic_read(&era->suspended))
		queue_work(era->wq, &era->worker);
}

static void process_old_eras(struct era *era)
{
	int r;

	if (!era->digest.step)
		return;

	r = era->digest.step(era->md, &era->digest);
	if (r < 0) {
		DMERR("%s: digest step failed, stopping digestion", __func__);
		era->digest.step = NULL;

	} else if (era->digest.step)
		wake_worker(era);
}

static void process_deferred_bios(struct era *era)
{
	int r;
	struct bio_list deferred_bios, marked_bios;
	struct bio *bio;
	bool commit_needed = false;
	bool failed = false;

	bio_list_init(&deferred_bios);
	bio_list_init(&marked_bios);

	spin_lock(&era->deferred_lock);
	bio_list_merge(&deferred_bios, &era->deferred_bios);
	bio_list_init(&era->deferred_bios);
	spin_unlock(&era->deferred_lock);

	while ((bio = bio_list_pop(&deferred_bios))) {
		r = writeset_test_and_set(&era->md->bitset_info,
					  era->md->current_writeset,
					  get_block(era, bio));
		if (r < 0) {
			/*
			 * This is bad news, we need to rollback.
			 * FIXME: finish.
			 */
			failed = true;

		} else if (r == 0)
			commit_needed = true;

		bio_list_add(&marked_bios, bio);
	}

	if (commit_needed) {
		r = metadata_commit(era->md);
		if (r)
			failed = true;
	}

	if (failed)
		while ((bio = bio_list_pop(&marked_bios)))
			bio_io_error(bio);
	else
		while ((bio = bio_list_pop(&marked_bios)))
			generic_make_request(bio);
}

static void process_rpc_calls(struct era *era)
{
	int r;
	bool need_commit = false;
	struct list_head calls;
	struct rpc *rpc, *tmp;

	INIT_LIST_HEAD(&calls);
	spin_lock(&era->rpc_lock);
	list_splice_init(&era->rpc_calls, &calls);
	spin_unlock(&era->rpc_lock);

	list_for_each_entry_safe(rpc, tmp, &calls, list) {
		rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg);
		need_commit = true;
	}

	if (need_commit) {
		r = metadata_commit(era->md);
		if (r)
			list_for_each_entry_safe(rpc, tmp, &calls, list)
				rpc->result = r;
	}

	list_for_each_entry_safe(rpc, tmp, &calls, list)
		complete(&rpc->complete);
}

static void kick_off_digest(struct era *era)
{
	if (era->md->archived_writesets) {
		era->md->archived_writesets = false;
		metadata_digest_start(era->md, &era->digest);
	}
}

static void do_work(struct work_struct *ws)
{
	struct era *era = container_of(ws, struct era, worker);

	kick_off_digest(era);
	process_old_eras(era);
	process_deferred_bios(era);
	process_rpc_calls(era);
}

static void defer_bio(struct era *era, struct bio *bio)
{
	spin_lock(&era->deferred_lock);
	bio_list_add(&era->deferred_bios, bio);
	spin_unlock(&era->deferred_lock);

	wake_worker(era);
}

/*
 * Make an rpc call to the worker to change the metadata.
 */
static int perform_rpc(struct era *era, struct rpc *rpc)
{
	rpc->result = 0;
	init_completion(&rpc->complete);

	spin_lock(&era->rpc_lock);
	list_add(&rpc->list, &era->rpc_calls);
	spin_unlock(&era->rpc_lock);

	wake_worker(era);
	wait_for_completion(&rpc->complete);

	return rpc->result;
}

static int in_worker0(struct era *era, int (*fn)(struct era_metadata *))
{
	struct rpc rpc;

	rpc.fn0 = fn;
	rpc.fn1 = NULL;

	return perform_rpc(era, &rpc);
}

static int in_worker1(struct era *era,
		      int (*fn)(struct era_metadata *, void *), void *arg)
{
	struct rpc rpc;

	rpc.fn0 = NULL;
	rpc.fn1 = fn;
	rpc.arg = arg;

	return perform_rpc(era, &rpc);
}

static void start_worker(struct era *era)
{
	atomic_set(&era->suspended, 0);
}

static void stop_worker(struct era *era)
{
	atomic_set(&era->suspended, 1);
	flush_workqueue(era->wq);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/
static int dev_is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);

	return bdi_congested(q->backing_dev_info, bdi_bits);
}

static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct era *era = container_of(cb, struct era, callbacks);

	return dev_is_congested(era->origin_dev, bdi_bits);
}

static void era_destroy(struct era *era)
{
	if (era->md)
		metadata_close(era->md);

	if (era->wq)
		destroy_workqueue(era->wq);

	if (era->origin_dev)
		dm_put_device(era->ti, era->origin_dev);

	if (era->metadata_dev)
		dm_put_device(era->ti, era->metadata_dev);

	kfree(era);
}

static dm_block_t calc_nr_blocks(struct era *era)
{
	return dm_sector_div_up(era->ti->len, era->sectors_per_block);
}

static bool valid_block_size(dm_block_t block_size)
{
	bool greater_than_zero = block_size > 0;
	bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0;

	return greater_than_zero && multiple_of_min_block_size;
}

/*
 * <metadata dev> <data dev> <data block size (sectors)>
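 *
 * e.g. a table line might look like this (device names are illustrative):
 *
 *   0 409600 era /dev/mapper/era-meta /dev/mapper/era-origin 128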
 */
static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	char dummy;
	struct era *era;
	struct era_metadata *md;

	if (argc != 3) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	era = kzalloc(sizeof(*era), GFP_KERNEL);
	if (!era) {
		ti->error = "Error allocating era structure";
		return -ENOMEM;
	}

	era->ti = ti;

	r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &era->metadata_dev);
	if (r) {
		ti->error = "Error opening metadata device";
		era_destroy(era);
		return -EINVAL;
	}

	r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &era->origin_dev);
	if (r) {
		ti->error = "Error opening data device";
		era_destroy(era);
		return -EINVAL;
	}

	r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy);
	if (r != 1) {
		ti->error = "Error parsing block size";
		era_destroy(era);
		return -EINVAL;
	}

	r = dm_set_target_max_io_len(ti, era->sectors_per_block);
	if (r) {
		ti->error = "could not set max io len";
		era_destroy(era);
		return -EINVAL;
	}

	if (!valid_block_size(era->sectors_per_block)) {
		ti->error = "Invalid block size";
		era_destroy(era);
		return -EINVAL;
	}
	if (era->sectors_per_block & (era->sectors_per_block - 1))
		era->sectors_per_block_shift = -1;
	else
		era->sectors_per_block_shift = __ffs(era->sectors_per_block);

	md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true);
	if (IS_ERR(md)) {
		ti->error = "Error reading metadata";
		era_destroy(era);
		return PTR_ERR(md);
	}
	era->md = md;

	era->nr_blocks = calc_nr_blocks(era);

	r = metadata_resize(era->md, &era->nr_blocks);
	if (r) {
		ti->error = "couldn't resize metadata";
		era_destroy(era);
		return -ENOMEM;
	}

	era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
	if (!era->wq) {
		ti->error = "could not create workqueue for metadata object";
		era_destroy(era);
		return -ENOMEM;
	}
	INIT_WORK(&era->worker, do_work);

	spin_lock_init(&era->deferred_lock);
	bio_list_init(&era->deferred_bios);

	spin_lock_init(&era->rpc_lock);
	INIT_LIST_HEAD(&era->rpc_calls);

	ti->private = era;
	ti->num_flush_bios = 1;
	ti->flush_supported = true;

	ti->num_discard_bios = 1;
	era->callbacks.congested_fn = era_is_congested;
	dm_table_add_target_callbacks(ti->table, &era->callbacks);

	return 0;
}

static void era_dtr(struct dm_target *ti)
{
	era_destroy(ti->private);
}

static int era_map(struct dm_target *ti, struct bio *bio)
{
	struct era *era = ti->private;
	dm_block_t block = get_block(era, bio);

	/*
	 * All bios get remapped to the origin device.  We do this now, but
	 * it may not get issued until later, depending on whether the
	 * block is marked in this era.
	 */
	remap_to_origin(era, bio);

	/*
	 * REQ_PREFLUSH bios carry no data, so we're not interested in them.
	 */
	if (!(bio->bi_opf & REQ_PREFLUSH) &&
	    (bio_data_dir(bio) == WRITE) &&
	    !metadata_current_marked(era->md, block)) {
		defer_bio(era, bio);
		return DM_MAPIO_SUBMITTED;
	}

	return DM_MAPIO_REMAPPED;
}

static void era_postsuspend(struct dm_target *ti)
{
	int r;
	struct era *era = ti->private;

	r = in_worker0(era, metadata_era_archive);
	if (r) {
		DMERR("%s: couldn't archive current era", __func__);
		/* FIXME: fail mode */
	}

	stop_worker(era);
}

static int era_preresume(struct dm_target *ti)
{
	int r;
	struct era *era = ti->private;
	dm_block_t new_size = calc_nr_blocks(era);

	if (era->nr_blocks != new_size) {
		r = in_worker1(era, metadata_resize, &new_size);
		if (r)
			return r;

		era->nr_blocks = new_size;
	}

	start_worker(era);

	r = in_worker0(era, metadata_new_era);
	if (r) {
		DMERR("%s: metadata_new_era failed", __func__);
		return r;
	}

	return 0;
}

/*
 * Status format:
 *
 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
 * <current era> <held metadata root | '-'>
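 *
 * e.g. "8 153/4096 3 -" (illustrative values; the trailing '-' means no
 * metadata snapshot is currently held)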
 */
static void era_status(struct dm_target *ti, status_type_t type,
		       unsigned status_flags, char *result, unsigned maxlen)
{
	int r;
	struct era *era = ti->private;
	ssize_t sz = 0;
	struct metadata_stats stats;
	char buf[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
		r = in_worker1(era, metadata_get_stats, &stats);
		if (r)
			goto err;

		DMEMIT("%u %llu/%llu %u",
		       (unsigned) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
		       (unsigned long long) stats.used,
		       (unsigned long long) stats.total,
		       (unsigned) stats.era);

		if (stats.snap != SUPERBLOCK_LOCATION)
			DMEMIT(" %llu", stats.snap);
		else
			DMEMIT(" -");
		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, era->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, era->origin_dev->bdev->bd_dev);
		DMEMIT("%s %u", buf, era->sectors_per_block);
		break;
	}

	return;

err:
	DMEMIT("Error");
}

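/*
 * Supported messages are 'checkpoint', 'take_metadata_snap' and
 * 'drop_metadata_snap', issued with the usual dmsetup message syntax,
 * e.g.:
 *
 *   dmsetup message <era dev> 0 take_metadata_snap
 */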
static int era_message(struct dm_target *ti, unsigned argc, char **argv,
		       char *result, unsigned maxlen)
{
	struct era *era = ti->private;

	if (argc != 1) {
		DMERR("incorrect number of message arguments");
		return -EINVAL;
	}

	if (!strcasecmp(argv[0], "checkpoint"))
		return in_worker0(era, metadata_checkpoint);

	if (!strcasecmp(argv[0], "take_metadata_snap"))
		return in_worker0(era, metadata_take_snap);

	if (!strcasecmp(argv[0], "drop_metadata_snap"))
		return in_worker0(era, metadata_drop_snap);

	DMERR("unsupported message '%s'", argv[0]);
	return -EINVAL;
}

static sector_t get_dev_size(struct dm_dev *dev)
{
	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
}

static int era_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct era *era = ti->private;

	return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data);
}

static void era_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct era *era = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * era device's blocksize (io_opt is a factor) do not override them.
	 */
	if (io_opt_sectors < era->sectors_per_block ||
	    do_div(io_opt_sectors, era->sectors_per_block)) {
		blk_limits_io_min(limits, 0);
		blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT);
	}
}

/*----------------------------------------------------------------*/

static struct target_type era_target = {
	.name = "era",
	.version = {1, 0, 0},
	.module = THIS_MODULE,
	.ctr = era_ctr,
	.dtr = era_dtr,
	.map = era_map,
	.postsuspend = era_postsuspend,
	.preresume = era_preresume,
	.status = era_status,
	.message = era_message,
	.iterate_devices = era_iterate_devices,
	.io_hints = era_io_hints
};

static int __init dm_era_init(void)
{
	int r;

	r = dm_register_target(&era_target);
	if (r) {
		DMERR("era target registration failed: %d", r);
		return r;
	}

	return 0;
}

static void __exit dm_era_exit(void)
{
	dm_unregister_target(&era_target);
}

module_init(dm_era_init);
module_exit(dm_era_exit);

MODULE_DESCRIPTION(DM_NAME " era target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");