1 /* 2 * Copyright (C) 2011-2012 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-thin-metadata.h" 8 #include "persistent-data/dm-btree.h" 9 #include "persistent-data/dm-space-map.h" 10 #include "persistent-data/dm-space-map-disk.h" 11 #include "persistent-data/dm-transaction-manager.h" 12 13 #include <linux/list.h> 14 #include <linux/device-mapper.h> 15 #include <linux/workqueue.h> 16 17 /*-------------------------------------------------------------------------- 18 * As far as the metadata goes, there is: 19 * 20 * - A superblock in block zero, taking up fewer than 512 bytes for 21 * atomic writes. 22 * 23 * - A space map managing the metadata blocks. 24 * 25 * - A space map managing the data blocks. 26 * 27 * - A btree mapping our internal thin dev ids onto struct disk_device_details. 28 * 29 * - A hierarchical btree, with 2 levels which effectively maps (thin 30 * dev id, virtual block) -> block_time. Block time is a 64-bit 31 * field holding the time in the low 24 bits, and block in the top 48 32 * bits. 33 * 34 * BTrees consist solely of btree_nodes, that fill a block. Some are 35 * internal nodes, as such their values are a __le64 pointing to other 36 * nodes. Leaf nodes can store data of any reasonable size (ie. much 37 * smaller than the block size). The nodes consist of the header, 38 * followed by an array of keys, followed by an array of values. We have 39 * to binary search on the keys so they're all held together to help the 40 * cpu cache. 41 * 42 * Space maps have 2 btrees: 43 * 44 * - One maps a uint64_t onto a struct index_entry. Which points to a 45 * bitmap block, and has some details about how many free entries there 46 * are etc. 47 * 48 * - The bitmap blocks have a header (for the checksum). Then the rest 49 * of the block is pairs of bits. With the meaning being: 50 * 51 * 0 - ref count is 0 52 * 1 - ref count is 1 53 * 2 - ref count is 2 54 * 3 - ref count is higher than 2 55 * 56 * - If the count is higher than 2 then the ref count is entered in a 57 * second btree that directly maps the block_address to a uint32_t ref 58 * count. 59 * 60 * The space map metadata variant doesn't have a bitmaps btree. Instead 61 * it has one single blocks worth of index_entries. This avoids 62 * recursive issues with the bitmap btree needing to allocate space in 63 * order to insert. With a small data block size such as 64k the 64 * metadata support data devices that are hundreds of terrabytes. 65 * 66 * The space maps allocate space linearly from front to back. Space that 67 * is freed in a transaction is never recycled within that transaction. 68 * To try and avoid fragmenting _free_ space the allocator always goes 69 * back and fills in gaps. 70 * 71 * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks 72 * from the block manager. 73 *--------------------------------------------------------------------------*/ 74 75 #define DM_MSG_PREFIX "thin metadata" 76 77 #define THIN_SUPERBLOCK_MAGIC 27022010 78 #define THIN_SUPERBLOCK_LOCATION 0 79 #define THIN_VERSION 1 80 #define THIN_METADATA_CACHE_SIZE 64 81 #define SECTOR_TO_BLOCK_SHIFT 3 82 83 /* 84 * 3 for btree insert + 85 * 2 for btree lookup used within space map 86 */ 87 #define THIN_MAX_CONCURRENT_LOCKS 5 88 89 /* This should be plenty */ 90 #define SPACE_MAP_ROOT_SIZE 128 91 92 /* 93 * Little endian on-disk superblock and device details. 94 */ 95 struct thin_disk_superblock { 96 __le32 csum; /* Checksum of superblock except for this field. */ 97 __le32 flags; 98 __le64 blocknr; /* This block number, dm_block_t. */ 99 100 __u8 uuid[16]; 101 __le64 magic; 102 __le32 version; 103 __le32 time; 104 105 __le64 trans_id; 106 107 /* 108 * Root held by userspace transactions. 109 */ 110 __le64 held_root; 111 112 __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; 113 __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 114 115 /* 116 * 2-level btree mapping (dev_id, (dev block, time)) -> data block 117 */ 118 __le64 data_mapping_root; 119 120 /* 121 * Device detail root mapping dev_id -> device_details 122 */ 123 __le64 device_details_root; 124 125 __le32 data_block_size; /* In 512-byte sectors. */ 126 127 __le32 metadata_block_size; /* In 512-byte sectors. */ 128 __le64 metadata_nr_blocks; 129 130 __le32 compat_flags; 131 __le32 compat_ro_flags; 132 __le32 incompat_flags; 133 } __packed; 134 135 struct disk_device_details { 136 __le64 mapped_blocks; 137 __le64 transaction_id; /* When created. */ 138 __le32 creation_time; 139 __le32 snapshotted_time; 140 } __packed; 141 142 struct dm_pool_metadata { 143 struct hlist_node hash; 144 145 struct block_device *bdev; 146 struct dm_block_manager *bm; 147 struct dm_space_map *metadata_sm; 148 struct dm_space_map *data_sm; 149 struct dm_transaction_manager *tm; 150 struct dm_transaction_manager *nb_tm; 151 152 /* 153 * Two-level btree. 154 * First level holds thin_dev_t. 155 * Second level holds mappings. 156 */ 157 struct dm_btree_info info; 158 159 /* 160 * Non-blocking version of the above. 161 */ 162 struct dm_btree_info nb_info; 163 164 /* 165 * Just the top level for deleting whole devices. 166 */ 167 struct dm_btree_info tl_info; 168 169 /* 170 * Just the bottom level for creating new devices. 171 */ 172 struct dm_btree_info bl_info; 173 174 /* 175 * Describes the device details btree. 176 */ 177 struct dm_btree_info details_info; 178 179 struct rw_semaphore root_lock; 180 uint32_t time; 181 dm_block_t root; 182 dm_block_t details_root; 183 struct list_head thin_devices; 184 uint64_t trans_id; 185 unsigned long flags; 186 sector_t data_block_size; 187 bool read_only:1; 188 189 /* 190 * Set if a transaction has to be aborted but the attempt to roll back 191 * to the previous (good) transaction failed. The only pool metadata 192 * operation possible in this state is the closing of the device. 193 */ 194 bool fail_io:1; 195 }; 196 197 struct dm_thin_device { 198 struct list_head list; 199 struct dm_pool_metadata *pmd; 200 dm_thin_id id; 201 202 int open_count; 203 bool changed:1; 204 bool aborted_with_changes:1; 205 uint64_t mapped_blocks; 206 uint64_t transaction_id; 207 uint32_t creation_time; 208 uint32_t snapshotted_time; 209 }; 210 211 /*---------------------------------------------------------------- 212 * superblock validator 213 *--------------------------------------------------------------*/ 214 215 #define SUPERBLOCK_CSUM_XOR 160774 216 217 static void sb_prepare_for_write(struct dm_block_validator *v, 218 struct dm_block *b, 219 size_t block_size) 220 { 221 struct thin_disk_superblock *disk_super = dm_block_data(b); 222 223 disk_super->blocknr = cpu_to_le64(dm_block_location(b)); 224 disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 225 block_size - sizeof(__le32), 226 SUPERBLOCK_CSUM_XOR)); 227 } 228 229 static int sb_check(struct dm_block_validator *v, 230 struct dm_block *b, 231 size_t block_size) 232 { 233 struct thin_disk_superblock *disk_super = dm_block_data(b); 234 __le32 csum_le; 235 236 if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { 237 DMERR("sb_check failed: blocknr %llu: " 238 "wanted %llu", le64_to_cpu(disk_super->blocknr), 239 (unsigned long long)dm_block_location(b)); 240 return -ENOTBLK; 241 } 242 243 if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) { 244 DMERR("sb_check failed: magic %llu: " 245 "wanted %llu", le64_to_cpu(disk_super->magic), 246 (unsigned long long)THIN_SUPERBLOCK_MAGIC); 247 return -EILSEQ; 248 } 249 250 csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, 251 block_size - sizeof(__le32), 252 SUPERBLOCK_CSUM_XOR)); 253 if (csum_le != disk_super->csum) { 254 DMERR("sb_check failed: csum %u: wanted %u", 255 le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); 256 return -EILSEQ; 257 } 258 259 return 0; 260 } 261 262 static struct dm_block_validator sb_validator = { 263 .name = "superblock", 264 .prepare_for_write = sb_prepare_for_write, 265 .check = sb_check 266 }; 267 268 /*---------------------------------------------------------------- 269 * Methods for the btree value types 270 *--------------------------------------------------------------*/ 271 272 static uint64_t pack_block_time(dm_block_t b, uint32_t t) 273 { 274 return (b << 24) | t; 275 } 276 277 static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) 278 { 279 *b = v >> 24; 280 *t = v & ((1 << 24) - 1); 281 } 282 283 static void data_block_inc(void *context, const void *value_le) 284 { 285 struct dm_space_map *sm = context; 286 __le64 v_le; 287 uint64_t b; 288 uint32_t t; 289 290 memcpy(&v_le, value_le, sizeof(v_le)); 291 unpack_block_time(le64_to_cpu(v_le), &b, &t); 292 dm_sm_inc_block(sm, b); 293 } 294 295 static void data_block_dec(void *context, const void *value_le) 296 { 297 struct dm_space_map *sm = context; 298 __le64 v_le; 299 uint64_t b; 300 uint32_t t; 301 302 memcpy(&v_le, value_le, sizeof(v_le)); 303 unpack_block_time(le64_to_cpu(v_le), &b, &t); 304 dm_sm_dec_block(sm, b); 305 } 306 307 static int data_block_equal(void *context, const void *value1_le, const void *value2_le) 308 { 309 __le64 v1_le, v2_le; 310 uint64_t b1, b2; 311 uint32_t t; 312 313 memcpy(&v1_le, value1_le, sizeof(v1_le)); 314 memcpy(&v2_le, value2_le, sizeof(v2_le)); 315 unpack_block_time(le64_to_cpu(v1_le), &b1, &t); 316 unpack_block_time(le64_to_cpu(v2_le), &b2, &t); 317 318 return b1 == b2; 319 } 320 321 static void subtree_inc(void *context, const void *value) 322 { 323 struct dm_btree_info *info = context; 324 __le64 root_le; 325 uint64_t root; 326 327 memcpy(&root_le, value, sizeof(root_le)); 328 root = le64_to_cpu(root_le); 329 dm_tm_inc(info->tm, root); 330 } 331 332 static void subtree_dec(void *context, const void *value) 333 { 334 struct dm_btree_info *info = context; 335 __le64 root_le; 336 uint64_t root; 337 338 memcpy(&root_le, value, sizeof(root_le)); 339 root = le64_to_cpu(root_le); 340 if (dm_btree_del(info, root)) 341 DMERR("btree delete failed\n"); 342 } 343 344 static int subtree_equal(void *context, const void *value1_le, const void *value2_le) 345 { 346 __le64 v1_le, v2_le; 347 memcpy(&v1_le, value1_le, sizeof(v1_le)); 348 memcpy(&v2_le, value2_le, sizeof(v2_le)); 349 350 return v1_le == v2_le; 351 } 352 353 /*----------------------------------------------------------------*/ 354 355 static int superblock_lock_zero(struct dm_pool_metadata *pmd, 356 struct dm_block **sblock) 357 { 358 return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, 359 &sb_validator, sblock); 360 } 361 362 static int superblock_lock(struct dm_pool_metadata *pmd, 363 struct dm_block **sblock) 364 { 365 return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, 366 &sb_validator, sblock); 367 } 368 369 static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) 370 { 371 int r; 372 unsigned i; 373 struct dm_block *b; 374 __le64 *data_le, zero = cpu_to_le64(0); 375 unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64); 376 377 /* 378 * We can't use a validator here - it may be all zeroes. 379 */ 380 r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b); 381 if (r) 382 return r; 383 384 data_le = dm_block_data(b); 385 *result = 1; 386 for (i = 0; i < block_size; i++) { 387 if (data_le[i] != zero) { 388 *result = 0; 389 break; 390 } 391 } 392 393 return dm_bm_unlock(b); 394 } 395 396 static void __setup_btree_details(struct dm_pool_metadata *pmd) 397 { 398 pmd->info.tm = pmd->tm; 399 pmd->info.levels = 2; 400 pmd->info.value_type.context = pmd->data_sm; 401 pmd->info.value_type.size = sizeof(__le64); 402 pmd->info.value_type.inc = data_block_inc; 403 pmd->info.value_type.dec = data_block_dec; 404 pmd->info.value_type.equal = data_block_equal; 405 406 memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); 407 pmd->nb_info.tm = pmd->nb_tm; 408 409 pmd->tl_info.tm = pmd->tm; 410 pmd->tl_info.levels = 1; 411 pmd->tl_info.value_type.context = &pmd->bl_info; 412 pmd->tl_info.value_type.size = sizeof(__le64); 413 pmd->tl_info.value_type.inc = subtree_inc; 414 pmd->tl_info.value_type.dec = subtree_dec; 415 pmd->tl_info.value_type.equal = subtree_equal; 416 417 pmd->bl_info.tm = pmd->tm; 418 pmd->bl_info.levels = 1; 419 pmd->bl_info.value_type.context = pmd->data_sm; 420 pmd->bl_info.value_type.size = sizeof(__le64); 421 pmd->bl_info.value_type.inc = data_block_inc; 422 pmd->bl_info.value_type.dec = data_block_dec; 423 pmd->bl_info.value_type.equal = data_block_equal; 424 425 pmd->details_info.tm = pmd->tm; 426 pmd->details_info.levels = 1; 427 pmd->details_info.value_type.context = NULL; 428 pmd->details_info.value_type.size = sizeof(struct disk_device_details); 429 pmd->details_info.value_type.inc = NULL; 430 pmd->details_info.value_type.dec = NULL; 431 pmd->details_info.value_type.equal = NULL; 432 } 433 434 static int __write_initial_superblock(struct dm_pool_metadata *pmd) 435 { 436 int r; 437 struct dm_block *sblock; 438 size_t metadata_len, data_len; 439 struct thin_disk_superblock *disk_super; 440 sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; 441 442 if (bdev_size > THIN_METADATA_MAX_SECTORS) 443 bdev_size = THIN_METADATA_MAX_SECTORS; 444 445 r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); 446 if (r < 0) 447 return r; 448 449 r = dm_sm_root_size(pmd->data_sm, &data_len); 450 if (r < 0) 451 return r; 452 453 r = dm_sm_commit(pmd->data_sm); 454 if (r < 0) 455 return r; 456 457 r = dm_tm_pre_commit(pmd->tm); 458 if (r < 0) 459 return r; 460 461 r = superblock_lock_zero(pmd, &sblock); 462 if (r) 463 return r; 464 465 disk_super = dm_block_data(sblock); 466 disk_super->flags = 0; 467 memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); 468 disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); 469 disk_super->version = cpu_to_le32(THIN_VERSION); 470 disk_super->time = 0; 471 disk_super->trans_id = 0; 472 disk_super->held_root = 0; 473 474 r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, 475 metadata_len); 476 if (r < 0) 477 goto bad_locked; 478 479 r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, 480 data_len); 481 if (r < 0) 482 goto bad_locked; 483 484 disk_super->data_mapping_root = cpu_to_le64(pmd->root); 485 disk_super->device_details_root = cpu_to_le64(pmd->details_root); 486 disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 487 disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); 488 disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); 489 490 return dm_tm_commit(pmd->tm, sblock); 491 492 bad_locked: 493 dm_bm_unlock(sblock); 494 return r; 495 } 496 497 static int __format_metadata(struct dm_pool_metadata *pmd) 498 { 499 int r; 500 501 r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, 502 &pmd->tm, &pmd->metadata_sm); 503 if (r < 0) { 504 DMERR("tm_create_with_sm failed"); 505 return r; 506 } 507 508 pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); 509 if (IS_ERR(pmd->data_sm)) { 510 DMERR("sm_disk_create failed"); 511 r = PTR_ERR(pmd->data_sm); 512 goto bad_cleanup_tm; 513 } 514 515 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); 516 if (!pmd->nb_tm) { 517 DMERR("could not create non-blocking clone tm"); 518 r = -ENOMEM; 519 goto bad_cleanup_data_sm; 520 } 521 522 __setup_btree_details(pmd); 523 524 r = dm_btree_empty(&pmd->info, &pmd->root); 525 if (r < 0) 526 goto bad_cleanup_nb_tm; 527 528 r = dm_btree_empty(&pmd->details_info, &pmd->details_root); 529 if (r < 0) { 530 DMERR("couldn't create devices root"); 531 goto bad_cleanup_nb_tm; 532 } 533 534 r = __write_initial_superblock(pmd); 535 if (r) 536 goto bad_cleanup_nb_tm; 537 538 return 0; 539 540 bad_cleanup_nb_tm: 541 dm_tm_destroy(pmd->nb_tm); 542 bad_cleanup_data_sm: 543 dm_sm_destroy(pmd->data_sm); 544 bad_cleanup_tm: 545 dm_tm_destroy(pmd->tm); 546 dm_sm_destroy(pmd->metadata_sm); 547 548 return r; 549 } 550 551 static int __check_incompat_features(struct thin_disk_superblock *disk_super, 552 struct dm_pool_metadata *pmd) 553 { 554 uint32_t features; 555 556 features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; 557 if (features) { 558 DMERR("could not access metadata due to unsupported optional features (%lx).", 559 (unsigned long)features); 560 return -EINVAL; 561 } 562 563 /* 564 * Check for read-only metadata to skip the following RDWR checks. 565 */ 566 if (get_disk_ro(pmd->bdev->bd_disk)) 567 return 0; 568 569 features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; 570 if (features) { 571 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", 572 (unsigned long)features); 573 return -EINVAL; 574 } 575 576 return 0; 577 } 578 579 static int __open_metadata(struct dm_pool_metadata *pmd) 580 { 581 int r; 582 struct dm_block *sblock; 583 struct thin_disk_superblock *disk_super; 584 585 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, 586 &sb_validator, &sblock); 587 if (r < 0) { 588 DMERR("couldn't read superblock"); 589 return r; 590 } 591 592 disk_super = dm_block_data(sblock); 593 594 r = __check_incompat_features(disk_super, pmd); 595 if (r < 0) 596 goto bad_unlock_sblock; 597 598 r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, 599 disk_super->metadata_space_map_root, 600 sizeof(disk_super->metadata_space_map_root), 601 &pmd->tm, &pmd->metadata_sm); 602 if (r < 0) { 603 DMERR("tm_open_with_sm failed"); 604 goto bad_unlock_sblock; 605 } 606 607 pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, 608 sizeof(disk_super->data_space_map_root)); 609 if (IS_ERR(pmd->data_sm)) { 610 DMERR("sm_disk_open failed"); 611 r = PTR_ERR(pmd->data_sm); 612 goto bad_cleanup_tm; 613 } 614 615 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); 616 if (!pmd->nb_tm) { 617 DMERR("could not create non-blocking clone tm"); 618 r = -ENOMEM; 619 goto bad_cleanup_data_sm; 620 } 621 622 __setup_btree_details(pmd); 623 return dm_bm_unlock(sblock); 624 625 bad_cleanup_data_sm: 626 dm_sm_destroy(pmd->data_sm); 627 bad_cleanup_tm: 628 dm_tm_destroy(pmd->tm); 629 dm_sm_destroy(pmd->metadata_sm); 630 bad_unlock_sblock: 631 dm_bm_unlock(sblock); 632 633 return r; 634 } 635 636 static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) 637 { 638 int r, unformatted; 639 640 r = __superblock_all_zeroes(pmd->bm, &unformatted); 641 if (r) 642 return r; 643 644 if (unformatted) 645 return format_device ? __format_metadata(pmd) : -EPERM; 646 647 return __open_metadata(pmd); 648 } 649 650 static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) 651 { 652 int r; 653 654 pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, 655 THIN_METADATA_CACHE_SIZE, 656 THIN_MAX_CONCURRENT_LOCKS); 657 if (IS_ERR(pmd->bm)) { 658 DMERR("could not create block manager"); 659 return PTR_ERR(pmd->bm); 660 } 661 662 r = __open_or_format_metadata(pmd, format_device); 663 if (r) 664 dm_block_manager_destroy(pmd->bm); 665 666 return r; 667 } 668 669 static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) 670 { 671 dm_sm_destroy(pmd->data_sm); 672 dm_sm_destroy(pmd->metadata_sm); 673 dm_tm_destroy(pmd->nb_tm); 674 dm_tm_destroy(pmd->tm); 675 dm_block_manager_destroy(pmd->bm); 676 } 677 678 static int __begin_transaction(struct dm_pool_metadata *pmd) 679 { 680 int r; 681 struct thin_disk_superblock *disk_super; 682 struct dm_block *sblock; 683 684 /* 685 * We re-read the superblock every time. Shouldn't need to do this 686 * really. 687 */ 688 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, 689 &sb_validator, &sblock); 690 if (r) 691 return r; 692 693 disk_super = dm_block_data(sblock); 694 pmd->time = le32_to_cpu(disk_super->time); 695 pmd->root = le64_to_cpu(disk_super->data_mapping_root); 696 pmd->details_root = le64_to_cpu(disk_super->device_details_root); 697 pmd->trans_id = le64_to_cpu(disk_super->trans_id); 698 pmd->flags = le32_to_cpu(disk_super->flags); 699 pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); 700 701 dm_bm_unlock(sblock); 702 return 0; 703 } 704 705 static int __write_changed_details(struct dm_pool_metadata *pmd) 706 { 707 int r; 708 struct dm_thin_device *td, *tmp; 709 struct disk_device_details details; 710 uint64_t key; 711 712 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { 713 if (!td->changed) 714 continue; 715 716 key = td->id; 717 718 details.mapped_blocks = cpu_to_le64(td->mapped_blocks); 719 details.transaction_id = cpu_to_le64(td->transaction_id); 720 details.creation_time = cpu_to_le32(td->creation_time); 721 details.snapshotted_time = cpu_to_le32(td->snapshotted_time); 722 __dm_bless_for_disk(&details); 723 724 r = dm_btree_insert(&pmd->details_info, pmd->details_root, 725 &key, &details, &pmd->details_root); 726 if (r) 727 return r; 728 729 if (td->open_count) 730 td->changed = 0; 731 else { 732 list_del(&td->list); 733 kfree(td); 734 } 735 } 736 737 return 0; 738 } 739 740 static int __commit_transaction(struct dm_pool_metadata *pmd) 741 { 742 int r; 743 size_t metadata_len, data_len; 744 struct thin_disk_superblock *disk_super; 745 struct dm_block *sblock; 746 747 /* 748 * We need to know if the thin_disk_superblock exceeds a 512-byte sector. 749 */ 750 BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); 751 752 r = __write_changed_details(pmd); 753 if (r < 0) 754 return r; 755 756 r = dm_sm_commit(pmd->data_sm); 757 if (r < 0) 758 return r; 759 760 r = dm_tm_pre_commit(pmd->tm); 761 if (r < 0) 762 return r; 763 764 r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); 765 if (r < 0) 766 return r; 767 768 r = dm_sm_root_size(pmd->data_sm, &data_len); 769 if (r < 0) 770 return r; 771 772 r = superblock_lock(pmd, &sblock); 773 if (r) 774 return r; 775 776 disk_super = dm_block_data(sblock); 777 disk_super->time = cpu_to_le32(pmd->time); 778 disk_super->data_mapping_root = cpu_to_le64(pmd->root); 779 disk_super->device_details_root = cpu_to_le64(pmd->details_root); 780 disk_super->trans_id = cpu_to_le64(pmd->trans_id); 781 disk_super->flags = cpu_to_le32(pmd->flags); 782 783 r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, 784 metadata_len); 785 if (r < 0) 786 goto out_locked; 787 788 r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, 789 data_len); 790 if (r < 0) 791 goto out_locked; 792 793 return dm_tm_commit(pmd->tm, sblock); 794 795 out_locked: 796 dm_bm_unlock(sblock); 797 return r; 798 } 799 800 struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, 801 sector_t data_block_size, 802 bool format_device) 803 { 804 int r; 805 struct dm_pool_metadata *pmd; 806 807 pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); 808 if (!pmd) { 809 DMERR("could not allocate metadata struct"); 810 return ERR_PTR(-ENOMEM); 811 } 812 813 init_rwsem(&pmd->root_lock); 814 pmd->time = 0; 815 INIT_LIST_HEAD(&pmd->thin_devices); 816 pmd->read_only = false; 817 pmd->fail_io = false; 818 pmd->bdev = bdev; 819 pmd->data_block_size = data_block_size; 820 821 r = __create_persistent_data_objects(pmd, format_device); 822 if (r) { 823 kfree(pmd); 824 return ERR_PTR(r); 825 } 826 827 r = __begin_transaction(pmd); 828 if (r < 0) { 829 if (dm_pool_metadata_close(pmd) < 0) 830 DMWARN("%s: dm_pool_metadata_close() failed.", __func__); 831 return ERR_PTR(r); 832 } 833 834 return pmd; 835 } 836 837 int dm_pool_metadata_close(struct dm_pool_metadata *pmd) 838 { 839 int r; 840 unsigned open_devices = 0; 841 struct dm_thin_device *td, *tmp; 842 843 down_read(&pmd->root_lock); 844 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { 845 if (td->open_count) 846 open_devices++; 847 else { 848 list_del(&td->list); 849 kfree(td); 850 } 851 } 852 up_read(&pmd->root_lock); 853 854 if (open_devices) { 855 DMERR("attempt to close pmd when %u device(s) are still open", 856 open_devices); 857 return -EBUSY; 858 } 859 860 if (!pmd->read_only && !pmd->fail_io) { 861 r = __commit_transaction(pmd); 862 if (r < 0) 863 DMWARN("%s: __commit_transaction() failed, error = %d", 864 __func__, r); 865 } 866 867 if (!pmd->fail_io) 868 __destroy_persistent_data_objects(pmd); 869 870 kfree(pmd); 871 return 0; 872 } 873 874 /* 875 * __open_device: Returns @td corresponding to device with id @dev, 876 * creating it if @create is set and incrementing @td->open_count. 877 * On failure, @td is undefined. 878 */ 879 static int __open_device(struct dm_pool_metadata *pmd, 880 dm_thin_id dev, int create, 881 struct dm_thin_device **td) 882 { 883 int r, changed = 0; 884 struct dm_thin_device *td2; 885 uint64_t key = dev; 886 struct disk_device_details details_le; 887 888 /* 889 * If the device is already open, return it. 890 */ 891 list_for_each_entry(td2, &pmd->thin_devices, list) 892 if (td2->id == dev) { 893 /* 894 * May not create an already-open device. 895 */ 896 if (create) 897 return -EEXIST; 898 899 td2->open_count++; 900 *td = td2; 901 return 0; 902 } 903 904 /* 905 * Check the device exists. 906 */ 907 r = dm_btree_lookup(&pmd->details_info, pmd->details_root, 908 &key, &details_le); 909 if (r) { 910 if (r != -ENODATA || !create) 911 return r; 912 913 /* 914 * Create new device. 915 */ 916 changed = 1; 917 details_le.mapped_blocks = 0; 918 details_le.transaction_id = cpu_to_le64(pmd->trans_id); 919 details_le.creation_time = cpu_to_le32(pmd->time); 920 details_le.snapshotted_time = cpu_to_le32(pmd->time); 921 } 922 923 *td = kmalloc(sizeof(**td), GFP_NOIO); 924 if (!*td) 925 return -ENOMEM; 926 927 (*td)->pmd = pmd; 928 (*td)->id = dev; 929 (*td)->open_count = 1; 930 (*td)->changed = changed; 931 (*td)->aborted_with_changes = false; 932 (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); 933 (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); 934 (*td)->creation_time = le32_to_cpu(details_le.creation_time); 935 (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time); 936 937 list_add(&(*td)->list, &pmd->thin_devices); 938 939 return 0; 940 } 941 942 static void __close_device(struct dm_thin_device *td) 943 { 944 --td->open_count; 945 } 946 947 static int __create_thin(struct dm_pool_metadata *pmd, 948 dm_thin_id dev) 949 { 950 int r; 951 dm_block_t dev_root; 952 uint64_t key = dev; 953 struct disk_device_details details_le; 954 struct dm_thin_device *td; 955 __le64 value; 956 957 r = dm_btree_lookup(&pmd->details_info, pmd->details_root, 958 &key, &details_le); 959 if (!r) 960 return -EEXIST; 961 962 /* 963 * Create an empty btree for the mappings. 964 */ 965 r = dm_btree_empty(&pmd->bl_info, &dev_root); 966 if (r) 967 return r; 968 969 /* 970 * Insert it into the main mapping tree. 971 */ 972 value = cpu_to_le64(dev_root); 973 __dm_bless_for_disk(&value); 974 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); 975 if (r) { 976 dm_btree_del(&pmd->bl_info, dev_root); 977 return r; 978 } 979 980 r = __open_device(pmd, dev, 1, &td); 981 if (r) { 982 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); 983 dm_btree_del(&pmd->bl_info, dev_root); 984 return r; 985 } 986 __close_device(td); 987 988 return r; 989 } 990 991 int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) 992 { 993 int r = -EINVAL; 994 995 down_write(&pmd->root_lock); 996 if (!pmd->fail_io) 997 r = __create_thin(pmd, dev); 998 up_write(&pmd->root_lock); 999 1000 return r; 1001 } 1002 1003 static int __set_snapshot_details(struct dm_pool_metadata *pmd, 1004 struct dm_thin_device *snap, 1005 dm_thin_id origin, uint32_t time) 1006 { 1007 int r; 1008 struct dm_thin_device *td; 1009 1010 r = __open_device(pmd, origin, 0, &td); 1011 if (r) 1012 return r; 1013 1014 td->changed = 1; 1015 td->snapshotted_time = time; 1016 1017 snap->mapped_blocks = td->mapped_blocks; 1018 snap->snapshotted_time = time; 1019 __close_device(td); 1020 1021 return 0; 1022 } 1023 1024 static int __create_snap(struct dm_pool_metadata *pmd, 1025 dm_thin_id dev, dm_thin_id origin) 1026 { 1027 int r; 1028 dm_block_t origin_root; 1029 uint64_t key = origin, dev_key = dev; 1030 struct dm_thin_device *td; 1031 struct disk_device_details details_le; 1032 __le64 value; 1033 1034 /* check this device is unused */ 1035 r = dm_btree_lookup(&pmd->details_info, pmd->details_root, 1036 &dev_key, &details_le); 1037 if (!r) 1038 return -EEXIST; 1039 1040 /* find the mapping tree for the origin */ 1041 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value); 1042 if (r) 1043 return r; 1044 origin_root = le64_to_cpu(value); 1045 1046 /* clone the origin, an inc will do */ 1047 dm_tm_inc(pmd->tm, origin_root); 1048 1049 /* insert into the main mapping tree */ 1050 value = cpu_to_le64(origin_root); 1051 __dm_bless_for_disk(&value); 1052 key = dev; 1053 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root); 1054 if (r) { 1055 dm_tm_dec(pmd->tm, origin_root); 1056 return r; 1057 } 1058 1059 pmd->time++; 1060 1061 r = __open_device(pmd, dev, 1, &td); 1062 if (r) 1063 goto bad; 1064 1065 r = __set_snapshot_details(pmd, td, origin, pmd->time); 1066 __close_device(td); 1067 1068 if (r) 1069 goto bad; 1070 1071 return 0; 1072 1073 bad: 1074 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); 1075 dm_btree_remove(&pmd->details_info, pmd->details_root, 1076 &key, &pmd->details_root); 1077 return r; 1078 } 1079 1080 int dm_pool_create_snap(struct dm_pool_metadata *pmd, 1081 dm_thin_id dev, 1082 dm_thin_id origin) 1083 { 1084 int r = -EINVAL; 1085 1086 down_write(&pmd->root_lock); 1087 if (!pmd->fail_io) 1088 r = __create_snap(pmd, dev, origin); 1089 up_write(&pmd->root_lock); 1090 1091 return r; 1092 } 1093 1094 static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) 1095 { 1096 int r; 1097 uint64_t key = dev; 1098 struct dm_thin_device *td; 1099 1100 /* TODO: failure should mark the transaction invalid */ 1101 r = __open_device(pmd, dev, 0, &td); 1102 if (r) 1103 return r; 1104 1105 if (td->open_count > 1) { 1106 __close_device(td); 1107 return -EBUSY; 1108 } 1109 1110 list_del(&td->list); 1111 kfree(td); 1112 r = dm_btree_remove(&pmd->details_info, pmd->details_root, 1113 &key, &pmd->details_root); 1114 if (r) 1115 return r; 1116 1117 r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root); 1118 if (r) 1119 return r; 1120 1121 return 0; 1122 } 1123 1124 int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, 1125 dm_thin_id dev) 1126 { 1127 int r = -EINVAL; 1128 1129 down_write(&pmd->root_lock); 1130 if (!pmd->fail_io) 1131 r = __delete_device(pmd, dev); 1132 up_write(&pmd->root_lock); 1133 1134 return r; 1135 } 1136 1137 int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, 1138 uint64_t current_id, 1139 uint64_t new_id) 1140 { 1141 int r = -EINVAL; 1142 1143 down_write(&pmd->root_lock); 1144 1145 if (pmd->fail_io) 1146 goto out; 1147 1148 if (pmd->trans_id != current_id) { 1149 DMERR("mismatched transaction id"); 1150 goto out; 1151 } 1152 1153 pmd->trans_id = new_id; 1154 r = 0; 1155 1156 out: 1157 up_write(&pmd->root_lock); 1158 1159 return r; 1160 } 1161 1162 int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, 1163 uint64_t *result) 1164 { 1165 int r = -EINVAL; 1166 1167 down_read(&pmd->root_lock); 1168 if (!pmd->fail_io) { 1169 *result = pmd->trans_id; 1170 r = 0; 1171 } 1172 up_read(&pmd->root_lock); 1173 1174 return r; 1175 } 1176 1177 static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) 1178 { 1179 int r, inc; 1180 struct thin_disk_superblock *disk_super; 1181 struct dm_block *copy, *sblock; 1182 dm_block_t held_root; 1183 1184 /* 1185 * Copy the superblock. 1186 */ 1187 dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION); 1188 r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION, 1189 &sb_validator, ©, &inc); 1190 if (r) 1191 return r; 1192 1193 BUG_ON(!inc); 1194 1195 held_root = dm_block_location(copy); 1196 disk_super = dm_block_data(copy); 1197 1198 if (le64_to_cpu(disk_super->held_root)) { 1199 DMWARN("Pool metadata snapshot already exists: release this before taking another."); 1200 1201 dm_tm_dec(pmd->tm, held_root); 1202 dm_tm_unlock(pmd->tm, copy); 1203 return -EBUSY; 1204 } 1205 1206 /* 1207 * Wipe the spacemap since we're not publishing this. 1208 */ 1209 memset(&disk_super->data_space_map_root, 0, 1210 sizeof(disk_super->data_space_map_root)); 1211 memset(&disk_super->metadata_space_map_root, 0, 1212 sizeof(disk_super->metadata_space_map_root)); 1213 1214 /* 1215 * Increment the data structures that need to be preserved. 1216 */ 1217 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root)); 1218 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root)); 1219 dm_tm_unlock(pmd->tm, copy); 1220 1221 /* 1222 * Write the held root into the superblock. 1223 */ 1224 r = superblock_lock(pmd, &sblock); 1225 if (r) { 1226 dm_tm_dec(pmd->tm, held_root); 1227 return r; 1228 } 1229 1230 disk_super = dm_block_data(sblock); 1231 disk_super->held_root = cpu_to_le64(held_root); 1232 dm_bm_unlock(sblock); 1233 return 0; 1234 } 1235 1236 int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) 1237 { 1238 int r = -EINVAL; 1239 1240 down_write(&pmd->root_lock); 1241 if (!pmd->fail_io) 1242 r = __reserve_metadata_snap(pmd); 1243 up_write(&pmd->root_lock); 1244 1245 return r; 1246 } 1247 1248 static int __release_metadata_snap(struct dm_pool_metadata *pmd) 1249 { 1250 int r; 1251 struct thin_disk_superblock *disk_super; 1252 struct dm_block *sblock, *copy; 1253 dm_block_t held_root; 1254 1255 r = superblock_lock(pmd, &sblock); 1256 if (r) 1257 return r; 1258 1259 disk_super = dm_block_data(sblock); 1260 held_root = le64_to_cpu(disk_super->held_root); 1261 disk_super->held_root = cpu_to_le64(0); 1262 1263 dm_bm_unlock(sblock); 1264 1265 if (!held_root) { 1266 DMWARN("No pool metadata snapshot found: nothing to release."); 1267 return -EINVAL; 1268 } 1269 1270 r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, ©); 1271 if (r) 1272 return r; 1273 1274 disk_super = dm_block_data(copy); 1275 dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); 1276 dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); 1277 dm_sm_dec_block(pmd->metadata_sm, held_root); 1278 1279 return dm_tm_unlock(pmd->tm, copy); 1280 } 1281 1282 int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) 1283 { 1284 int r = -EINVAL; 1285 1286 down_write(&pmd->root_lock); 1287 if (!pmd->fail_io) 1288 r = __release_metadata_snap(pmd); 1289 up_write(&pmd->root_lock); 1290 1291 return r; 1292 } 1293 1294 static int __get_metadata_snap(struct dm_pool_metadata *pmd, 1295 dm_block_t *result) 1296 { 1297 int r; 1298 struct thin_disk_superblock *disk_super; 1299 struct dm_block *sblock; 1300 1301 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, 1302 &sb_validator, &sblock); 1303 if (r) 1304 return r; 1305 1306 disk_super = dm_block_data(sblock); 1307 *result = le64_to_cpu(disk_super->held_root); 1308 1309 return dm_bm_unlock(sblock); 1310 } 1311 1312 int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, 1313 dm_block_t *result) 1314 { 1315 int r = -EINVAL; 1316 1317 down_read(&pmd->root_lock); 1318 if (!pmd->fail_io) 1319 r = __get_metadata_snap(pmd, result); 1320 up_read(&pmd->root_lock); 1321 1322 return r; 1323 } 1324 1325 int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, 1326 struct dm_thin_device **td) 1327 { 1328 int r = -EINVAL; 1329 1330 down_write(&pmd->root_lock); 1331 if (!pmd->fail_io) 1332 r = __open_device(pmd, dev, 0, td); 1333 up_write(&pmd->root_lock); 1334 1335 return r; 1336 } 1337 1338 int dm_pool_close_thin_device(struct dm_thin_device *td) 1339 { 1340 down_write(&td->pmd->root_lock); 1341 __close_device(td); 1342 up_write(&td->pmd->root_lock); 1343 1344 return 0; 1345 } 1346 1347 dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) 1348 { 1349 return td->id; 1350 } 1351 1352 static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) 1353 { 1354 return td->snapshotted_time > time; 1355 } 1356 1357 int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, 1358 int can_block, struct dm_thin_lookup_result *result) 1359 { 1360 int r = -EINVAL; 1361 uint64_t block_time = 0; 1362 __le64 value; 1363 struct dm_pool_metadata *pmd = td->pmd; 1364 dm_block_t keys[2] = { td->id, block }; 1365 struct dm_btree_info *info; 1366 1367 if (can_block) { 1368 down_read(&pmd->root_lock); 1369 info = &pmd->info; 1370 } else if (down_read_trylock(&pmd->root_lock)) 1371 info = &pmd->nb_info; 1372 else 1373 return -EWOULDBLOCK; 1374 1375 if (pmd->fail_io) 1376 goto out; 1377 1378 r = dm_btree_lookup(info, pmd->root, keys, &value); 1379 if (!r) 1380 block_time = le64_to_cpu(value); 1381 1382 out: 1383 up_read(&pmd->root_lock); 1384 1385 if (!r) { 1386 dm_block_t exception_block; 1387 uint32_t exception_time; 1388 unpack_block_time(block_time, &exception_block, 1389 &exception_time); 1390 result->block = exception_block; 1391 result->shared = __snapshotted_since(td, exception_time); 1392 } 1393 1394 return r; 1395 } 1396 1397 static int __insert(struct dm_thin_device *td, dm_block_t block, 1398 dm_block_t data_block) 1399 { 1400 int r, inserted; 1401 __le64 value; 1402 struct dm_pool_metadata *pmd = td->pmd; 1403 dm_block_t keys[2] = { td->id, block }; 1404 1405 value = cpu_to_le64(pack_block_time(data_block, pmd->time)); 1406 __dm_bless_for_disk(&value); 1407 1408 r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value, 1409 &pmd->root, &inserted); 1410 if (r) 1411 return r; 1412 1413 td->changed = 1; 1414 if (inserted) 1415 td->mapped_blocks++; 1416 1417 return 0; 1418 } 1419 1420 int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, 1421 dm_block_t data_block) 1422 { 1423 int r = -EINVAL; 1424 1425 down_write(&td->pmd->root_lock); 1426 if (!td->pmd->fail_io) 1427 r = __insert(td, block, data_block); 1428 up_write(&td->pmd->root_lock); 1429 1430 return r; 1431 } 1432 1433 static int __remove(struct dm_thin_device *td, dm_block_t block) 1434 { 1435 int r; 1436 struct dm_pool_metadata *pmd = td->pmd; 1437 dm_block_t keys[2] = { td->id, block }; 1438 1439 r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root); 1440 if (r) 1441 return r; 1442 1443 td->mapped_blocks--; 1444 td->changed = 1; 1445 1446 return 0; 1447 } 1448 1449 int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) 1450 { 1451 int r = -EINVAL; 1452 1453 down_write(&td->pmd->root_lock); 1454 if (!td->pmd->fail_io) 1455 r = __remove(td, block); 1456 up_write(&td->pmd->root_lock); 1457 1458 return r; 1459 } 1460 1461 bool dm_thin_changed_this_transaction(struct dm_thin_device *td) 1462 { 1463 int r; 1464 1465 down_read(&td->pmd->root_lock); 1466 r = td->changed; 1467 up_read(&td->pmd->root_lock); 1468 1469 return r; 1470 } 1471 1472 bool dm_thin_aborted_changes(struct dm_thin_device *td) 1473 { 1474 bool r; 1475 1476 down_read(&td->pmd->root_lock); 1477 r = td->aborted_with_changes; 1478 up_read(&td->pmd->root_lock); 1479 1480 return r; 1481 } 1482 1483 int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) 1484 { 1485 int r = -EINVAL; 1486 1487 down_write(&pmd->root_lock); 1488 if (!pmd->fail_io) 1489 r = dm_sm_new_block(pmd->data_sm, result); 1490 up_write(&pmd->root_lock); 1491 1492 return r; 1493 } 1494 1495 int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) 1496 { 1497 int r = -EINVAL; 1498 1499 down_write(&pmd->root_lock); 1500 if (pmd->fail_io) 1501 goto out; 1502 1503 r = __commit_transaction(pmd); 1504 if (r <= 0) 1505 goto out; 1506 1507 /* 1508 * Open the next transaction. 1509 */ 1510 r = __begin_transaction(pmd); 1511 out: 1512 up_write(&pmd->root_lock); 1513 return r; 1514 } 1515 1516 static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) 1517 { 1518 struct dm_thin_device *td; 1519 1520 list_for_each_entry(td, &pmd->thin_devices, list) 1521 td->aborted_with_changes = td->changed; 1522 } 1523 1524 int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) 1525 { 1526 int r = -EINVAL; 1527 1528 down_write(&pmd->root_lock); 1529 if (pmd->fail_io) 1530 goto out; 1531 1532 __set_abort_with_changes_flags(pmd); 1533 __destroy_persistent_data_objects(pmd); 1534 r = __create_persistent_data_objects(pmd, false); 1535 if (r) 1536 pmd->fail_io = true; 1537 1538 out: 1539 up_write(&pmd->root_lock); 1540 1541 return r; 1542 } 1543 1544 int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) 1545 { 1546 int r = -EINVAL; 1547 1548 down_read(&pmd->root_lock); 1549 if (!pmd->fail_io) 1550 r = dm_sm_get_nr_free(pmd->data_sm, result); 1551 up_read(&pmd->root_lock); 1552 1553 return r; 1554 } 1555 1556 int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, 1557 dm_block_t *result) 1558 { 1559 int r = -EINVAL; 1560 1561 down_read(&pmd->root_lock); 1562 if (!pmd->fail_io) 1563 r = dm_sm_get_nr_free(pmd->metadata_sm, result); 1564 up_read(&pmd->root_lock); 1565 1566 return r; 1567 } 1568 1569 int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, 1570 dm_block_t *result) 1571 { 1572 int r = -EINVAL; 1573 1574 down_read(&pmd->root_lock); 1575 if (!pmd->fail_io) 1576 r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); 1577 up_read(&pmd->root_lock); 1578 1579 return r; 1580 } 1581 1582 int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) 1583 { 1584 down_read(&pmd->root_lock); 1585 *result = pmd->data_block_size; 1586 up_read(&pmd->root_lock); 1587 1588 return 0; 1589 } 1590 1591 int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) 1592 { 1593 int r = -EINVAL; 1594 1595 down_read(&pmd->root_lock); 1596 if (!pmd->fail_io) 1597 r = dm_sm_get_nr_blocks(pmd->data_sm, result); 1598 up_read(&pmd->root_lock); 1599 1600 return r; 1601 } 1602 1603 int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) 1604 { 1605 int r = -EINVAL; 1606 struct dm_pool_metadata *pmd = td->pmd; 1607 1608 down_read(&pmd->root_lock); 1609 if (!pmd->fail_io) { 1610 *result = td->mapped_blocks; 1611 r = 0; 1612 } 1613 up_read(&pmd->root_lock); 1614 1615 return r; 1616 } 1617 1618 static int __highest_block(struct dm_thin_device *td, dm_block_t *result) 1619 { 1620 int r; 1621 __le64 value_le; 1622 dm_block_t thin_root; 1623 struct dm_pool_metadata *pmd = td->pmd; 1624 1625 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le); 1626 if (r) 1627 return r; 1628 1629 thin_root = le64_to_cpu(value_le); 1630 1631 return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result); 1632 } 1633 1634 int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, 1635 dm_block_t *result) 1636 { 1637 int r = -EINVAL; 1638 struct dm_pool_metadata *pmd = td->pmd; 1639 1640 down_read(&pmd->root_lock); 1641 if (!pmd->fail_io) 1642 r = __highest_block(td, result); 1643 up_read(&pmd->root_lock); 1644 1645 return r; 1646 } 1647 1648 static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count) 1649 { 1650 int r; 1651 dm_block_t old_count; 1652 1653 r = dm_sm_get_nr_blocks(sm, &old_count); 1654 if (r) 1655 return r; 1656 1657 if (new_count == old_count) 1658 return 0; 1659 1660 if (new_count < old_count) { 1661 DMERR("cannot reduce size of space map"); 1662 return -EINVAL; 1663 } 1664 1665 return dm_sm_extend(sm, new_count - old_count); 1666 } 1667 1668 int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1669 { 1670 int r = -EINVAL; 1671 1672 down_write(&pmd->root_lock); 1673 if (!pmd->fail_io) 1674 r = __resize_space_map(pmd->data_sm, new_count); 1675 up_write(&pmd->root_lock); 1676 1677 return r; 1678 } 1679 1680 int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) 1681 { 1682 int r = -EINVAL; 1683 1684 down_write(&pmd->root_lock); 1685 if (!pmd->fail_io) 1686 r = __resize_space_map(pmd->metadata_sm, new_count); 1687 up_write(&pmd->root_lock); 1688 1689 return r; 1690 } 1691 1692 void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) 1693 { 1694 down_write(&pmd->root_lock); 1695 pmd->read_only = true; 1696 dm_bm_set_read_only(pmd->bm); 1697 up_write(&pmd->root_lock); 1698 } 1699 1700 int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, 1701 dm_block_t threshold, 1702 dm_sm_threshold_fn fn, 1703 void *context) 1704 { 1705 int r; 1706 1707 down_write(&pmd->root_lock); 1708 r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); 1709 up_write(&pmd->root_lock); 1710 1711 return r; 1712 } 1713