1 /* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-space-map-common.h" 8 #include "dm-transaction-manager.h" 9 #include "dm-btree-internal.h" 10 #include "dm-persistent-data-internal.h" 11 12 #include <linux/bitops.h> 13 #include <linux/device-mapper.h> 14 15 #define DM_MSG_PREFIX "space map common" 16 17 /*----------------------------------------------------------------*/ 18 19 /* 20 * Index validator. 21 */ 22 #define INDEX_CSUM_XOR 160478 23 24 static void index_prepare_for_write(struct dm_block_validator *v, 25 struct dm_block *b, 26 size_t block_size) 27 { 28 struct disk_metadata_index *mi_le = dm_block_data(b); 29 30 mi_le->blocknr = cpu_to_le64(dm_block_location(b)); 31 mi_le->csum = cpu_to_le32(dm_bm_checksum(&mi_le->padding, 32 block_size - sizeof(__le32), 33 INDEX_CSUM_XOR)); 34 } 35 36 static int index_check(struct dm_block_validator *v, 37 struct dm_block *b, 38 size_t block_size) 39 { 40 struct disk_metadata_index *mi_le = dm_block_data(b); 41 __le32 csum_disk; 42 43 if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { 44 DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", 45 le64_to_cpu(mi_le->blocknr), dm_block_location(b)); 46 return -ENOTBLK; 47 } 48 49 csum_disk = cpu_to_le32(dm_bm_checksum(&mi_le->padding, 50 block_size - sizeof(__le32), 51 INDEX_CSUM_XOR)); 52 if (csum_disk != mi_le->csum) { 53 DMERR_LIMIT("index_check failed: csum %u != wanted %u", 54 le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); 55 return -EILSEQ; 56 } 57 58 return 0; 59 } 60 61 static struct dm_block_validator index_validator = { 62 .name = "index", 63 .prepare_for_write = index_prepare_for_write, 64 .check = index_check 65 }; 66 67 /*----------------------------------------------------------------*/ 68 69 /* 70 * Bitmap validator 71 */ 72 #define BITMAP_CSUM_XOR 240779 73 74 static void dm_bitmap_prepare_for_write(struct dm_block_validator *v, 75 struct dm_block *b, 76 size_t block_size) 77 { 78 struct disk_bitmap_header *disk_header = dm_block_data(b); 79 80 disk_header->blocknr = cpu_to_le64(dm_block_location(b)); 81 disk_header->csum = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, 82 block_size - sizeof(__le32), 83 BITMAP_CSUM_XOR)); 84 } 85 86 static int dm_bitmap_check(struct dm_block_validator *v, 87 struct dm_block *b, 88 size_t block_size) 89 { 90 struct disk_bitmap_header *disk_header = dm_block_data(b); 91 __le32 csum_disk; 92 93 if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { 94 DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", 95 le64_to_cpu(disk_header->blocknr), dm_block_location(b)); 96 return -ENOTBLK; 97 } 98 99 csum_disk = cpu_to_le32(dm_bm_checksum(&disk_header->not_used, 100 block_size - sizeof(__le32), 101 BITMAP_CSUM_XOR)); 102 if (csum_disk != disk_header->csum) { 103 DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", 104 le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); 105 return -EILSEQ; 106 } 107 108 return 0; 109 } 110 111 static struct dm_block_validator dm_sm_bitmap_validator = { 112 .name = "sm_bitmap", 113 .prepare_for_write = dm_bitmap_prepare_for_write, 114 .check = dm_bitmap_check, 115 }; 116 117 /*----------------------------------------------------------------*/ 118 119 #define ENTRIES_PER_WORD 32 120 #define ENTRIES_SHIFT 5 121 122 static void *dm_bitmap_data(struct dm_block *b) 123 { 124 return dm_block_data(b) + sizeof(struct disk_bitmap_header); 125 } 126 127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL 128 129 static unsigned dm_bitmap_word_used(void *addr, unsigned b) 130 { 131 __le64 *words_le = addr; 132 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); 133 134 uint64_t bits = le64_to_cpu(*w_le); 135 uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH; 136 137 return !(~bits & mask); 138 } 139 140 static unsigned sm_lookup_bitmap(void *addr, unsigned b) 141 { 142 __le64 *words_le = addr; 143 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); 144 unsigned hi, lo; 145 146 b = (b & (ENTRIES_PER_WORD - 1)) << 1; 147 hi = !!test_bit_le(b, (void *) w_le); 148 lo = !!test_bit_le(b + 1, (void *) w_le); 149 return (hi << 1) | lo; 150 } 151 152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val) 153 { 154 __le64 *words_le = addr; 155 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT); 156 157 b = (b & (ENTRIES_PER_WORD - 1)) << 1; 158 159 if (val & 2) 160 __set_bit_le(b, (void *) w_le); 161 else 162 __clear_bit_le(b, (void *) w_le); 163 164 if (val & 1) 165 __set_bit_le(b + 1, (void *) w_le); 166 else 167 __clear_bit_le(b + 1, (void *) w_le); 168 } 169 170 static int sm_find_free(void *addr, unsigned begin, unsigned end, 171 unsigned *result) 172 { 173 while (begin < end) { 174 if (!(begin & (ENTRIES_PER_WORD - 1)) && 175 dm_bitmap_word_used(addr, begin)) { 176 begin += ENTRIES_PER_WORD; 177 continue; 178 } 179 180 if (!sm_lookup_bitmap(addr, begin)) { 181 *result = begin; 182 return 0; 183 } 184 185 begin++; 186 } 187 188 return -ENOSPC; 189 } 190 191 /*----------------------------------------------------------------*/ 192 193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) 194 { 195 memset(ll, 0, sizeof(struct ll_disk)); 196 197 ll->tm = tm; 198 199 ll->bitmap_info.tm = tm; 200 ll->bitmap_info.levels = 1; 201 202 /* 203 * Because the new bitmap blocks are created via a shadow 204 * operation, the old entry has already had its reference count 205 * decremented and we don't need the btree to do any bookkeeping. 206 */ 207 ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry); 208 ll->bitmap_info.value_type.inc = NULL; 209 ll->bitmap_info.value_type.dec = NULL; 210 ll->bitmap_info.value_type.equal = NULL; 211 212 ll->ref_count_info.tm = tm; 213 ll->ref_count_info.levels = 1; 214 ll->ref_count_info.value_type.size = sizeof(uint32_t); 215 ll->ref_count_info.value_type.inc = NULL; 216 ll->ref_count_info.value_type.dec = NULL; 217 ll->ref_count_info.value_type.equal = NULL; 218 219 ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm)); 220 221 if (ll->block_size > (1 << 30)) { 222 DMERR("block size too big to hold bitmaps"); 223 return -EINVAL; 224 } 225 226 ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) * 227 ENTRIES_PER_BYTE; 228 ll->nr_blocks = 0; 229 ll->bitmap_root = 0; 230 ll->ref_count_root = 0; 231 ll->bitmap_index_changed = false; 232 233 return 0; 234 } 235 236 int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) 237 { 238 int r; 239 dm_block_t i, nr_blocks, nr_indexes; 240 unsigned old_blocks, blocks; 241 242 nr_blocks = ll->nr_blocks + extra_blocks; 243 old_blocks = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block); 244 blocks = dm_sector_div_up(nr_blocks, ll->entries_per_block); 245 246 nr_indexes = dm_sector_div_up(nr_blocks, ll->entries_per_block); 247 if (nr_indexes > ll->max_entries(ll)) { 248 DMERR("space map too large"); 249 return -EINVAL; 250 } 251 252 /* 253 * We need to set this before the dm_tm_new_block() call below. 254 */ 255 ll->nr_blocks = nr_blocks; 256 for (i = old_blocks; i < blocks; i++) { 257 struct dm_block *b; 258 struct disk_index_entry idx; 259 260 r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); 261 if (r < 0) 262 return r; 263 264 idx.blocknr = cpu_to_le64(dm_block_location(b)); 265 266 dm_tm_unlock(ll->tm, b); 267 268 idx.nr_free = cpu_to_le32(ll->entries_per_block); 269 idx.none_free_before = 0; 270 271 r = ll->save_ie(ll, i, &idx); 272 if (r < 0) 273 return r; 274 } 275 276 return 0; 277 } 278 279 int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) 280 { 281 int r; 282 dm_block_t index = b; 283 struct disk_index_entry ie_disk; 284 struct dm_block *blk; 285 286 b = do_div(index, ll->entries_per_block); 287 r = ll->load_ie(ll, index, &ie_disk); 288 if (r < 0) 289 return r; 290 291 r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr), 292 &dm_sm_bitmap_validator, &blk); 293 if (r < 0) 294 return r; 295 296 *result = sm_lookup_bitmap(dm_bitmap_data(blk), b); 297 298 dm_tm_unlock(ll->tm, blk); 299 300 return 0; 301 } 302 303 static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b, 304 uint32_t *result) 305 { 306 __le32 le_rc; 307 int r; 308 309 r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc); 310 if (r < 0) 311 return r; 312 313 *result = le32_to_cpu(le_rc); 314 315 return r; 316 } 317 318 int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) 319 { 320 int r = sm_ll_lookup_bitmap(ll, b, result); 321 322 if (r) 323 return r; 324 325 if (*result != 3) 326 return r; 327 328 return sm_ll_lookup_big_ref_count(ll, b, result); 329 } 330 331 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, 332 dm_block_t end, dm_block_t *result) 333 { 334 int r; 335 struct disk_index_entry ie_disk; 336 dm_block_t i, index_begin = begin; 337 dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block); 338 339 /* 340 * FIXME: Use shifts 341 */ 342 begin = do_div(index_begin, ll->entries_per_block); 343 end = do_div(end, ll->entries_per_block); 344 if (end == 0) 345 end = ll->entries_per_block; 346 347 for (i = index_begin; i < index_end; i++, begin = 0) { 348 struct dm_block *blk; 349 unsigned position; 350 uint32_t bit_end; 351 352 r = ll->load_ie(ll, i, &ie_disk); 353 if (r < 0) 354 return r; 355 356 if (le32_to_cpu(ie_disk.nr_free) == 0) 357 continue; 358 359 r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr), 360 &dm_sm_bitmap_validator, &blk); 361 if (r < 0) 362 return r; 363 364 bit_end = (i == index_end - 1) ? end : ll->entries_per_block; 365 366 r = sm_find_free(dm_bitmap_data(blk), 367 max_t(unsigned, begin, le32_to_cpu(ie_disk.none_free_before)), 368 bit_end, &position); 369 if (r == -ENOSPC) { 370 /* 371 * This might happen because we started searching 372 * part way through the bitmap. 373 */ 374 dm_tm_unlock(ll->tm, blk); 375 continue; 376 } 377 378 dm_tm_unlock(ll->tm, blk); 379 380 *result = i * ll->entries_per_block + (dm_block_t) position; 381 return 0; 382 } 383 384 return -ENOSPC; 385 } 386 387 int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, 388 dm_block_t begin, dm_block_t end, dm_block_t *b) 389 { 390 int r; 391 uint32_t count; 392 393 do { 394 r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); 395 if (r) 396 break; 397 398 /* double check this block wasn't used in the old transaction */ 399 if (*b >= old_ll->nr_blocks) 400 count = 0; 401 else { 402 r = sm_ll_lookup(old_ll, *b, &count); 403 if (r) 404 break; 405 406 if (count) 407 begin = *b + 1; 408 } 409 } while (count); 410 411 return r; 412 } 413 414 /*----------------------------------------------------------------*/ 415 416 int sm_ll_insert(struct ll_disk *ll, dm_block_t b, 417 uint32_t ref_count, int32_t *nr_allocations) 418 { 419 int r; 420 uint32_t bit, old; 421 struct dm_block *nb; 422 dm_block_t index = b; 423 struct disk_index_entry ie_disk; 424 void *bm_le; 425 int inc; 426 427 bit = do_div(index, ll->entries_per_block); 428 r = ll->load_ie(ll, index, &ie_disk); 429 if (r < 0) 430 return r; 431 432 r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr), 433 &dm_sm_bitmap_validator, &nb, &inc); 434 if (r < 0) { 435 DMERR("dm_tm_shadow_block() failed"); 436 return r; 437 } 438 ie_disk.blocknr = cpu_to_le64(dm_block_location(nb)); 439 bm_le = dm_bitmap_data(nb); 440 441 old = sm_lookup_bitmap(bm_le, bit); 442 if (old > 2) { 443 r = sm_ll_lookup_big_ref_count(ll, b, &old); 444 if (r < 0) { 445 dm_tm_unlock(ll->tm, nb); 446 return r; 447 } 448 } 449 450 if (r) { 451 dm_tm_unlock(ll->tm, nb); 452 return r; 453 } 454 455 if (ref_count <= 2) { 456 sm_set_bitmap(bm_le, bit, ref_count); 457 dm_tm_unlock(ll->tm, nb); 458 459 if (old > 2) { 460 r = dm_btree_remove(&ll->ref_count_info, 461 ll->ref_count_root, 462 &b, &ll->ref_count_root); 463 if (r) 464 return r; 465 } 466 467 } else { 468 __le32 le_rc = cpu_to_le32(ref_count); 469 470 sm_set_bitmap(bm_le, bit, 3); 471 dm_tm_unlock(ll->tm, nb); 472 473 __dm_bless_for_disk(&le_rc); 474 r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root, 475 &b, &le_rc, &ll->ref_count_root); 476 if (r < 0) { 477 DMERR("ref count insert failed"); 478 return r; 479 } 480 } 481 482 if (ref_count && !old) { 483 *nr_allocations = 1; 484 ll->nr_allocated++; 485 le32_add_cpu(&ie_disk.nr_free, -1); 486 if (le32_to_cpu(ie_disk.none_free_before) == bit) 487 ie_disk.none_free_before = cpu_to_le32(bit + 1); 488 489 } else if (old && !ref_count) { 490 *nr_allocations = -1; 491 ll->nr_allocated--; 492 le32_add_cpu(&ie_disk.nr_free, 1); 493 ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit)); 494 } else 495 *nr_allocations = 0; 496 497 return ll->save_ie(ll, index, &ie_disk); 498 } 499 500 /*----------------------------------------------------------------*/ 501 502 /* 503 * Holds useful intermediate results for the range based inc and dec 504 * operations. 505 */ 506 struct inc_context { 507 struct disk_index_entry ie_disk; 508 struct dm_block *bitmap_block; 509 void *bitmap; 510 511 struct dm_block *overflow_leaf; 512 }; 513 514 static inline void init_inc_context(struct inc_context *ic) 515 { 516 ic->bitmap_block = NULL; 517 ic->bitmap = NULL; 518 ic->overflow_leaf = NULL; 519 } 520 521 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic) 522 { 523 if (ic->bitmap_block) 524 dm_tm_unlock(ll->tm, ic->bitmap_block); 525 if (ic->overflow_leaf) 526 dm_tm_unlock(ll->tm, ic->overflow_leaf); 527 } 528 529 static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic) 530 { 531 exit_inc_context(ll, ic); 532 init_inc_context(ic); 533 } 534 535 /* 536 * Confirms a btree node contains a particular key at an index. 537 */ 538 static bool contains_key(struct btree_node *n, uint64_t key, int index) 539 { 540 return index >= 0 && 541 index < le32_to_cpu(n->header.nr_entries) && 542 le64_to_cpu(n->keys[index]) == key; 543 } 544 545 static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) 546 { 547 int r; 548 int index; 549 struct btree_node *n; 550 __le32 *v_ptr; 551 uint32_t rc; 552 553 /* 554 * bitmap_block needs to be unlocked because getting the 555 * overflow_leaf may need to allocate, and thus use the space map. 556 */ 557 reset_inc_context(ll, ic); 558 559 r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, 560 b, &index, &ll->ref_count_root, &ic->overflow_leaf); 561 if (r < 0) 562 return r; 563 564 n = dm_block_data(ic->overflow_leaf); 565 566 if (!contains_key(n, b, index)) { 567 DMERR("overflow btree is missing an entry"); 568 return -EINVAL; 569 } 570 571 v_ptr = value_ptr(n, index); 572 rc = le32_to_cpu(*v_ptr) + 1; 573 *v_ptr = cpu_to_le32(rc); 574 575 return 0; 576 } 577 578 static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic) 579 { 580 int index; 581 struct btree_node *n; 582 __le32 *v_ptr; 583 uint32_t rc; 584 585 /* 586 * Do we already have the correct overflow leaf? 587 */ 588 if (ic->overflow_leaf) { 589 n = dm_block_data(ic->overflow_leaf); 590 index = lower_bound(n, b); 591 if (contains_key(n, b, index)) { 592 v_ptr = value_ptr(n, index); 593 rc = le32_to_cpu(*v_ptr) + 1; 594 *v_ptr = cpu_to_le32(rc); 595 596 return 0; 597 } 598 } 599 600 return __sm_ll_inc_overflow(ll, b, ic); 601 } 602 603 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic) 604 { 605 int r, inc; 606 r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr), 607 &dm_sm_bitmap_validator, &ic->bitmap_block, &inc); 608 if (r < 0) { 609 DMERR("dm_tm_shadow_block() failed"); 610 return r; 611 } 612 ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block)); 613 ic->bitmap = dm_bitmap_data(ic->bitmap_block); 614 return 0; 615 } 616 617 /* 618 * Once shadow_bitmap has been called, which always happens at the start of inc/dec, 619 * we can reopen the bitmap with a simple write lock, rather than re calling 620 * dm_tm_shadow_block(). 621 */ 622 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic) 623 { 624 if (!ic->bitmap_block) { 625 int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr), 626 &dm_sm_bitmap_validator, &ic->bitmap_block); 627 if (r) { 628 DMERR("unable to re-get write lock for bitmap"); 629 return r; 630 } 631 ic->bitmap = dm_bitmap_data(ic->bitmap_block); 632 } 633 634 return 0; 635 } 636 637 /* 638 * Loops round incrementing entries in a single bitmap. 639 */ 640 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b, 641 uint32_t bit, uint32_t bit_end, 642 int32_t *nr_allocations, dm_block_t *new_b, 643 struct inc_context *ic) 644 { 645 int r; 646 __le32 le_rc; 647 uint32_t old; 648 649 for (; bit != bit_end; bit++, b++) { 650 /* 651 * We only need to drop the bitmap if we need to find a new btree 652 * leaf for the overflow. So if it was dropped last iteration, 653 * we now re-get it. 654 */ 655 r = ensure_bitmap(ll, ic); 656 if (r) 657 return r; 658 659 old = sm_lookup_bitmap(ic->bitmap, bit); 660 switch (old) { 661 case 0: 662 /* inc bitmap, adjust nr_allocated */ 663 sm_set_bitmap(ic->bitmap, bit, 1); 664 (*nr_allocations)++; 665 ll->nr_allocated++; 666 le32_add_cpu(&ic->ie_disk.nr_free, -1); 667 if (le32_to_cpu(ic->ie_disk.none_free_before) == bit) 668 ic->ie_disk.none_free_before = cpu_to_le32(bit + 1); 669 break; 670 671 case 1: 672 /* inc bitmap */ 673 sm_set_bitmap(ic->bitmap, bit, 2); 674 break; 675 676 case 2: 677 /* inc bitmap and insert into overflow */ 678 sm_set_bitmap(ic->bitmap, bit, 3); 679 reset_inc_context(ll, ic); 680 681 le_rc = cpu_to_le32(3); 682 __dm_bless_for_disk(&le_rc); 683 r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root, 684 &b, &le_rc, &ll->ref_count_root); 685 if (r < 0) { 686 DMERR("ref count insert failed"); 687 return r; 688 } 689 break; 690 691 default: 692 /* 693 * inc within the overflow tree only. 694 */ 695 r = sm_ll_inc_overflow(ll, b, ic); 696 if (r < 0) 697 return r; 698 } 699 } 700 701 *new_b = b; 702 return 0; 703 } 704 705 /* 706 * Finds a bitmap that contains entries in the block range, and increments 707 * them. 708 */ 709 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, 710 int32_t *nr_allocations, dm_block_t *new_b) 711 { 712 int r; 713 struct inc_context ic; 714 uint32_t bit, bit_end; 715 dm_block_t index = b; 716 717 init_inc_context(&ic); 718 719 bit = do_div(index, ll->entries_per_block); 720 r = ll->load_ie(ll, index, &ic.ie_disk); 721 if (r < 0) 722 return r; 723 724 r = shadow_bitmap(ll, &ic); 725 if (r) 726 return r; 727 728 bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); 729 r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic); 730 731 exit_inc_context(ll, &ic); 732 733 if (r) 734 return r; 735 736 return ll->save_ie(ll, index, &ic.ie_disk); 737 } 738 739 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e, 740 int32_t *nr_allocations) 741 { 742 *nr_allocations = 0; 743 while (b != e) { 744 int r = __sm_ll_inc(ll, b, e, nr_allocations, &b); 745 if (r) 746 return r; 747 } 748 749 return 0; 750 } 751 752 /*----------------------------------------------------------------*/ 753 754 static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b, 755 struct inc_context *ic) 756 { 757 reset_inc_context(ll, ic); 758 return dm_btree_remove(&ll->ref_count_info, ll->ref_count_root, 759 &b, &ll->ref_count_root); 760 } 761 762 static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, 763 struct inc_context *ic, uint32_t *old_rc) 764 { 765 int r; 766 int index = -1; 767 struct btree_node *n; 768 __le32 *v_ptr; 769 uint32_t rc; 770 771 reset_inc_context(ll, ic); 772 r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root, 773 b, &index, &ll->ref_count_root, &ic->overflow_leaf); 774 if (r < 0) 775 return r; 776 777 n = dm_block_data(ic->overflow_leaf); 778 779 if (!contains_key(n, b, index)) { 780 DMERR("overflow btree is missing an entry"); 781 return -EINVAL; 782 } 783 784 v_ptr = value_ptr(n, index); 785 rc = le32_to_cpu(*v_ptr); 786 *old_rc = rc; 787 788 if (rc == 3) { 789 return __sm_ll_del_overflow(ll, b, ic); 790 } else { 791 rc--; 792 *v_ptr = cpu_to_le32(rc); 793 return 0; 794 } 795 } 796 797 static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b, 798 struct inc_context *ic, uint32_t *old_rc) 799 { 800 /* 801 * Do we already have the correct overflow leaf? 802 */ 803 if (ic->overflow_leaf) { 804 int index; 805 struct btree_node *n; 806 __le32 *v_ptr; 807 uint32_t rc; 808 809 n = dm_block_data(ic->overflow_leaf); 810 index = lower_bound(n, b); 811 if (contains_key(n, b, index)) { 812 v_ptr = value_ptr(n, index); 813 rc = le32_to_cpu(*v_ptr); 814 *old_rc = rc; 815 816 if (rc > 3) { 817 rc--; 818 *v_ptr = cpu_to_le32(rc); 819 return 0; 820 } else { 821 return __sm_ll_del_overflow(ll, b, ic); 822 } 823 824 } 825 } 826 827 return __sm_ll_dec_overflow(ll, b, ic, old_rc); 828 } 829 830 /* 831 * Loops round incrementing entries in a single bitmap. 832 */ 833 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b, 834 uint32_t bit, uint32_t bit_end, 835 struct inc_context *ic, 836 int32_t *nr_allocations, dm_block_t *new_b) 837 { 838 int r; 839 uint32_t old; 840 841 for (; bit != bit_end; bit++, b++) { 842 /* 843 * We only need to drop the bitmap if we need to find a new btree 844 * leaf for the overflow. So if it was dropped last iteration, 845 * we now re-get it. 846 */ 847 r = ensure_bitmap(ll, ic); 848 if (r) 849 return r; 850 851 old = sm_lookup_bitmap(ic->bitmap, bit); 852 switch (old) { 853 case 0: 854 DMERR("unable to decrement block"); 855 return -EINVAL; 856 857 case 1: 858 /* dec bitmap */ 859 sm_set_bitmap(ic->bitmap, bit, 0); 860 (*nr_allocations)--; 861 ll->nr_allocated--; 862 le32_add_cpu(&ic->ie_disk.nr_free, 1); 863 ic->ie_disk.none_free_before = 864 cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit)); 865 break; 866 867 case 2: 868 /* dec bitmap and insert into overflow */ 869 sm_set_bitmap(ic->bitmap, bit, 1); 870 break; 871 872 case 3: 873 r = sm_ll_dec_overflow(ll, b, ic, &old); 874 if (r < 0) 875 return r; 876 877 if (old == 3) { 878 r = ensure_bitmap(ll, ic); 879 if (r) 880 return r; 881 882 sm_set_bitmap(ic->bitmap, bit, 2); 883 } 884 break; 885 } 886 } 887 888 *new_b = b; 889 return 0; 890 } 891 892 static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, 893 int32_t *nr_allocations, dm_block_t *new_b) 894 { 895 int r; 896 uint32_t bit, bit_end; 897 struct inc_context ic; 898 dm_block_t index = b; 899 900 init_inc_context(&ic); 901 902 bit = do_div(index, ll->entries_per_block); 903 r = ll->load_ie(ll, index, &ic.ie_disk); 904 if (r < 0) 905 return r; 906 907 r = shadow_bitmap(ll, &ic); 908 if (r) 909 return r; 910 911 bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block); 912 r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b); 913 exit_inc_context(ll, &ic); 914 915 if (r) 916 return r; 917 918 return ll->save_ie(ll, index, &ic.ie_disk); 919 } 920 921 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e, 922 int32_t *nr_allocations) 923 { 924 *nr_allocations = 0; 925 while (b != e) { 926 int r = __sm_ll_dec(ll, b, e, nr_allocations, &b); 927 if (r) 928 return r; 929 } 930 931 return 0; 932 } 933 934 /*----------------------------------------------------------------*/ 935 936 int sm_ll_commit(struct ll_disk *ll) 937 { 938 int r = 0; 939 940 if (ll->bitmap_index_changed) { 941 r = ll->commit(ll); 942 if (!r) 943 ll->bitmap_index_changed = false; 944 } 945 946 return r; 947 } 948 949 /*----------------------------------------------------------------*/ 950 951 static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, 952 struct disk_index_entry *ie) 953 { 954 memcpy(ie, ll->mi_le.index + index, sizeof(*ie)); 955 return 0; 956 } 957 958 static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, 959 struct disk_index_entry *ie) 960 { 961 ll->bitmap_index_changed = true; 962 memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); 963 return 0; 964 } 965 966 static int metadata_ll_init_index(struct ll_disk *ll) 967 { 968 int r; 969 struct dm_block *b; 970 971 r = dm_tm_new_block(ll->tm, &index_validator, &b); 972 if (r < 0) 973 return r; 974 975 ll->bitmap_root = dm_block_location(b); 976 977 dm_tm_unlock(ll->tm, b); 978 979 return 0; 980 } 981 982 static int metadata_ll_open(struct ll_disk *ll) 983 { 984 int r; 985 struct dm_block *block; 986 987 r = dm_tm_read_lock(ll->tm, ll->bitmap_root, 988 &index_validator, &block); 989 if (r) 990 return r; 991 992 memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le)); 993 dm_tm_unlock(ll->tm, block); 994 995 return 0; 996 } 997 998 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll) 999 { 1000 return MAX_METADATA_BITMAPS; 1001 } 1002 1003 static int metadata_ll_commit(struct ll_disk *ll) 1004 { 1005 int r, inc; 1006 struct dm_block *b; 1007 1008 r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc); 1009 if (r) 1010 return r; 1011 1012 memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le)); 1013 ll->bitmap_root = dm_block_location(b); 1014 1015 dm_tm_unlock(ll->tm, b); 1016 1017 return 0; 1018 } 1019 1020 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm) 1021 { 1022 int r; 1023 1024 r = sm_ll_init(ll, tm); 1025 if (r < 0) 1026 return r; 1027 1028 ll->load_ie = metadata_ll_load_ie; 1029 ll->save_ie = metadata_ll_save_ie; 1030 ll->init_index = metadata_ll_init_index; 1031 ll->open_index = metadata_ll_open; 1032 ll->max_entries = metadata_ll_max_entries; 1033 ll->commit = metadata_ll_commit; 1034 1035 ll->nr_blocks = 0; 1036 ll->nr_allocated = 0; 1037 1038 r = ll->init_index(ll); 1039 if (r < 0) 1040 return r; 1041 1042 r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root); 1043 if (r < 0) 1044 return r; 1045 1046 return 0; 1047 } 1048 1049 int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm, 1050 void *root_le, size_t len) 1051 { 1052 int r; 1053 struct disk_sm_root smr; 1054 1055 if (len < sizeof(struct disk_sm_root)) { 1056 DMERR("sm_metadata root too small"); 1057 return -ENOMEM; 1058 } 1059 1060 /* 1061 * We don't know the alignment of the root_le buffer, so need to 1062 * copy into a new structure. 1063 */ 1064 memcpy(&smr, root_le, sizeof(smr)); 1065 1066 r = sm_ll_init(ll, tm); 1067 if (r < 0) 1068 return r; 1069 1070 ll->load_ie = metadata_ll_load_ie; 1071 ll->save_ie = metadata_ll_save_ie; 1072 ll->init_index = metadata_ll_init_index; 1073 ll->open_index = metadata_ll_open; 1074 ll->max_entries = metadata_ll_max_entries; 1075 ll->commit = metadata_ll_commit; 1076 1077 ll->nr_blocks = le64_to_cpu(smr.nr_blocks); 1078 ll->nr_allocated = le64_to_cpu(smr.nr_allocated); 1079 ll->bitmap_root = le64_to_cpu(smr.bitmap_root); 1080 ll->ref_count_root = le64_to_cpu(smr.ref_count_root); 1081 1082 return ll->open_index(ll); 1083 } 1084 1085 /*----------------------------------------------------------------*/ 1086 1087 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec) 1088 { 1089 iec->dirty = false; 1090 __dm_bless_for_disk(iec->ie); 1091 return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root, 1092 &iec->index, &iec->ie, &ll->bitmap_root); 1093 } 1094 1095 static inline unsigned hash_index(dm_block_t index) 1096 { 1097 return dm_hash_block(index, IE_CACHE_MASK); 1098 } 1099 1100 static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index, 1101 struct disk_index_entry *ie) 1102 { 1103 int r; 1104 unsigned h = hash_index(index); 1105 struct ie_cache *iec = ll->ie_cache + h; 1106 1107 if (iec->valid) { 1108 if (iec->index == index) { 1109 memcpy(ie, &iec->ie, sizeof(*ie)); 1110 return 0; 1111 } 1112 1113 if (iec->dirty) { 1114 r = ie_cache_writeback(ll, iec); 1115 if (r) 1116 return r; 1117 } 1118 } 1119 1120 r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie); 1121 if (!r) { 1122 iec->valid = true; 1123 iec->dirty = false; 1124 iec->index = index; 1125 memcpy(&iec->ie, ie, sizeof(*ie)); 1126 } 1127 1128 return r; 1129 } 1130 1131 static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index, 1132 struct disk_index_entry *ie) 1133 { 1134 int r; 1135 unsigned h = hash_index(index); 1136 struct ie_cache *iec = ll->ie_cache + h; 1137 1138 ll->bitmap_index_changed = true; 1139 if (iec->valid) { 1140 if (iec->index == index) { 1141 memcpy(&iec->ie, ie, sizeof(*ie)); 1142 iec->dirty = true; 1143 return 0; 1144 } 1145 1146 if (iec->dirty) { 1147 r = ie_cache_writeback(ll, iec); 1148 if (r) 1149 return r; 1150 } 1151 } 1152 1153 iec->valid = true; 1154 iec->dirty = true; 1155 iec->index = index; 1156 memcpy(&iec->ie, ie, sizeof(*ie)); 1157 return 0; 1158 } 1159 1160 static int disk_ll_init_index(struct ll_disk *ll) 1161 { 1162 unsigned i; 1163 for (i = 0; i < IE_CACHE_SIZE; i++) { 1164 struct ie_cache *iec = ll->ie_cache + i; 1165 iec->valid = false; 1166 iec->dirty = false; 1167 } 1168 return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root); 1169 } 1170 1171 static int disk_ll_open(struct ll_disk *ll) 1172 { 1173 return 0; 1174 } 1175 1176 static dm_block_t disk_ll_max_entries(struct ll_disk *ll) 1177 { 1178 return -1ULL; 1179 } 1180 1181 static int disk_ll_commit(struct ll_disk *ll) 1182 { 1183 int r = 0; 1184 unsigned i; 1185 1186 for (i = 0; i < IE_CACHE_SIZE; i++) { 1187 struct ie_cache *iec = ll->ie_cache + i; 1188 if (iec->valid && iec->dirty) 1189 r = ie_cache_writeback(ll, iec); 1190 } 1191 1192 return r; 1193 } 1194 1195 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm) 1196 { 1197 int r; 1198 1199 r = sm_ll_init(ll, tm); 1200 if (r < 0) 1201 return r; 1202 1203 ll->load_ie = disk_ll_load_ie; 1204 ll->save_ie = disk_ll_save_ie; 1205 ll->init_index = disk_ll_init_index; 1206 ll->open_index = disk_ll_open; 1207 ll->max_entries = disk_ll_max_entries; 1208 ll->commit = disk_ll_commit; 1209 1210 ll->nr_blocks = 0; 1211 ll->nr_allocated = 0; 1212 1213 r = ll->init_index(ll); 1214 if (r < 0) 1215 return r; 1216 1217 r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root); 1218 if (r < 0) 1219 return r; 1220 1221 return 0; 1222 } 1223 1224 int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm, 1225 void *root_le, size_t len) 1226 { 1227 int r; 1228 struct disk_sm_root *smr = root_le; 1229 1230 if (len < sizeof(struct disk_sm_root)) { 1231 DMERR("sm_metadata root too small"); 1232 return -ENOMEM; 1233 } 1234 1235 r = sm_ll_init(ll, tm); 1236 if (r < 0) 1237 return r; 1238 1239 ll->load_ie = disk_ll_load_ie; 1240 ll->save_ie = disk_ll_save_ie; 1241 ll->init_index = disk_ll_init_index; 1242 ll->open_index = disk_ll_open; 1243 ll->max_entries = disk_ll_max_entries; 1244 ll->commit = disk_ll_commit; 1245 1246 ll->nr_blocks = le64_to_cpu(smr->nr_blocks); 1247 ll->nr_allocated = le64_to_cpu(smr->nr_allocated); 1248 ll->bitmap_root = le64_to_cpu(smr->bitmap_root); 1249 ll->ref_count_root = le64_to_cpu(smr->ref_count_root); 1250 1251 return ll->open_index(ll); 1252 } 1253 1254 /*----------------------------------------------------------------*/ 1255