/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
#include <linux/kernel.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager.  This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t b;
	dm_block_t e;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;
	return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
}
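
/*
 * The ring buffer holds MAX_RECURSIVE_ALLOCATIONS + 1 slots, but one slot
 * is always left unused so that begin == end unambiguously means 'empty'.
 * brb_push() returns -ENOMEM when the buffer is full; brb_peek() and
 * brb_pop() return -ENODATA when it is empty.
 */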

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b, dm_block_t e)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled, this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->b = b;
	bop->e = e;

	brb->end = next;

	return 0;
}

static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	memcpy(result, bop, sizeof(*result));
	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb)
{
	if (brb_empty(brb))
		return -ENODATA;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e)
{
	int r = brb_push(&smm->uncommitted, type, b, e);
	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	int32_t nr_allocations;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations);
		break;
	}

	return r;
}
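
/*
 * in()/out() bracket any operation that may recurse back into this space
 * map.  While the recursion count is raised, nested inc/dec requests are
 * queued in the 'uncommitted' ring buffer rather than applied directly;
 * out() replays them via apply_bops() once the outermost operation has
 * finished with the on-disk structures.
 */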
static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_peek(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;

		brb_pop(&smm->uncommitted);
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}
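
/*
 * The free count below is computed against old_ll, ie. the state at the
 * last commit, less anything allocated in the current transaction.
 * Blocks freed in this transaction are not reported as free (and cannot
 * be reallocated) until after the next commit.
 */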
static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (b < op->b || b >= op->e)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {

		struct block_op *op = smm->uncommitted.bops + i;

		if (b < op->b || b >= op->e)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	int32_t nr_allocations;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &nr_allocations);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
	int r, r2 = 0;
	int32_t nr_allocations;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm)) {
		r = add_bop(smm, BOP_INC, b, e);
		if (r)
			return r;
	} else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, e, &nr_allocations);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
	int r, r2 = 0;
	int32_t nr_allocations;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b, e);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, e, &nr_allocations);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	int32_t nr_allocations;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * Any block we allocate has to be free in both the old and current ll.
	 */
	r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
	if (r == -ENOSPC) {
		/*
		 * There's no free block between smm->begin and the end of the metadata device.
		 * We search before smm->begin in case something has been freed.
		 */
		r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b);
	}

	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b, *b + 1);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}
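
/*
 * Committing flushes the low level space map and then snapshots ll into
 * old_ll, so the next transaction starts with old_ll and ll identical and
 * with nothing counted as allocated_this_transaction.
 */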
static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						    dm_block_t threshold,
						    dm_sm_threshold_fn fn,
						    void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static const struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_blocks = sm_metadata_inc_blocks,
	.dec_blocks = sm_metadata_dec_blocks,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}
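
/*
 * The bootstrap allocator only ever hands out blocks from a completely
 * unused region, so it simply allocates linearly from smm->begin.
 * Reference count changes are queued as block_ops and are only replayed
 * once the real low level space map structures exist.
 */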
static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = add_bop(smm, BOP_INC, b, e);
	if (r)
		return r;

	return 0;
}

static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = add_bop(smm, BOP_DEC, b, e);
	if (r)
		return r;

	return 0;
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static const struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_blocks = sm_bootstrap_inc_blocks,
	.dec_blocks = sm_bootstrap_dec_blocks,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/
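
/*
 * Extending (and dm_sm_metadata_create() below) temporarily swaps the
 * space map's ops for bootstrap_ops, so any blocks needed while building
 * or growing the on-disk structures are handed out linearly by the
 * bootstrap allocator and their reference count updates queued as
 * block_ops.  Once the low level structures are in place the queued ops
 * are replayed and the normal ops table is restored.
 */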
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		r = add_bop(smm, BOP_INC, old_len, smm->begin);
		if (r)
			goto out;

		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (!r) {
		if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
			nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
		r = sm_ll_extend(&smm->ll, nr_blocks);
	}
	memcpy(&smm->sm, &ops, sizeof(smm->sm));
	if (r)
		return r;

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	r = add_bop(smm, BOP_INC, superblock, smm->begin);
	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}