/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
#include <linux/kernel.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager.  This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

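/*
 * A fixed-size FIFO of block ops that have been queued but not yet
 * applied to the on-disk structures.  One slot is always left unused so
 * that begin == end unambiguously means empty rather than full (see
 * brb_push() below).
 */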
struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;
	return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled; this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb)
{
	if (brb_empty(brb))
		return -ENODATA;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

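/*
 * in() and out() bracket operations that may recurse back into the
 * space map.  Block ops queued with add_bop() while recursing are only
 * applied, in order, by apply_bops() once the outermost out() is
 * reached.
 */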
static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_peek(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;

		brb_pop(&smm->uncommitted);
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

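/*
 * As in sm_metadata_get_count() above, any uncommitted block ops for
 * this block are folded in before the on-disk reference count is
 * consulted.
 */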
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {

		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * Any block we allocate has to be free in both the old and current ll.
	 */
	r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						   dm_block_t threshold,
						   dm_sm_threshold_fn fn,
						   void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

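/*
 * Serialise the current space map root into its little-endian, on-disk
 * form.
 */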
static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static const struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

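/*
 * Bootstrap allocation is a simple linear scan: every block below
 * smm->begin is considered allocated, so handing out a new block just
 * means advancing smm->begin.
 */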
static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static const struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/

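/*
 * Extending the space map may itself allocate metadata blocks, so we
 * temporarily switch this sm over to the bootstrap ops: new blocks are
 * then handed out linearly from the freshly added area, and the queued
 * increments are replayed and committed until a commit allocates no
 * further blocks.
 */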
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++)
			r = add_bop(smm, BOP_INC, i);

		if (r)
			goto out;

		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (!r) {
		if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
			nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
		r = sm_ll_extend(&smm->ll, nr_blocks);
	}
	memcpy(&smm->sm, &ops, sizeof(smm->sm));
	if (r)
		return r;

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = add_bop(smm, BOP_INC, i);

	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}