/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager. This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;
	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled, this way we can
	 * differentiate between full and empty.
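	 *
	 * With MAX_RECURSIVE_ALLOCATIONS + 1 slots, begin == end always
	 * means "empty" and (brb_next(end) == begin) always means "full",
	 * so no separate element count is needed.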
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb)
{
	if (brb_empty(brb))
		return -ENODATA;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

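/*
 * Replay the block ops that were queued up while recursing, applying each
 * one to the on-disk space map in order and stopping at the first failure.
 */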
static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_peek(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;

		brb_pop(&smm->uncommitted);
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {

		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
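		 * In the common space map format a bitmap entry of 3 means
		 * the real count is held in the ref count tree and is at
		 * least 3, so we report the block as shared even though
		 * pending decrements might later bring it down.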
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						   dm_block_t threshold,
						   dm_sm_threshold_fn fn,
						   void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

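/*
 * Pack the current root (block counts plus the bitmap and ref count tree
 * roots) into little-endian on-disk format in the caller's buffer.
 */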
static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
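	 * Blocks are handed out sequentially, simply by bumping 'begin'.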
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++)
			r = add_bop(smm, BOP_INC, i);

		if (r)
			goto out;

		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
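	 * This happens whether or not the extend succeeded; the bootstrap
	 * ops are only needed while blocks in the new area are being
	 * handed out.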
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = add_bop(smm, BOP_INC, i);

	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}
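/*
 * Lifecycle note: dm_sm_metadata_init() only allocates the structure and
 * installs the normal ops table; the returned space map must be passed to
 * dm_sm_metadata_create() (fresh metadata) or dm_sm_metadata_open()
 * (existing metadata) before use, and is released through its destroy
 * method, which frees the containing sm_metadata.
 */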