/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/
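/*
 * Illustrative sketch (not part of the driver): what "edge triggered"
 * means here.  With a threshold of 100, the callback fires only when the
 * value crosses from above the threshold to at-or-below it; repeated
 * checks that stay below are silent until the value rises back above.
 * low_space() and threshold_example() are hypothetical names.
 */
#if 0
static void low_space(void *context)
{
	DMWARN("metadata space is running low");
}

static void threshold_example(void)
{
	struct threshold t;

	threshold_init(&t);
	set_threshold(&t, 100, low_space, NULL);

	check_threshold(&t, 150);	/* above threshold: no callback */
	check_threshold(&t, 90);	/* crosses 100: fires once */
	check_threshold(&t, 80);	/* still below: silent */
	check_threshold(&t, 150);	/* back above: re-arms the edge */
	check_threshold(&t, 99);	/* crosses again: fires again */
}
#endif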
/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager. This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;

	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled; this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb)
{
	if (brb_empty(brb))
		return -ENODATA;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/
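/*
 * Illustrative sketch (not part of the driver): the array holds
 * MAX_RECURSIVE_ALLOCATIONS + 1 slots, but the last free slot is never
 * filled, so begin == end can only ever mean "empty".  The effective
 * capacity is therefore exactly MAX_RECURSIVE_ALLOCATIONS entries.
 * brb_example() is a hypothetical name.
 */
#if 0
static void brb_example(void)
{
	struct bop_ring_buffer brb;
	unsigned i;

	brb_init(&brb);

	/* All MAX_RECURSIVE_ALLOCATIONS pushes succeed ... */
	for (i = 0; i < MAX_RECURSIVE_ALLOCATIONS; i++)
		BUG_ON(brb_push(&brb, BOP_INC, i));

	/* ... but one more would make begin == end alias "empty". */
	BUG_ON(brb_push(&brb, BOP_INC, i) != -ENOMEM);
}
#endif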
struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_peek(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;

		brb_pop(&smm->uncommitted);
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}
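/*
 * Illustrative sketch (not part of the driver): the shape that every
 * top-level mutation below follows.  At depth zero we call the low-level
 * code directly; any allocation that code triggers re-enters the space
 * map at depth one and is merely queued as a bop.  out() replays the
 * queue in FIFO order as the outermost call unwinds.  mutate_example()
 * is a hypothetical name.
 */
#if 0
static int mutate_example(struct sm_metadata *smm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;

	if (recursing(smm))
		return add_bop(smm, BOP_INC, b);	/* depth > 0: just queue */

	in(smm);				/* depth 0 -> 1 */
	r = sm_ll_inc(&smm->ll, b, &ev);	/* may re-enter and queue bops */
	r2 = out(smm);				/* depth 1 -> 0: replay queue */

	return combine_errors(r, r2);
}
#endif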
static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * A bitmap entry of 3 means the count has saturated and
		 * the true value lives in the ref count btree.  We err on
		 * the side of caution and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						   dm_block_t threshold,
						   dm_sm_threshold_fn fn,
						   void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}
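/*
 * Illustrative sketch (not part of the driver): decoding a root that
 * copy_root() wrote.  The on-disk root is four little-endian 64-bit
 * fields, so reading it back is just the reverse of the cpu_to_le64()
 * packing above.  unpack_root_example() is a hypothetical name.
 */
#if 0
static void unpack_root_example(const void *where_le)
{
	struct disk_sm_root root_le;

	memcpy(&root_le, where_le, sizeof(root_le));

	pr_info("nr_blocks %llu, nr_allocated %llu\n",
		(unsigned long long) le64_to_cpu(root_le.nr_blocks),
		(unsigned long long) le64_to_cpu(root_le.nr_allocated));
}
#endif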
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}
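/*
 * Illustrative sketch (not part of the driver): the bootstrap allocator
 * is just a cursor.  Blocks below smm->begin count as allocated and
 * everything above it as free; new_block() hands out the cursor and
 * advances it.  bootstrap_example() is a hypothetical name; real callers
 * reach these functions through the bootstrap_ops vtable below.
 */
#if 0
static void bootstrap_example(struct dm_space_map *sm)
{
	dm_block_t b;
	uint32_t count;

	if (!sm_bootstrap_new_block(sm, &b)) {
		sm_bootstrap_get_count(sm, b, &count);	/* count == 1 */
		sm_bootstrap_get_count(sm, b + 1, &count);	/* count == 0 */
	}
}
#endif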
static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++)
			r = add_bop(smm, BOP_INC, i);

		if (r)
			goto out;

		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}
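/*
 * Worked trace of the loop above (illustrative numbers only): why it has
 * to iterate to a fixpoint.  Incrementing the new blocks' reference
 * counts can itself allocate index/bitmap blocks, advancing smm->begin,
 * and those blocks then need their counts incremented too.
 *
 *   pass 1: queue BOP_INCs for [old_len, begin); replaying them
 *           allocates, say, two new bitmap blocks, so begin advances.
 *   pass 2: queue BOP_INCs for just those two blocks; replaying them
 *           allocates nothing further.
 *   pass 3: old_len == smm->begin, so the loop exits.
 */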
/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = add_bop(smm, BOP_INC, i);

	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}
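/*
 * Illustrative sketch (not part of the driver): the lifecycle a client
 * such as the thin-pool metadata layer follows for a fresh device.
 * example_bring_up() is a hypothetical name, the superblock is assumed
 * to live at block 0 here, and tm is a transaction manager created by
 * the caller; an existing device would call dm_sm_metadata_open()
 * instead of dm_sm_metadata_create().
 */
#if 0
static struct dm_space_map *example_bring_up(struct dm_transaction_manager *tm,
					     dm_block_t nr_blocks)
{
	int r;
	struct dm_space_map *sm = dm_sm_metadata_init();

	if (IS_ERR(sm))
		return sm;

	/* Fresh pool: build the on-disk structures from scratch. */
	r = dm_sm_metadata_create(sm, tm, nr_blocks, 0);
	if (r) {
		dm_sm_destroy(sm);
		return ERR_PTR(r);
	}

	return sm;
}
#endif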