1 /* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-space-map.h" 8 #include "dm-space-map-common.h" 9 #include "dm-space-map-metadata.h" 10 11 #include <linux/list.h> 12 #include <linux/slab.h> 13 #include <linux/device-mapper.h> 14 #include <linux/kernel.h> 15 16 #define DM_MSG_PREFIX "space map metadata" 17 18 /*----------------------------------------------------------------*/ 19 20 /* 21 * An edge triggered threshold. 22 */ 23 struct threshold { 24 bool threshold_set; 25 bool value_set; 26 dm_block_t threshold; 27 dm_block_t current_value; 28 dm_sm_threshold_fn fn; 29 void *context; 30 }; 31 32 static void threshold_init(struct threshold *t) 33 { 34 t->threshold_set = false; 35 t->value_set = false; 36 } 37 38 static void set_threshold(struct threshold *t, dm_block_t value, 39 dm_sm_threshold_fn fn, void *context) 40 { 41 t->threshold_set = true; 42 t->threshold = value; 43 t->fn = fn; 44 t->context = context; 45 } 46 47 static bool below_threshold(struct threshold *t, dm_block_t value) 48 { 49 return t->threshold_set && value <= t->threshold; 50 } 51 52 static bool threshold_already_triggered(struct threshold *t) 53 { 54 return t->value_set && below_threshold(t, t->current_value); 55 } 56 57 static void check_threshold(struct threshold *t, dm_block_t value) 58 { 59 if (below_threshold(t, value) && 60 !threshold_already_triggered(t)) 61 t->fn(t->context); 62 63 t->value_set = true; 64 t->current_value = value; 65 } 66 67 /*----------------------------------------------------------------*/ 68 69 /* 70 * Space map interface. 71 * 72 * The low level disk format is written using the standard btree and 73 * transaction manager. This means that performing disk operations may 74 * cause us to recurse into the space map in order to allocate new blocks. 75 * For this reason we have a pool of pre-allocated blocks large enough to 76 * service any metadata_ll_disk operation. 77 */ 78 79 /* 80 * FIXME: we should calculate this based on the size of the device. 81 * Only the metadata space map needs this functionality. 82 */ 83 #define MAX_RECURSIVE_ALLOCATIONS 1024 84 85 enum block_op_type { 86 BOP_INC, 87 BOP_DEC 88 }; 89 90 struct block_op { 91 enum block_op_type type; 92 dm_block_t block; 93 }; 94 95 struct bop_ring_buffer { 96 unsigned begin; 97 unsigned end; 98 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; 99 }; 100 101 static void brb_init(struct bop_ring_buffer *brb) 102 { 103 brb->begin = 0; 104 brb->end = 0; 105 } 106 107 static bool brb_empty(struct bop_ring_buffer *brb) 108 { 109 return brb->begin == brb->end; 110 } 111 112 static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) 113 { 114 unsigned r = old + 1; 115 return r >= ARRAY_SIZE(brb->bops) ? 0 : r; 116 } 117 118 static int brb_push(struct bop_ring_buffer *brb, 119 enum block_op_type type, dm_block_t b) 120 { 121 struct block_op *bop; 122 unsigned next = brb_next(brb, brb->end); 123 124 /* 125 * We don't allow the last bop to be filled, this way we can 126 * differentiate between full and empty. 127 */ 128 if (next == brb->begin) 129 return -ENOMEM; 130 131 bop = brb->bops + brb->end; 132 bop->type = type; 133 bop->block = b; 134 135 brb->end = next; 136 137 return 0; 138 } 139 140 static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) 141 { 142 struct block_op *bop; 143 144 if (brb_empty(brb)) 145 return -ENODATA; 146 147 bop = brb->bops + brb->begin; 148 result->type = bop->type; 149 result->block = bop->block; 150 151 return 0; 152 } 153 154 static int brb_pop(struct bop_ring_buffer *brb) 155 { 156 if (brb_empty(brb)) 157 return -ENODATA; 158 159 brb->begin = brb_next(brb, brb->begin); 160 161 return 0; 162 } 163 164 /*----------------------------------------------------------------*/ 165 166 struct sm_metadata { 167 struct dm_space_map sm; 168 169 struct ll_disk ll; 170 struct ll_disk old_ll; 171 172 dm_block_t begin; 173 174 unsigned recursion_count; 175 unsigned allocated_this_transaction; 176 struct bop_ring_buffer uncommitted; 177 178 struct threshold threshold; 179 }; 180 181 static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) 182 { 183 int r = brb_push(&smm->uncommitted, type, b); 184 185 if (r) { 186 DMERR("too many recursive allocations"); 187 return -ENOMEM; 188 } 189 190 return 0; 191 } 192 193 static int commit_bop(struct sm_metadata *smm, struct block_op *op) 194 { 195 int r = 0; 196 enum allocation_event ev; 197 198 switch (op->type) { 199 case BOP_INC: 200 r = sm_ll_inc(&smm->ll, op->block, &ev); 201 break; 202 203 case BOP_DEC: 204 r = sm_ll_dec(&smm->ll, op->block, &ev); 205 break; 206 } 207 208 return r; 209 } 210 211 static void in(struct sm_metadata *smm) 212 { 213 smm->recursion_count++; 214 } 215 216 static int apply_bops(struct sm_metadata *smm) 217 { 218 int r = 0; 219 220 while (!brb_empty(&smm->uncommitted)) { 221 struct block_op bop; 222 223 r = brb_peek(&smm->uncommitted, &bop); 224 if (r) { 225 DMERR("bug in bop ring buffer"); 226 break; 227 } 228 229 r = commit_bop(smm, &bop); 230 if (r) 231 break; 232 233 brb_pop(&smm->uncommitted); 234 } 235 236 return r; 237 } 238 239 static int out(struct sm_metadata *smm) 240 { 241 int r = 0; 242 243 /* 244 * If we're not recursing then very bad things are happening. 245 */ 246 if (!smm->recursion_count) { 247 DMERR("lost track of recursion depth"); 248 return -ENOMEM; 249 } 250 251 if (smm->recursion_count == 1) 252 r = apply_bops(smm); 253 254 smm->recursion_count--; 255 256 return r; 257 } 258 259 /* 260 * When using the out() function above, we often want to combine an error 261 * code for the operation run in the recursive context with that from 262 * out(). 263 */ 264 static int combine_errors(int r1, int r2) 265 { 266 return r1 ? r1 : r2; 267 } 268 269 static int recursing(struct sm_metadata *smm) 270 { 271 return smm->recursion_count; 272 } 273 274 static void sm_metadata_destroy(struct dm_space_map *sm) 275 { 276 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 277 278 kfree(smm); 279 } 280 281 static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 282 { 283 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 284 285 *count = smm->ll.nr_blocks; 286 287 return 0; 288 } 289 290 static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 291 { 292 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 293 294 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated - 295 smm->allocated_this_transaction; 296 297 return 0; 298 } 299 300 static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, 301 uint32_t *result) 302 { 303 int r; 304 unsigned i; 305 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 306 unsigned adjustment = 0; 307 308 /* 309 * We may have some uncommitted adjustments to add. This list 310 * should always be really short. 311 */ 312 for (i = smm->uncommitted.begin; 313 i != smm->uncommitted.end; 314 i = brb_next(&smm->uncommitted, i)) { 315 struct block_op *op = smm->uncommitted.bops + i; 316 317 if (op->block != b) 318 continue; 319 320 switch (op->type) { 321 case BOP_INC: 322 adjustment++; 323 break; 324 325 case BOP_DEC: 326 adjustment--; 327 break; 328 } 329 } 330 331 r = sm_ll_lookup(&smm->ll, b, result); 332 if (r) 333 return r; 334 335 *result += adjustment; 336 337 return 0; 338 } 339 340 static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, 341 dm_block_t b, int *result) 342 { 343 int r, adjustment = 0; 344 unsigned i; 345 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 346 uint32_t rc; 347 348 /* 349 * We may have some uncommitted adjustments to add. This list 350 * should always be really short. 351 */ 352 for (i = smm->uncommitted.begin; 353 i != smm->uncommitted.end; 354 i = brb_next(&smm->uncommitted, i)) { 355 356 struct block_op *op = smm->uncommitted.bops + i; 357 358 if (op->block != b) 359 continue; 360 361 switch (op->type) { 362 case BOP_INC: 363 adjustment++; 364 break; 365 366 case BOP_DEC: 367 adjustment--; 368 break; 369 } 370 } 371 372 if (adjustment > 1) { 373 *result = 1; 374 return 0; 375 } 376 377 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc); 378 if (r) 379 return r; 380 381 if (rc == 3) 382 /* 383 * We err on the side of caution, and always return true. 384 */ 385 *result = 1; 386 else 387 *result = rc + adjustment > 1; 388 389 return 0; 390 } 391 392 static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, 393 uint32_t count) 394 { 395 int r, r2; 396 enum allocation_event ev; 397 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 398 399 if (smm->recursion_count) { 400 DMERR("cannot recurse set_count()"); 401 return -EINVAL; 402 } 403 404 in(smm); 405 r = sm_ll_insert(&smm->ll, b, count, &ev); 406 r2 = out(smm); 407 408 return combine_errors(r, r2); 409 } 410 411 static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) 412 { 413 int r, r2 = 0; 414 enum allocation_event ev; 415 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 416 417 if (recursing(smm)) 418 r = add_bop(smm, BOP_INC, b); 419 else { 420 in(smm); 421 r = sm_ll_inc(&smm->ll, b, &ev); 422 r2 = out(smm); 423 } 424 425 return combine_errors(r, r2); 426 } 427 428 static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) 429 { 430 int r, r2 = 0; 431 enum allocation_event ev; 432 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 433 434 if (recursing(smm)) 435 r = add_bop(smm, BOP_DEC, b); 436 else { 437 in(smm); 438 r = sm_ll_dec(&smm->ll, b, &ev); 439 r2 = out(smm); 440 } 441 442 return combine_errors(r, r2); 443 } 444 445 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 446 { 447 int r, r2 = 0; 448 enum allocation_event ev; 449 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 450 451 r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); 452 if (r) 453 return r; 454 455 smm->begin = *b + 1; 456 457 if (recursing(smm)) 458 r = add_bop(smm, BOP_INC, *b); 459 else { 460 in(smm); 461 r = sm_ll_inc(&smm->ll, *b, &ev); 462 r2 = out(smm); 463 } 464 465 if (!r) 466 smm->allocated_this_transaction++; 467 468 return combine_errors(r, r2); 469 } 470 471 static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 472 { 473 dm_block_t count; 474 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 475 476 int r = sm_metadata_new_block_(sm, b); 477 if (r) { 478 DMERR_LIMIT("unable to allocate new metadata block"); 479 return r; 480 } 481 482 r = sm_metadata_get_nr_free(sm, &count); 483 if (r) { 484 DMERR_LIMIT("couldn't get free block count"); 485 return r; 486 } 487 488 check_threshold(&smm->threshold, count); 489 490 return r; 491 } 492 493 static int sm_metadata_commit(struct dm_space_map *sm) 494 { 495 int r; 496 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 497 498 r = sm_ll_commit(&smm->ll); 499 if (r) 500 return r; 501 502 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 503 smm->begin = 0; 504 smm->allocated_this_transaction = 0; 505 506 return 0; 507 } 508 509 static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 510 dm_block_t threshold, 511 dm_sm_threshold_fn fn, 512 void *context) 513 { 514 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 515 516 set_threshold(&smm->threshold, threshold, fn, context); 517 518 return 0; 519 } 520 521 static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 522 { 523 *result = sizeof(struct disk_sm_root); 524 525 return 0; 526 } 527 528 static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max) 529 { 530 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 531 struct disk_sm_root root_le; 532 533 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks); 534 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated); 535 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root); 536 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root); 537 538 if (max < sizeof(root_le)) 539 return -ENOSPC; 540 541 memcpy(where_le, &root_le, sizeof(root_le)); 542 543 return 0; 544 } 545 546 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 547 548 static const struct dm_space_map ops = { 549 .destroy = sm_metadata_destroy, 550 .extend = sm_metadata_extend, 551 .get_nr_blocks = sm_metadata_get_nr_blocks, 552 .get_nr_free = sm_metadata_get_nr_free, 553 .get_count = sm_metadata_get_count, 554 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 555 .set_count = sm_metadata_set_count, 556 .inc_block = sm_metadata_inc_block, 557 .dec_block = sm_metadata_dec_block, 558 .new_block = sm_metadata_new_block, 559 .commit = sm_metadata_commit, 560 .root_size = sm_metadata_root_size, 561 .copy_root = sm_metadata_copy_root, 562 .register_threshold_callback = sm_metadata_register_threshold_callback 563 }; 564 565 /*----------------------------------------------------------------*/ 566 567 /* 568 * When a new space map is created that manages its own space. We use 569 * this tiny bootstrap allocator. 570 */ 571 static void sm_bootstrap_destroy(struct dm_space_map *sm) 572 { 573 } 574 575 static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 576 { 577 DMERR("bootstrap doesn't support extend"); 578 579 return -EINVAL; 580 } 581 582 static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 583 { 584 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 585 586 *count = smm->ll.nr_blocks; 587 588 return 0; 589 } 590 591 static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 592 { 593 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 594 595 *count = smm->ll.nr_blocks - smm->begin; 596 597 return 0; 598 } 599 600 static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, 601 uint32_t *result) 602 { 603 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 604 605 *result = (b < smm->begin) ? 1 : 0; 606 607 return 0; 608 } 609 610 static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, 611 dm_block_t b, int *result) 612 { 613 *result = 0; 614 615 return 0; 616 } 617 618 static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 619 uint32_t count) 620 { 621 DMERR("bootstrap doesn't support set_count"); 622 623 return -EINVAL; 624 } 625 626 static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) 627 { 628 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 629 630 /* 631 * We know the entire device is unused. 632 */ 633 if (smm->begin == smm->ll.nr_blocks) 634 return -ENOSPC; 635 636 *b = smm->begin++; 637 638 return 0; 639 } 640 641 static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) 642 { 643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 644 645 return add_bop(smm, BOP_INC, b); 646 } 647 648 static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) 649 { 650 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 651 652 return add_bop(smm, BOP_DEC, b); 653 } 654 655 static int sm_bootstrap_commit(struct dm_space_map *sm) 656 { 657 return 0; 658 } 659 660 static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 661 { 662 DMERR("bootstrap doesn't support root_size"); 663 664 return -EINVAL; 665 } 666 667 static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 668 size_t max) 669 { 670 DMERR("bootstrap doesn't support copy_root"); 671 672 return -EINVAL; 673 } 674 675 static const struct dm_space_map bootstrap_ops = { 676 .destroy = sm_bootstrap_destroy, 677 .extend = sm_bootstrap_extend, 678 .get_nr_blocks = sm_bootstrap_get_nr_blocks, 679 .get_nr_free = sm_bootstrap_get_nr_free, 680 .get_count = sm_bootstrap_get_count, 681 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 682 .set_count = sm_bootstrap_set_count, 683 .inc_block = sm_bootstrap_inc_block, 684 .dec_block = sm_bootstrap_dec_block, 685 .new_block = sm_bootstrap_new_block, 686 .commit = sm_bootstrap_commit, 687 .root_size = sm_bootstrap_root_size, 688 .copy_root = sm_bootstrap_copy_root, 689 .register_threshold_callback = NULL 690 }; 691 692 /*----------------------------------------------------------------*/ 693 694 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 695 { 696 int r, i; 697 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 698 dm_block_t old_len = smm->ll.nr_blocks; 699 700 /* 701 * Flick into a mode where all blocks get allocated in the new area. 702 */ 703 smm->begin = old_len; 704 memcpy(sm, &bootstrap_ops, sizeof(*sm)); 705 706 /* 707 * Extend. 708 */ 709 r = sm_ll_extend(&smm->ll, extra_blocks); 710 if (r) 711 goto out; 712 713 /* 714 * We repeatedly increment then commit until the commit doesn't 715 * allocate any new blocks. 716 */ 717 do { 718 for (i = old_len; !r && i < smm->begin; i++) 719 r = add_bop(smm, BOP_INC, i); 720 721 if (r) 722 goto out; 723 724 old_len = smm->begin; 725 726 r = apply_bops(smm); 727 if (r) { 728 DMERR("%s: apply_bops failed", __func__); 729 goto out; 730 } 731 732 r = sm_ll_commit(&smm->ll); 733 if (r) 734 goto out; 735 736 } while (old_len != smm->begin); 737 738 out: 739 /* 740 * Switch back to normal behaviour. 741 */ 742 memcpy(sm, &ops, sizeof(*sm)); 743 return r; 744 } 745 746 /*----------------------------------------------------------------*/ 747 748 struct dm_space_map *dm_sm_metadata_init(void) 749 { 750 struct sm_metadata *smm; 751 752 smm = kmalloc(sizeof(*smm), GFP_KERNEL); 753 if (!smm) 754 return ERR_PTR(-ENOMEM); 755 756 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 757 758 return &smm->sm; 759 } 760 761 int dm_sm_metadata_create(struct dm_space_map *sm, 762 struct dm_transaction_manager *tm, 763 dm_block_t nr_blocks, 764 dm_block_t superblock) 765 { 766 int r; 767 dm_block_t i; 768 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 769 770 smm->begin = superblock + 1; 771 smm->recursion_count = 0; 772 smm->allocated_this_transaction = 0; 773 brb_init(&smm->uncommitted); 774 threshold_init(&smm->threshold); 775 776 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 777 778 r = sm_ll_new_metadata(&smm->ll, tm); 779 if (!r) { 780 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) 781 nr_blocks = DM_SM_METADATA_MAX_BLOCKS; 782 r = sm_ll_extend(&smm->ll, nr_blocks); 783 } 784 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 785 if (r) 786 return r; 787 788 /* 789 * Now we need to update the newly created data structures with the 790 * allocated blocks that they were built from. 791 */ 792 for (i = superblock; !r && i < smm->begin; i++) 793 r = add_bop(smm, BOP_INC, i); 794 795 if (r) 796 return r; 797 798 r = apply_bops(smm); 799 if (r) { 800 DMERR("%s: apply_bops failed", __func__); 801 return r; 802 } 803 804 return sm_metadata_commit(sm); 805 } 806 807 int dm_sm_metadata_open(struct dm_space_map *sm, 808 struct dm_transaction_manager *tm, 809 void *root_le, size_t len) 810 { 811 int r; 812 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 813 814 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); 815 if (r) 816 return r; 817 818 smm->begin = 0; 819 smm->recursion_count = 0; 820 smm->allocated_this_transaction = 0; 821 brb_init(&smm->uncommitted); 822 threshold_init(&smm->threshold); 823 824 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 825 return 0; 826 } 827