/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.  The callback fires when the watched value
 * first drops to or below the threshold, and not again until the value
 * has been seen above it.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager.  This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;

	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
}
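/*
 * brb_push()/brb_pop() below implement a simple FIFO over the bops[]
 * array.  One slot is deliberately left unused so that begin == end can
 * only ever mean "empty"; the buffer is full when the slot after end
 * would collide with begin.  Illustrative sequence (a sketch, not taken
 * from the original source):
 *
 *	struct bop_ring_buffer brb;
 *	struct block_op op;
 *
 *	brb_init(&brb);
 *	brb_push(&brb, BOP_INC, 123);	// queue an increment for block 123
 *	brb_pop(&brb, &op);		// op.type == BOP_INC, op.block == 123
 *	brb_pop(&brb, &op);		// returns -ENODATA, buffer empty
 */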
static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled; this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int apply_bops(struct sm_metadata *smm)
{
	int r = 0;

	while (!brb_empty(&smm->uncommitted)) {
		struct block_op bop;

		r = brb_pop(&smm->uncommitted, &bop);
		if (r) {
			DMERR("bug in bop ring buffer");
			break;
		}

		r = commit_bop(smm, &bop);
		if (r)
			break;
	}

	return r;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1)
		r = apply_bops(smm);

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}
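/*
 * Typical use of the recursion guard above (a sketch assembled from the
 * callers later in this file, not new behaviour): a top-level operation
 * brackets the low-level call with in()/out(), and any nested inc/dec
 * that arrives while recursion_count is non-zero is queued as a bop and
 * replayed by apply_bops() when the outermost out() runs.
 *
 *	in(smm);
 *	r = sm_ll_inc(&smm->ll, b, &ev);	// may recurse into the space map
 *	r2 = out(smm);				// replays any queued bops
 *	return combine_errors(r, r2);
 */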
static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}
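/*
 * Note on the check in the function below: sm_ll_lookup_bitmap() returns
 * the 2-bit bitmap entry for the block.  Values 0-2 are the literal
 * reference count, while 3 indicates the real count has overflowed into
 * the ref count tree (see dm-space-map-common), so we conservatively
 * treat it as "more than one" without doing the extra lookup.
 */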
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						   dm_block_t threshold,
						   dm_sm_threshold_fn fn,
						   void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}
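/*
 * copy_root() below serialises the fields needed to re-open the space
 * map: block counts plus the roots of the bitmap and ref count btrees,
 * all little-endian in a struct disk_sm_root.  This is the blob that
 * dm_sm_metadata_open() is later handed as root_le.
 */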
static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}
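/*
 * During bootstrap, allocation is trivially linear: blocks are handed
 * out in order from smm->begin, and inc/dec requests are only queued in
 * the bop ring buffer.  They are replayed via apply_bops() once the real
 * metadata space map is in place (see dm_sm_metadata_create() and
 * sm_metadata_extend()).
 */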
static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++) {
			r = sm_ll_inc(&smm->ll, i, &ev);
			if (r)
				goto out;
		}
		old_len = smm->begin;

		r = apply_bops(smm);
		if (r) {
			DMERR("%s: apply_bops failed", __func__);
			goto out;
		}

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/
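/*
 * Public entry points.  The expected lifecycle (a sketch based on how
 * the callbacks above fit together; callers normally go through the
 * dm_sm_* wrappers in dm-space-map.h) is:
 *
 *	sm = dm_sm_metadata_init();
 *	dm_sm_metadata_create(sm, tm, nr_blocks, superblock);	// fresh map
 *		or
 *	dm_sm_metadata_open(sm, tm, root_le, len);		// existing map
 *	... inc_block/dec_block/new_block ...
 *	commit and copy_root once per transaction;
 *	destroy when done.
 */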
struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = sm_ll_inc(&smm->ll, i, &ev);

	if (r)
		return r;

	r = apply_bops(smm);
	if (r) {
		DMERR("%s: apply_bops failed", __func__);
		return r;
	}

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}