/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager.  This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};
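
/*
 * A fixed-size FIFO of deferred block ops.  Ops are queued here when they
 * cannot be applied immediately (e.g. while recursing into the space map,
 * or while in bootstrap mode) and are replayed by out() once the outermost
 * operation has completed.
 */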
struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;

	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled, this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}
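
/*
 * in() and out() bracket any operation that may recurse back into the
 * space map.  When the outermost level is left, out() replays the block
 * ops that were queued on the ring buffer in the meantime.
 */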
static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1) {
		while (!brb_empty(&smm->uncommitted)) {
			struct block_op bop;

			r = brb_pop(&smm->uncommitted, &bop);
			if (r) {
				DMERR("bug in bop ring buffer");
				break;
			}

			r = commit_bop(smm, &bop);
			if (r)
				break;
		}
	}

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}
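
/*
 * Allocation searches the space map as it stood at the last commit
 * (old_ll), starting from smm->begin.  begin only moves forwards, so a
 * block is never handed out twice within one transaction, and blocks
 * freed in this transaction are not reused until after the next commit.
 */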
static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}

static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}
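
/*
 * On commit the flushed low level map becomes the new baseline: it is
 * copied into old_ll, and the allocation cursor (begin) and the
 * per-transaction allocation counter are reset.
 */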
static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						    dm_block_t threshold,
						    dm_sm_threshold_fn fn,
						    void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}

static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/
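
/*
 * extend() temporarily switches the vtable over to the bootstrap
 * allocator, so that any blocks needed while setting up the new area are
 * taken from the freshly added space; the normal ops are restored before
 * returning.
 */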
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++) {
			r = sm_ll_inc(&smm->ll, i, &ev);
			if (r)
				goto out;
		}
		old_len = smm->begin;

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;

	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = sm_ll_inc(&smm->ll, i, &ev);

	if (r)
		return r;

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));

	return 0;
}