1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2011 Red Hat, Inc. 4 * 5 * This file is released under the GPL. 6 */ 7 8 #include "dm-space-map.h" 9 #include "dm-space-map-common.h" 10 #include "dm-space-map-metadata.h" 11 12 #include <linux/list.h> 13 #include <linux/slab.h> 14 #include <linux/device-mapper.h> 15 #include <linux/kernel.h> 16 17 #define DM_MSG_PREFIX "space map metadata" 18 19 /*----------------------------------------------------------------*/ 20 21 /* 22 * An edge triggered threshold. 23 */ 24 struct threshold { 25 bool threshold_set; 26 bool value_set; 27 dm_block_t threshold; 28 dm_block_t current_value; 29 dm_sm_threshold_fn fn; 30 void *context; 31 }; 32 33 static void threshold_init(struct threshold *t) 34 { 35 t->threshold_set = false; 36 t->value_set = false; 37 } 38 39 static void set_threshold(struct threshold *t, dm_block_t value, 40 dm_sm_threshold_fn fn, void *context) 41 { 42 t->threshold_set = true; 43 t->threshold = value; 44 t->fn = fn; 45 t->context = context; 46 } 47 48 static bool below_threshold(struct threshold *t, dm_block_t value) 49 { 50 return t->threshold_set && value <= t->threshold; 51 } 52 53 static bool threshold_already_triggered(struct threshold *t) 54 { 55 return t->value_set && below_threshold(t, t->current_value); 56 } 57 58 static void check_threshold(struct threshold *t, dm_block_t value) 59 { 60 if (below_threshold(t, value) && 61 !threshold_already_triggered(t)) 62 t->fn(t->context); 63 64 t->value_set = true; 65 t->current_value = value; 66 } 67 68 /*----------------------------------------------------------------*/ 69 70 /* 71 * Space map interface. 72 * 73 * The low level disk format is written using the standard btree and 74 * transaction manager. This means that performing disk operations may 75 * cause us to recurse into the space map in order to allocate new blocks. 76 * For this reason we have a pool of pre-allocated blocks large enough to 77 * service any metadata_ll_disk operation. 78 */ 79 80 /* 81 * FIXME: we should calculate this based on the size of the device. 82 * Only the metadata space map needs this functionality. 83 */ 84 #define MAX_RECURSIVE_ALLOCATIONS 1024 85 86 enum block_op_type { 87 BOP_INC, 88 BOP_DEC 89 }; 90 91 struct block_op { 92 enum block_op_type type; 93 dm_block_t b; 94 dm_block_t e; 95 }; 96 97 struct bop_ring_buffer { 98 unsigned int begin; 99 unsigned int end; 100 struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; 101 }; 102 103 static void brb_init(struct bop_ring_buffer *brb) 104 { 105 brb->begin = 0; 106 brb->end = 0; 107 } 108 109 static bool brb_empty(struct bop_ring_buffer *brb) 110 { 111 return brb->begin == brb->end; 112 } 113 114 static unsigned int brb_next(struct bop_ring_buffer *brb, unsigned int old) 115 { 116 unsigned int r = old + 1; 117 118 return r >= ARRAY_SIZE(brb->bops) ? 0 : r; 119 } 120 121 static int brb_push(struct bop_ring_buffer *brb, 122 enum block_op_type type, dm_block_t b, dm_block_t e) 123 { 124 struct block_op *bop; 125 unsigned int next = brb_next(brb, brb->end); 126 127 /* 128 * We don't allow the last bop to be filled, this way we can 129 * differentiate between full and empty. 130 */ 131 if (next == brb->begin) 132 return -ENOMEM; 133 134 bop = brb->bops + brb->end; 135 bop->type = type; 136 bop->b = b; 137 bop->e = e; 138 139 brb->end = next; 140 141 return 0; 142 } 143 144 static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) 145 { 146 struct block_op *bop; 147 148 if (brb_empty(brb)) 149 return -ENODATA; 150 151 bop = brb->bops + brb->begin; 152 memcpy(result, bop, sizeof(*result)); 153 return 0; 154 } 155 156 static int brb_pop(struct bop_ring_buffer *brb) 157 { 158 if (brb_empty(brb)) 159 return -ENODATA; 160 161 brb->begin = brb_next(brb, brb->begin); 162 163 return 0; 164 } 165 166 /*----------------------------------------------------------------*/ 167 168 struct sm_metadata { 169 struct dm_space_map sm; 170 171 struct ll_disk ll; 172 struct ll_disk old_ll; 173 174 dm_block_t begin; 175 176 unsigned int recursion_count; 177 unsigned int allocated_this_transaction; 178 struct bop_ring_buffer uncommitted; 179 180 struct threshold threshold; 181 }; 182 183 static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b, dm_block_t e) 184 { 185 int r = brb_push(&smm->uncommitted, type, b, e); 186 187 if (r) { 188 DMERR("too many recursive allocations"); 189 return -ENOMEM; 190 } 191 192 return 0; 193 } 194 195 static int commit_bop(struct sm_metadata *smm, struct block_op *op) 196 { 197 int r = 0; 198 int32_t nr_allocations; 199 200 switch (op->type) { 201 case BOP_INC: 202 r = sm_ll_inc(&smm->ll, op->b, op->e, &nr_allocations); 203 break; 204 205 case BOP_DEC: 206 r = sm_ll_dec(&smm->ll, op->b, op->e, &nr_allocations); 207 break; 208 } 209 210 return r; 211 } 212 213 static void in(struct sm_metadata *smm) 214 { 215 smm->recursion_count++; 216 } 217 218 static int apply_bops(struct sm_metadata *smm) 219 { 220 int r = 0; 221 222 while (!brb_empty(&smm->uncommitted)) { 223 struct block_op bop; 224 225 r = brb_peek(&smm->uncommitted, &bop); 226 if (r) { 227 DMERR("bug in bop ring buffer"); 228 break; 229 } 230 231 r = commit_bop(smm, &bop); 232 if (r) 233 break; 234 235 brb_pop(&smm->uncommitted); 236 } 237 238 return r; 239 } 240 241 static int out(struct sm_metadata *smm) 242 { 243 int r = 0; 244 245 /* 246 * If we're not recursing then very bad things are happening. 247 */ 248 if (!smm->recursion_count) { 249 DMERR("lost track of recursion depth"); 250 return -ENOMEM; 251 } 252 253 if (smm->recursion_count == 1) 254 r = apply_bops(smm); 255 256 smm->recursion_count--; 257 258 return r; 259 } 260 261 /* 262 * When using the out() function above, we often want to combine an error 263 * code for the operation run in the recursive context with that from 264 * out(). 265 */ 266 static int combine_errors(int r1, int r2) 267 { 268 return r1 ? r1 : r2; 269 } 270 271 static int recursing(struct sm_metadata *smm) 272 { 273 return smm->recursion_count; 274 } 275 276 static void sm_metadata_destroy(struct dm_space_map *sm) 277 { 278 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 279 280 kvfree(smm); 281 } 282 283 static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 284 { 285 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 286 287 *count = smm->ll.nr_blocks; 288 289 return 0; 290 } 291 292 static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 293 { 294 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 295 296 *count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated - 297 smm->allocated_this_transaction; 298 299 return 0; 300 } 301 302 static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, 303 uint32_t *result) 304 { 305 int r; 306 unsigned int i; 307 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 308 unsigned int adjustment = 0; 309 310 /* 311 * We may have some uncommitted adjustments to add. This list 312 * should always be really short. 313 */ 314 for (i = smm->uncommitted.begin; 315 i != smm->uncommitted.end; 316 i = brb_next(&smm->uncommitted, i)) { 317 struct block_op *op = smm->uncommitted.bops + i; 318 319 if (b < op->b || b >= op->e) 320 continue; 321 322 switch (op->type) { 323 case BOP_INC: 324 adjustment++; 325 break; 326 327 case BOP_DEC: 328 adjustment--; 329 break; 330 } 331 } 332 333 r = sm_ll_lookup(&smm->ll, b, result); 334 if (r) 335 return r; 336 337 *result += adjustment; 338 339 return 0; 340 } 341 342 static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, 343 dm_block_t b, int *result) 344 { 345 int r, adjustment = 0; 346 unsigned int i; 347 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 348 uint32_t rc; 349 350 /* 351 * We may have some uncommitted adjustments to add. This list 352 * should always be really short. 353 */ 354 for (i = smm->uncommitted.begin; 355 i != smm->uncommitted.end; 356 i = brb_next(&smm->uncommitted, i)) { 357 358 struct block_op *op = smm->uncommitted.bops + i; 359 360 if (b < op->b || b >= op->e) 361 continue; 362 363 switch (op->type) { 364 case BOP_INC: 365 adjustment++; 366 break; 367 368 case BOP_DEC: 369 adjustment--; 370 break; 371 } 372 } 373 374 if (adjustment > 1) { 375 *result = 1; 376 return 0; 377 } 378 379 r = sm_ll_lookup_bitmap(&smm->ll, b, &rc); 380 if (r) 381 return r; 382 383 if (rc == 3) 384 /* 385 * We err on the side of caution, and always return true. 386 */ 387 *result = 1; 388 else 389 *result = rc + adjustment > 1; 390 391 return 0; 392 } 393 394 static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, 395 uint32_t count) 396 { 397 int r, r2; 398 int32_t nr_allocations; 399 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 400 401 if (smm->recursion_count) { 402 DMERR("cannot recurse set_count()"); 403 return -EINVAL; 404 } 405 406 in(smm); 407 r = sm_ll_insert(&smm->ll, b, count, &nr_allocations); 408 r2 = out(smm); 409 410 return combine_errors(r, r2); 411 } 412 413 static int sm_metadata_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 414 { 415 int r, r2 = 0; 416 int32_t nr_allocations; 417 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 418 419 if (recursing(smm)) { 420 r = add_bop(smm, BOP_INC, b, e); 421 if (r) 422 return r; 423 } else { 424 in(smm); 425 r = sm_ll_inc(&smm->ll, b, e, &nr_allocations); 426 r2 = out(smm); 427 } 428 429 return combine_errors(r, r2); 430 } 431 432 static int sm_metadata_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 433 { 434 int r, r2 = 0; 435 int32_t nr_allocations; 436 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 437 438 if (recursing(smm)) 439 r = add_bop(smm, BOP_DEC, b, e); 440 else { 441 in(smm); 442 r = sm_ll_dec(&smm->ll, b, e, &nr_allocations); 443 r2 = out(smm); 444 } 445 446 return combine_errors(r, r2); 447 } 448 449 static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 450 { 451 int r, r2 = 0; 452 int32_t nr_allocations; 453 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 454 455 /* 456 * Any block we allocate has to be free in both the old and current ll. 457 */ 458 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); 459 if (r == -ENOSPC) { 460 /* 461 * There's no free block between smm->begin and the end of the metadata device. 462 * We search before smm->begin in case something has been freed. 463 */ 464 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b); 465 } 466 467 if (r) 468 return r; 469 470 smm->begin = *b + 1; 471 472 if (recursing(smm)) 473 r = add_bop(smm, BOP_INC, *b, *b + 1); 474 else { 475 in(smm); 476 r = sm_ll_inc(&smm->ll, *b, *b + 1, &nr_allocations); 477 r2 = out(smm); 478 } 479 480 if (!r) 481 smm->allocated_this_transaction++; 482 483 return combine_errors(r, r2); 484 } 485 486 static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 487 { 488 dm_block_t count; 489 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 490 491 int r = sm_metadata_new_block_(sm, b); 492 493 if (r) { 494 DMERR_LIMIT("unable to allocate new metadata block"); 495 return r; 496 } 497 498 r = sm_metadata_get_nr_free(sm, &count); 499 if (r) { 500 DMERR_LIMIT("couldn't get free block count"); 501 return r; 502 } 503 504 check_threshold(&smm->threshold, count); 505 506 return r; 507 } 508 509 static int sm_metadata_commit(struct dm_space_map *sm) 510 { 511 int r; 512 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 513 514 r = sm_ll_commit(&smm->ll); 515 if (r) 516 return r; 517 518 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 519 smm->allocated_this_transaction = 0; 520 521 return 0; 522 } 523 524 static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 525 dm_block_t threshold, 526 dm_sm_threshold_fn fn, 527 void *context) 528 { 529 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 530 531 set_threshold(&smm->threshold, threshold, fn, context); 532 533 return 0; 534 } 535 536 static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 537 { 538 *result = sizeof(struct disk_sm_root); 539 540 return 0; 541 } 542 543 static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max) 544 { 545 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 546 struct disk_sm_root root_le; 547 548 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks); 549 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated); 550 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root); 551 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root); 552 553 if (max < sizeof(root_le)) 554 return -ENOSPC; 555 556 memcpy(where_le, &root_le, sizeof(root_le)); 557 558 return 0; 559 } 560 561 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 562 563 static const struct dm_space_map ops = { 564 .destroy = sm_metadata_destroy, 565 .extend = sm_metadata_extend, 566 .get_nr_blocks = sm_metadata_get_nr_blocks, 567 .get_nr_free = sm_metadata_get_nr_free, 568 .get_count = sm_metadata_get_count, 569 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 570 .set_count = sm_metadata_set_count, 571 .inc_blocks = sm_metadata_inc_blocks, 572 .dec_blocks = sm_metadata_dec_blocks, 573 .new_block = sm_metadata_new_block, 574 .commit = sm_metadata_commit, 575 .root_size = sm_metadata_root_size, 576 .copy_root = sm_metadata_copy_root, 577 .register_threshold_callback = sm_metadata_register_threshold_callback 578 }; 579 580 /*----------------------------------------------------------------*/ 581 582 /* 583 * When a new space map is created that manages its own space. We use 584 * this tiny bootstrap allocator. 585 */ 586 static void sm_bootstrap_destroy(struct dm_space_map *sm) 587 { 588 } 589 590 static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 591 { 592 DMERR("bootstrap doesn't support extend"); 593 594 return -EINVAL; 595 } 596 597 static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 598 { 599 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 600 601 *count = smm->ll.nr_blocks; 602 603 return 0; 604 } 605 606 static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 607 { 608 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 609 610 *count = smm->ll.nr_blocks - smm->begin; 611 612 return 0; 613 } 614 615 static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, 616 uint32_t *result) 617 { 618 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 619 620 *result = (b < smm->begin) ? 1 : 0; 621 622 return 0; 623 } 624 625 static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, 626 dm_block_t b, int *result) 627 { 628 *result = 0; 629 630 return 0; 631 } 632 633 static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 634 uint32_t count) 635 { 636 DMERR("bootstrap doesn't support set_count"); 637 638 return -EINVAL; 639 } 640 641 static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) 642 { 643 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 644 645 /* 646 * We know the entire device is unused. 647 */ 648 if (smm->begin == smm->ll.nr_blocks) 649 return -ENOSPC; 650 651 *b = smm->begin++; 652 653 return 0; 654 } 655 656 static int sm_bootstrap_inc_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 657 { 658 int r; 659 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 660 661 r = add_bop(smm, BOP_INC, b, e); 662 if (r) 663 return r; 664 665 return 0; 666 } 667 668 static int sm_bootstrap_dec_blocks(struct dm_space_map *sm, dm_block_t b, dm_block_t e) 669 { 670 int r; 671 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 672 673 r = add_bop(smm, BOP_DEC, b, e); 674 if (r) 675 return r; 676 677 return 0; 678 } 679 680 static int sm_bootstrap_commit(struct dm_space_map *sm) 681 { 682 return 0; 683 } 684 685 static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 686 { 687 DMERR("bootstrap doesn't support root_size"); 688 689 return -EINVAL; 690 } 691 692 static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 693 size_t max) 694 { 695 DMERR("bootstrap doesn't support copy_root"); 696 697 return -EINVAL; 698 } 699 700 static const struct dm_space_map bootstrap_ops = { 701 .destroy = sm_bootstrap_destroy, 702 .extend = sm_bootstrap_extend, 703 .get_nr_blocks = sm_bootstrap_get_nr_blocks, 704 .get_nr_free = sm_bootstrap_get_nr_free, 705 .get_count = sm_bootstrap_get_count, 706 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 707 .set_count = sm_bootstrap_set_count, 708 .inc_blocks = sm_bootstrap_inc_blocks, 709 .dec_blocks = sm_bootstrap_dec_blocks, 710 .new_block = sm_bootstrap_new_block, 711 .commit = sm_bootstrap_commit, 712 .root_size = sm_bootstrap_root_size, 713 .copy_root = sm_bootstrap_copy_root, 714 .register_threshold_callback = NULL 715 }; 716 717 /*----------------------------------------------------------------*/ 718 719 static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 720 { 721 int r; 722 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 723 dm_block_t old_len = smm->ll.nr_blocks; 724 725 /* 726 * Flick into a mode where all blocks get allocated in the new area. 727 */ 728 smm->begin = old_len; 729 memcpy(sm, &bootstrap_ops, sizeof(*sm)); 730 731 /* 732 * Extend. 733 */ 734 r = sm_ll_extend(&smm->ll, extra_blocks); 735 if (r) 736 goto out; 737 738 /* 739 * We repeatedly increment then commit until the commit doesn't 740 * allocate any new blocks. 741 */ 742 do { 743 r = add_bop(smm, BOP_INC, old_len, smm->begin); 744 if (r) 745 goto out; 746 747 old_len = smm->begin; 748 749 r = apply_bops(smm); 750 if (r) { 751 DMERR("%s: apply_bops failed", __func__); 752 goto out; 753 } 754 755 r = sm_ll_commit(&smm->ll); 756 if (r) 757 goto out; 758 759 } while (old_len != smm->begin); 760 761 out: 762 /* 763 * Switch back to normal behaviour. 764 */ 765 memcpy(sm, &ops, sizeof(*sm)); 766 return r; 767 } 768 769 /*----------------------------------------------------------------*/ 770 771 struct dm_space_map *dm_sm_metadata_init(void) 772 { 773 struct sm_metadata *smm; 774 775 smm = kvmalloc(sizeof(*smm), GFP_KERNEL); 776 if (!smm) 777 return ERR_PTR(-ENOMEM); 778 779 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 780 781 return &smm->sm; 782 } 783 784 int dm_sm_metadata_create(struct dm_space_map *sm, 785 struct dm_transaction_manager *tm, 786 dm_block_t nr_blocks, 787 dm_block_t superblock) 788 { 789 int r; 790 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 791 792 smm->begin = superblock + 1; 793 smm->recursion_count = 0; 794 smm->allocated_this_transaction = 0; 795 brb_init(&smm->uncommitted); 796 threshold_init(&smm->threshold); 797 798 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 799 800 r = sm_ll_new_metadata(&smm->ll, tm); 801 if (!r) { 802 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) 803 nr_blocks = DM_SM_METADATA_MAX_BLOCKS; 804 r = sm_ll_extend(&smm->ll, nr_blocks); 805 } 806 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 807 if (r) 808 return r; 809 810 /* 811 * Now we need to update the newly created data structures with the 812 * allocated blocks that they were built from. 813 */ 814 r = add_bop(smm, BOP_INC, superblock, smm->begin); 815 if (r) 816 return r; 817 818 r = apply_bops(smm); 819 if (r) { 820 DMERR("%s: apply_bops failed", __func__); 821 return r; 822 } 823 824 return sm_metadata_commit(sm); 825 } 826 827 int dm_sm_metadata_open(struct dm_space_map *sm, 828 struct dm_transaction_manager *tm, 829 void *root_le, size_t len) 830 { 831 int r; 832 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 833 834 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); 835 if (r) 836 return r; 837 838 smm->begin = 0; 839 smm->recursion_count = 0; 840 smm->allocated_this_transaction = 0; 841 brb_init(&smm->uncommitted); 842 threshold_init(&smm->threshold); 843 844 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 845 return 0; 846 } 847