1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * localalloc.c 4 * 5 * Node local data allocation 6 * 7 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 8 */ 9 10 #include <linux/fs.h> 11 #include <linux/types.h> 12 #include <linux/slab.h> 13 #include <linux/highmem.h> 14 #include <linux/bitops.h> 15 16 #include <cluster/masklog.h> 17 18 #include "ocfs2.h" 19 20 #include "alloc.h" 21 #include "blockcheck.h" 22 #include "dlmglue.h" 23 #include "inode.h" 24 #include "journal.h" 25 #include "localalloc.h" 26 #include "suballoc.h" 27 #include "super.h" 28 #include "sysfile.h" 29 #include "ocfs2_trace.h" 30 31 #include "buffer_head_io.h" 32 33 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 34 35 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 36 37 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 38 struct ocfs2_dinode *alloc, 39 u32 *numbits, 40 struct ocfs2_alloc_reservation *resv); 41 42 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 43 44 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 45 handle_t *handle, 46 struct ocfs2_dinode *alloc, 47 struct inode *main_bm_inode, 48 struct buffer_head *main_bm_bh); 49 50 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 51 struct ocfs2_alloc_context **ac, 52 struct inode **bitmap_inode, 53 struct buffer_head **bitmap_bh); 54 55 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 56 handle_t *handle, 57 struct ocfs2_alloc_context *ac); 58 59 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 60 struct inode *local_alloc_inode); 61 62 /* 63 * ocfs2_la_default_mb() - determine a default size, in megabytes of 64 * the local alloc. 65 * 66 * Generally, we'd like to pick as large a local alloc as 67 * possible. Performance on large workloads tends to scale 68 * proportionally to la size. 
In addition to that, the reservations 69 * code functions more efficiently as it can reserve more windows for 70 * write. 71 * 72 * Some things work against us when trying to choose a large local alloc: 73 * 74 * - We need to ensure our sizing is picked to leave enough space in 75 * group descriptors for other allocations (such as block groups, 76 * etc). Picking default sizes which are a multiple of 4 could help 77 * - block groups are allocated in 2mb and 4mb chunks. 78 * 79 * - Likewise, we don't want to starve other nodes of bits on small 80 * file systems. This can easily be taken care of by limiting our 81 * default to a reasonable size (256M) on larger cluster sizes. 82 * 83 * - Some file systems can't support very large sizes - 4k and 8k in 84 * particular are limited to less than 128 and 256 megabytes respectively. 85 * 86 * The following reference table shows group descriptor and local 87 * alloc maximums at various cluster sizes (4k blocksize) 88 * 89 * csize: 4K group: 126M la: 121M 90 * csize: 8K group: 252M la: 243M 91 * csize: 16K group: 504M la: 486M 92 * csize: 32K group: 1008M la: 972M 93 * csize: 64K group: 2016M la: 1944M 94 * csize: 128K group: 4032M la: 3888M 95 * csize: 256K group: 8064M la: 7776M 96 * csize: 512K group: 16128M la: 15552M 97 * csize: 1024K group: 32256M la: 31104M 98 */ 99 #define OCFS2_LA_MAX_DEFAULT_MB 256 100 #define OCFS2_LA_OLD_DEFAULT 8 101 unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb) 102 { 103 unsigned int la_mb; 104 unsigned int gd_mb; 105 unsigned int la_max_mb; 106 unsigned int megs_per_slot; 107 struct super_block *sb = osb->sb; 108 109 gd_mb = ocfs2_clusters_to_megabytes(osb->sb, 110 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat)); 111 112 /* 113 * This takes care of files systems with very small group 114 * descriptors - 512 byte blocksize at cluster sizes lower 115 * than 16K and also 1k blocksize with 4k cluster size. 
116 */ 117 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192) 118 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096)) 119 return OCFS2_LA_OLD_DEFAULT; 120 121 /* 122 * Leave enough room for some block groups and make the final 123 * value we work from a multiple of 4. 124 */ 125 gd_mb -= 16; 126 gd_mb &= 0xFFFFFFFB; 127 128 la_mb = gd_mb; 129 130 /* 131 * Keep window sizes down to a reasonable default 132 */ 133 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) { 134 /* 135 * Some clustersize / blocksize combinations will have 136 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB 137 * default size, but get poor distribution when 138 * limited to exactly 256 megabytes. 139 * 140 * As an example, 16K clustersize at 4K blocksize 141 * gives us a cluster group size of 504M. Paring the 142 * local alloc size down to 256 however, would give us 143 * only one window and around 200MB left in the 144 * cluster group. Instead, find the first size below 145 * 256 which would give us an even distribution. 146 * 147 * Larger cluster group sizes actually work out pretty 148 * well when pared to 256, so we don't have to do this 149 * for any group that fits more than two 150 * OCFS2_LA_MAX_DEFAULT_MB windows. 151 */ 152 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB)) 153 la_mb = 256; 154 else { 155 unsigned int gd_mult = gd_mb; 156 157 while (gd_mult > 256) 158 gd_mult = gd_mult >> 1; 159 160 la_mb = gd_mult; 161 } 162 } 163 164 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots; 165 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot); 166 /* Too many nodes, too few disk clusters. */ 167 if (megs_per_slot < la_mb) 168 la_mb = megs_per_slot; 169 170 /* We can't store more bits than we can in a block. 
*/ 171 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb, 172 ocfs2_local_alloc_size(sb) * 8); 173 if (la_mb > la_max_mb) 174 la_mb = la_max_mb; 175 176 return la_mb; 177 } 178 179 void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb) 180 { 181 struct super_block *sb = osb->sb; 182 unsigned int la_default_mb = ocfs2_la_default_mb(osb); 183 unsigned int la_max_mb; 184 185 la_max_mb = ocfs2_clusters_to_megabytes(sb, 186 ocfs2_local_alloc_size(sb) * 8); 187 188 trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb); 189 190 if (requested_mb == -1) { 191 /* No user request - use defaults */ 192 osb->local_alloc_default_bits = 193 ocfs2_megabytes_to_clusters(sb, la_default_mb); 194 } else if (requested_mb > la_max_mb) { 195 /* Request is too big, we give the maximum available */ 196 osb->local_alloc_default_bits = 197 ocfs2_megabytes_to_clusters(sb, la_max_mb); 198 } else { 199 osb->local_alloc_default_bits = 200 ocfs2_megabytes_to_clusters(sb, requested_mb); 201 } 202 203 osb->local_alloc_bits = osb->local_alloc_default_bits; 204 } 205 206 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 207 { 208 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 209 osb->local_alloc_state == OCFS2_LA_ENABLED); 210 } 211 212 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 213 unsigned int num_clusters) 214 { 215 spin_lock(&osb->osb_lock); 216 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 217 osb->local_alloc_state == OCFS2_LA_THROTTLED) 218 if (num_clusters >= osb->local_alloc_default_bits) { 219 cancel_delayed_work(&osb->la_enable_wq); 220 osb->local_alloc_state = OCFS2_LA_ENABLED; 221 } 222 spin_unlock(&osb->osb_lock); 223 } 224 225 void ocfs2_la_enable_worker(struct work_struct *work) 226 { 227 struct ocfs2_super *osb = 228 container_of(work, struct ocfs2_super, 229 la_enable_wq.work); 230 spin_lock(&osb->osb_lock); 231 osb->local_alloc_state = OCFS2_LA_ENABLED; 232 spin_unlock(&osb->osb_lock); 233 } 234 235 /* 236 * 
Tell us whether a given allocation should use the local alloc 237 * file. Otherwise, it has to go to the main bitmap. 238 * 239 * This function does semi-dirty reads of local alloc size and state! 240 * This is ok however, as the values are re-checked once under mutex. 241 */ 242 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 243 { 244 int ret = 0; 245 int la_bits; 246 247 spin_lock(&osb->osb_lock); 248 la_bits = osb->local_alloc_bits; 249 250 if (!ocfs2_la_state_enabled(osb)) 251 goto bail; 252 253 /* la_bits should be at least twice the size (in clusters) of 254 * a new block group. We want to be sure block group 255 * allocations go through the local alloc, so allow an 256 * allocation to take up to half the bitmap. */ 257 if (bits > (la_bits / 2)) 258 goto bail; 259 260 ret = 1; 261 bail: 262 trace_ocfs2_alloc_should_use_local( 263 (unsigned long long)bits, osb->local_alloc_state, la_bits, ret); 264 spin_unlock(&osb->osb_lock); 265 return ret; 266 } 267 268 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 269 { 270 int status = 0; 271 struct ocfs2_dinode *alloc = NULL; 272 struct buffer_head *alloc_bh = NULL; 273 u32 num_used; 274 struct inode *inode = NULL; 275 struct ocfs2_local_alloc *la; 276 277 if (osb->local_alloc_bits == 0) 278 goto bail; 279 280 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 281 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 282 "than max possible %u. 
Using defaults.\n", 283 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 284 osb->local_alloc_bits = 285 ocfs2_megabytes_to_clusters(osb->sb, 286 ocfs2_la_default_mb(osb)); 287 } 288 289 /* read the alloc off disk */ 290 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 291 osb->slot_num); 292 if (!inode) { 293 status = -EINVAL; 294 mlog_errno(status); 295 goto bail; 296 } 297 298 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 299 OCFS2_BH_IGNORE_CACHE); 300 if (status < 0) { 301 mlog_errno(status); 302 goto bail; 303 } 304 305 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 306 la = OCFS2_LOCAL_ALLOC(alloc); 307 308 if (!(le32_to_cpu(alloc->i_flags) & 309 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 310 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 311 (unsigned long long)OCFS2_I(inode)->ip_blkno); 312 status = -EINVAL; 313 goto bail; 314 } 315 316 if ((la->la_size == 0) || 317 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 318 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 319 le16_to_cpu(la->la_size)); 320 status = -EINVAL; 321 goto bail; 322 } 323 324 /* do a little verification. */ 325 num_used = ocfs2_local_alloc_count_bits(alloc); 326 327 /* hopefully the local alloc has always been recovered before 328 * we load it. 
*/ 329 if (num_used 330 || alloc->id1.bitmap1.i_used 331 || alloc->id1.bitmap1.i_total 332 || la->la_bm_off) { 333 mlog(ML_ERROR, "inconsistent detected, clean journal with" 334 " unrecovered local alloc, please run fsck.ocfs2!\n" 335 "found = %u, set = %u, taken = %u, off = %u\n", 336 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 337 le32_to_cpu(alloc->id1.bitmap1.i_total), 338 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 339 340 status = -EINVAL; 341 goto bail; 342 } 343 344 osb->local_alloc_bh = alloc_bh; 345 osb->local_alloc_state = OCFS2_LA_ENABLED; 346 347 bail: 348 if (status < 0) 349 brelse(alloc_bh); 350 iput(inode); 351 352 trace_ocfs2_load_local_alloc(osb->local_alloc_bits); 353 354 if (status) 355 mlog_errno(status); 356 return status; 357 } 358 359 /* 360 * return any unused bits to the bitmap and write out a clean 361 * local_alloc. 362 * 363 * local_alloc_bh is optional. If not passed, we will simply use the 364 * one off osb. If you do pass it however, be warned that it *will* be 365 * returned brelse'd and NULL'd out.*/ 366 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 367 { 368 int status; 369 handle_t *handle; 370 struct inode *local_alloc_inode = NULL; 371 struct buffer_head *bh = NULL; 372 struct buffer_head *main_bm_bh = NULL; 373 struct inode *main_bm_inode = NULL; 374 struct ocfs2_dinode *alloc_copy = NULL; 375 struct ocfs2_dinode *alloc = NULL; 376 377 cancel_delayed_work(&osb->la_enable_wq); 378 if (osb->ocfs2_wq) 379 flush_workqueue(osb->ocfs2_wq); 380 381 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 382 goto out; 383 384 local_alloc_inode = 385 ocfs2_get_system_file_inode(osb, 386 LOCAL_ALLOC_SYSTEM_INODE, 387 osb->slot_num); 388 if (!local_alloc_inode) { 389 status = -ENOENT; 390 mlog_errno(status); 391 goto out; 392 } 393 394 osb->local_alloc_state = OCFS2_LA_DISABLED; 395 396 ocfs2_resmap_uninit(&osb->osb_la_resmap); 397 398 main_bm_inode = ocfs2_get_system_file_inode(osb, 399 GLOBAL_BITMAP_SYSTEM_INODE, 400 
OCFS2_INVALID_SLOT); 401 if (!main_bm_inode) { 402 status = -EINVAL; 403 mlog_errno(status); 404 goto out; 405 } 406 407 inode_lock(main_bm_inode); 408 409 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 410 if (status < 0) { 411 mlog_errno(status); 412 goto out_mutex; 413 } 414 415 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 416 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 417 if (IS_ERR(handle)) { 418 mlog_errno(PTR_ERR(handle)); 419 handle = NULL; 420 goto out_unlock; 421 } 422 423 bh = osb->local_alloc_bh; 424 alloc = (struct ocfs2_dinode *) bh->b_data; 425 426 alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS); 427 if (!alloc_copy) { 428 status = -ENOMEM; 429 goto out_commit; 430 } 431 432 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 433 bh, OCFS2_JOURNAL_ACCESS_WRITE); 434 if (status < 0) { 435 mlog_errno(status); 436 goto out_commit; 437 } 438 439 ocfs2_clear_local_alloc(alloc); 440 ocfs2_journal_dirty(handle, bh); 441 442 brelse(bh); 443 osb->local_alloc_bh = NULL; 444 osb->local_alloc_state = OCFS2_LA_UNUSED; 445 446 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 447 main_bm_inode, main_bm_bh); 448 if (status < 0) 449 mlog_errno(status); 450 451 out_commit: 452 ocfs2_commit_trans(osb, handle); 453 454 out_unlock: 455 brelse(main_bm_bh); 456 457 ocfs2_inode_unlock(main_bm_inode, 1); 458 459 out_mutex: 460 inode_unlock(main_bm_inode); 461 iput(main_bm_inode); 462 463 out: 464 iput(local_alloc_inode); 465 466 kfree(alloc_copy); 467 } 468 469 /* 470 * We want to free the bitmap bits outside of any recovery context as 471 * we'll need a cluster lock to do so, but we must clear the local 472 * alloc before giving up the recovered nodes journal. 
To solve this, 473 * we kmalloc a copy of the local alloc before it's change for the 474 * caller to process with ocfs2_complete_local_alloc_recovery 475 */ 476 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 477 int slot_num, 478 struct ocfs2_dinode **alloc_copy) 479 { 480 int status = 0; 481 struct buffer_head *alloc_bh = NULL; 482 struct inode *inode = NULL; 483 struct ocfs2_dinode *alloc; 484 485 trace_ocfs2_begin_local_alloc_recovery(slot_num); 486 487 *alloc_copy = NULL; 488 489 inode = ocfs2_get_system_file_inode(osb, 490 LOCAL_ALLOC_SYSTEM_INODE, 491 slot_num); 492 if (!inode) { 493 status = -EINVAL; 494 mlog_errno(status); 495 goto bail; 496 } 497 498 inode_lock(inode); 499 500 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 501 OCFS2_BH_IGNORE_CACHE); 502 if (status < 0) { 503 mlog_errno(status); 504 goto bail; 505 } 506 507 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 508 if (!(*alloc_copy)) { 509 status = -ENOMEM; 510 goto bail; 511 } 512 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 513 514 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 515 ocfs2_clear_local_alloc(alloc); 516 517 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 518 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 519 if (status < 0) 520 mlog_errno(status); 521 522 bail: 523 if (status < 0) { 524 kfree(*alloc_copy); 525 *alloc_copy = NULL; 526 } 527 528 brelse(alloc_bh); 529 530 if (inode) { 531 inode_unlock(inode); 532 iput(inode); 533 } 534 535 if (status) 536 mlog_errno(status); 537 return status; 538 } 539 540 /* 541 * Step 2: By now, we've completed the journal recovery, we've stamped 542 * a clean local alloc on disk and dropped the node out of the 543 * recovery map. Dlm locks will no longer stall, so lets clear out the 544 * main bitmap. 
*/
int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
					struct ocfs2_dinode *alloc)
{
	int status;
	handle_t *handle;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode;

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	inode_lock(main_bm_inode);

	/* Lock the global bitmap inode (third argument is 1). */
	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_mutex;
	}

	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto out_unlock;
	}

	/* we want the bitmap change to be recorded on disk asap */
	handle->h_sync = 1;

	/* Hand the unused bits recorded in @alloc back to the main bitmap. */
	status = ocfs2_sync_local_to_main(osb, handle, alloc,
					  main_bm_inode, main_bm_bh);
	if (status < 0)
		mlog_errno(status);

	ocfs2_commit_trans(osb, handle);

out_unlock:
	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	inode_unlock(main_bm_inode);

	brelse(main_bm_bh);

	iput(main_bm_inode);

out:
	/* Only on full success is ocfs2_init_steal_slots() called. */
	if (!status)
		ocfs2_init_steal_slots(osb);
	if (status)
		mlog_errno(status);
	return status;
}

/*
 * make sure we've got at least bits_wanted contiguous bits in the
 * local alloc. You lose them when you drop i_rwsem.
 *
 * We will add ourselves to the transaction passed in, but may start
 * our own in order to shift windows.
*/
int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
				   u32 bits_wanted,
				   struct ocfs2_alloc_context *ac)
{
	int status;
	struct ocfs2_dinode *alloc;
	struct inode *local_alloc_inode;
	unsigned int free_bits;

	BUG_ON(!ac);

	local_alloc_inode =
		ocfs2_get_system_file_inode(osb,
					    LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!local_alloc_inode) {
		status = -ENOENT;
		mlog_errno(status);
		goto bail;
	}

	inode_lock(local_alloc_inode);

	/*
	 * We must double check state and allocator bits because
	 * another process may have changed them while holding i_rwsem.
	 */
	spin_lock(&osb->osb_lock);
	if (!ocfs2_la_state_enabled(osb) ||
	    (bits_wanted > osb->local_alloc_bits)) {
		spin_unlock(&osb->osb_lock);
		status = -ENOSPC;
		goto bail;
	}
	spin_unlock(&osb->osb_lock);

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;

#ifdef CONFIG_OCFS2_DEBUG_FS
	/* Debug builds cross-check i_used against the bitmap itself. */
	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
	    ocfs2_local_alloc_count_bits(alloc)) {
		status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n",
				(unsigned long long)le64_to_cpu(alloc->i_blkno),
				le32_to_cpu(alloc->id1.bitmap1.i_used),
				ocfs2_local_alloc_count_bits(alloc));
		goto bail;
	}
#endif

	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
		le32_to_cpu(alloc->id1.bitmap1.i_used);
	if (bits_wanted > free_bits) {
		/* uhoh, window change time. */
		status =
			ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}

		/*
		 * Under certain conditions, the window slide code
		 * might have reduced the number of bits available or
		 * disabled the local alloc entirely. Re-check
		 * here and return -ENOSPC if necessary.
		 */
		status = -ENOSPC;
		if (!ocfs2_la_state_enabled(osb))
			goto bail;

		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
			le32_to_cpu(alloc->id1.bitmap1.i_used);
		if (bits_wanted > free_bits)
			goto bail;
	}

	ac->ac_inode = local_alloc_inode;
	/* We should never use localalloc from another slot */
	ac->ac_alloc_slot = osb->slot_num;
	ac->ac_which = OCFS2_AC_USE_LOCAL;
	get_bh(osb->local_alloc_bh);
	ac->ac_bh = osb->local_alloc_bh;
	status = 0;
bail:
	/*
	 * The inode lock and reference are dropped here only on failure.
	 * On success they stay held and the inode is recorded in
	 * ac->ac_inode.
	 */
	if (status < 0 && local_alloc_inode) {
		inode_unlock(local_alloc_inode);
		iput(local_alloc_inode);
	}

	trace_ocfs2_reserve_local_alloc_bits(
		(unsigned long long)ac->ac_max_block,
		bits_wanted, osb->slot_num, status);

	if (status)
		mlog_errno(status);
	return status;
}

/*
 * Claim up to @bits_wanted bits from the local alloc window inside
 * @handle.  On success *bit_off is the claimed start (window offset
 * la_bm_off plus the bit index within the window) and *num_bits is the
 * number of bits actually claimed, which may be fewer than requested.
 *
 * @ac must have ac_which == OCFS2_AC_USE_LOCAL.
 * Returns 0 on success, -ENOSPC if no bits were found, or the error
 * from ocfs2_journal_access_di().
 */
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct ocfs2_alloc_context *ac,
				 u32 bits_wanted,
				 u32 *bit_off,
				 u32 *num_bits)
{
	int status, start;
	struct inode *local_alloc_inode;
	void *bitmap;
	struct ocfs2_dinode *alloc;
	struct ocfs2_local_alloc *la;

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);

	local_alloc_inode = ac->ac_inode;
	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	/* May shrink bits_wanted to what was actually found. */
	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
						  ac->ac_resv);
	if (start == -1) {
		/* TODO: Shouldn't we just BUG here? */
		status = -ENOSPC;
		mlog_errno(status);
		goto bail;
	}

	bitmap = la->la_bitmap;
	*bit_off = le32_to_cpu(la->la_bm_off) + start;
	*num_bits = bits_wanted;

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(local_alloc_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
				  bits_wanted);

	/* bits_wanted is consumed here; *num_bits keeps the count. */
	while(bits_wanted--)
		ocfs2_set_bit(start++, bitmap);

	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

bail:
	if (status)
		mlog_errno(status);
	return status;
}

/*
 * Undo a claim: clear @num_bits bits starting at @bit_off (same
 * coordinates as returned by ocfs2_claim_local_alloc_bits()) in the
 * local alloc bitmap and subtract them from i_used, inside @handle.
 *
 * @ac must have ac_which == OCFS2_AC_USE_LOCAL.
 * Returns 0 on success or the error from ocfs2_journal_access_di().
 */
int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
				handle_t *handle,
				struct ocfs2_alloc_context *ac,
				u32 bit_off,
				u32 num_bits)
{
	int status, start;
	u32 clear_bits;
	struct inode *local_alloc_inode;
	void *bitmap;
	struct ocfs2_dinode *alloc;
	struct ocfs2_local_alloc *la;

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);

	local_alloc_inode = ac->ac_inode;
	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	bitmap = la->la_bitmap;
	/* Translate back to a bit index within the window. */
	start = bit_off - le32_to_cpu(la->la_bm_off);
	clear_bits = num_bits;

	status = ocfs2_journal_access_di(handle,
			INODE_CACHE(local_alloc_inode),
			osb->local_alloc_bh,
			OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	while (clear_bits--)
		ocfs2_clear_bit(start++, bitmap);

	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

bail:
	return status;
}

/* Count the set bits in the la_size bytes of the local alloc bitmap. */
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
{
	u32 count;
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);

	count = memweight(la->la_bitmap, le16_to_cpu(la->la_size));

	trace_ocfs2_local_alloc_count_bits(count);
	return count;
}

/*
 * Find a run of clear bits in the local alloc window.
 *
 * *numbits is the number of bits wanted.  When the reservation code
 * satisfies the request it may lower *numbits to the number found.
 * @resv may be NULL, in which case a temporary reservation is used and
 * discarded before returning.
 *
 * Returns the starting bit offset within the window, or -1 if nothing
 * suitable was found (including when the window is empty).
 */
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
				     struct ocfs2_dinode *alloc,
				     u32 *numbits,
				     struct ocfs2_alloc_reservation *resv)
{
	int numfound = 0, bitoff, left, startoff;
	int local_resv = 0;
	struct ocfs2_alloc_reservation r;
	void *bitmap = NULL;
	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;

	if (!alloc->id1.bitmap1.i_total) {
		bitoff = -1;
		goto bail;
	}

	if (!resv) {
		local_resv = 1;
		ocfs2_resv_init_once(&r);
		ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
		resv = &r;
	}

	numfound = *numbits;
	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
		if (numfound < *numbits)
			*numbits = numfound;
		goto bail;
	}

	/*
	 * Code error. While reservations are enabled, local
	 * allocation should _always_ go through them.
	 */
	BUG_ON(osb->osb_resv_level != 0);

	/*
	 * Reservations are disabled. Handle this the old way.
	 */

	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;

	numfound = bitoff = startoff = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
	while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
		if (bitoff == left) {
			/* mlog(0, "bitoff (%d) == left", bitoff); */
			break;
		}
		/* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
		   "numfound = %d\n", bitoff, startoff, numfound);*/

		/* Ok, we found a zero bit... is it contig. or do we
		 * start over?*/
		if (bitoff == startoff) {
			/* we found a zero */
			numfound++;
			startoff++;
		} else {
			/* got a zero after some ones */
			numfound = 1;
			startoff = bitoff+1;
		}
		/* we got everything we needed */
		if (numfound == *numbits) {
			/* mlog(0, "Found it all!\n"); */
			break;
		}
	}

	trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);

	/* startoff is one past the run, so back up to its first bit. */
	if (numfound == *numbits)
		bitoff = startoff - numfound;
	else
		bitoff = -1;

bail:
	if (local_resv)
		ocfs2_resv_discard(resmap, resv);

	trace_ocfs2_local_alloc_find_clear_bits(*numbits,
		le32_to_cpu(alloc->id1.bitmap1.i_total),
		bitoff, numfound);

	return bitoff;
}

/*
 * Reset the in-memory local alloc to an empty window: zero i_total,
 * i_used and la_bm_off and clear all la_size bytes of the bitmap.
 * Nothing is journaled or written here.
 */
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
{
	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
	int i;

	alloc->id1.bitmap1.i_total = 0;
	alloc->id1.bitmap1.i_used = 0;
	la->la_bm_off = 0;
	for(i = 0; i < le16_to_cpu(la->la_size); i++)
		la->la_bitmap[i] = 0;
}

#if 0
/* turn this on and uncomment below to aid debugging window shifts. */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int tmp = count;
	while(tmp--) {
		if (ocfs2_test_bit(start + tmp, bitmap)) {
			printk("ocfs2_verify_zero_bits: start = %u, count = "
			       "%u\n", start, count);
			printk("ocfs2_verify_zero_bits: bit %u is set!",
			       start + tmp);
			BUG();
		}
	}
}
#endif

/*
 * sync the local alloc to main bitmap.
 *
 * assumes you've already locked the main bitmap -- the bitmap inode
 * passed is used for caching.
946 */ 947 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 948 handle_t *handle, 949 struct ocfs2_dinode *alloc, 950 struct inode *main_bm_inode, 951 struct buffer_head *main_bm_bh) 952 { 953 int status = 0; 954 int bit_off, left, count, start; 955 u64 la_start_blk; 956 u64 blkno; 957 void *bitmap; 958 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 959 960 trace_ocfs2_sync_local_to_main( 961 le32_to_cpu(alloc->id1.bitmap1.i_total), 962 le32_to_cpu(alloc->id1.bitmap1.i_used)); 963 964 if (!alloc->id1.bitmap1.i_total) { 965 goto bail; 966 } 967 968 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 969 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 970 goto bail; 971 } 972 973 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 974 le32_to_cpu(la->la_bm_off)); 975 bitmap = la->la_bitmap; 976 start = count = 0; 977 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 978 979 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 980 != -1) { 981 if ((bit_off < left) && (bit_off == start)) { 982 count++; 983 start++; 984 continue; 985 } 986 if (count) { 987 blkno = la_start_blk + 988 ocfs2_clusters_to_blocks(osb->sb, 989 start - count); 990 991 trace_ocfs2_sync_local_to_main_free( 992 count, start - count, 993 (unsigned long long)la_start_blk, 994 (unsigned long long)blkno); 995 996 status = ocfs2_release_clusters(handle, 997 main_bm_inode, 998 main_bm_bh, blkno, 999 count); 1000 if (status < 0) { 1001 mlog_errno(status); 1002 goto bail; 1003 } 1004 } 1005 if (bit_off >= left) 1006 break; 1007 count = 1; 1008 start = bit_off + 1; 1009 } 1010 1011 /* clear the contiguous bits until the end boundary */ 1012 if (count) { 1013 blkno = la_start_blk + 1014 ocfs2_clusters_to_blocks(osb->sb, 1015 start - count); 1016 1017 trace_ocfs2_sync_local_to_main_free( 1018 count, start - count, 1019 (unsigned long long)la_start_blk, 1020 (unsigned long long)blkno); 1021 1022 status = ocfs2_release_clusters(handle, 1023 main_bm_inode, 1024 main_bm_bh, blkno, 1025 count); 
1026 if (status < 0) 1027 mlog_errno(status); 1028 } 1029 1030 bail: 1031 if (status) 1032 mlog_errno(status); 1033 return status; 1034 } 1035 1036 enum ocfs2_la_event { 1037 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 1038 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 1039 * enough bits theoretically 1040 * free, but a contiguous 1041 * allocation could not be 1042 * found. */ 1043 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 1044 * enough bits free to satisfy 1045 * our request. */ 1046 }; 1047 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 1048 /* 1049 * Given an event, calculate the size of our next local alloc window. 1050 * 1051 * This should always be called under i_rwsem of the local alloc inode 1052 * so that local alloc disabling doesn't race with processes trying to 1053 * use the allocator. 1054 * 1055 * Returns the state which the local alloc was left in. This value can 1056 * be ignored by some paths. 1057 */ 1058 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 1059 enum ocfs2_la_event event) 1060 { 1061 unsigned int bits; 1062 int state; 1063 1064 spin_lock(&osb->osb_lock); 1065 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 1066 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 1067 goto out_unlock; 1068 } 1069 1070 /* 1071 * ENOSPC and fragmentation are treated similarly for now. 1072 */ 1073 if (event == OCFS2_LA_EVENT_ENOSPC || 1074 event == OCFS2_LA_EVENT_FRAGMENTED) { 1075 /* 1076 * We ran out of contiguous space in the primary 1077 * bitmap. Drastically reduce the number of bits used 1078 * by local alloc until we have to disable it. 1079 */ 1080 bits = osb->local_alloc_bits >> 1; 1081 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 1082 /* 1083 * By setting state to THROTTLED, we'll keep 1084 * the number of local alloc bits used down 1085 * until an event occurs which would give us 1086 * reason to assume the bitmap situation might 1087 * have changed. 
1088 */ 1089 osb->local_alloc_state = OCFS2_LA_THROTTLED; 1090 osb->local_alloc_bits = bits; 1091 } else { 1092 osb->local_alloc_state = OCFS2_LA_DISABLED; 1093 } 1094 queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq, 1095 OCFS2_LA_ENABLE_INTERVAL); 1096 goto out_unlock; 1097 } 1098 1099 /* 1100 * Don't increase the size of the local alloc window until we 1101 * know we might be able to fulfill the request. Otherwise, we 1102 * risk bouncing around the global bitmap during periods of 1103 * low space. 1104 */ 1105 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 1106 osb->local_alloc_bits = osb->local_alloc_default_bits; 1107 1108 out_unlock: 1109 state = osb->local_alloc_state; 1110 spin_unlock(&osb->osb_lock); 1111 1112 return state; 1113 } 1114 1115 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1116 struct ocfs2_alloc_context **ac, 1117 struct inode **bitmap_inode, 1118 struct buffer_head **bitmap_bh) 1119 { 1120 int status; 1121 1122 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 1123 if (!(*ac)) { 1124 status = -ENOMEM; 1125 mlog_errno(status); 1126 goto bail; 1127 } 1128 1129 retry_enospc: 1130 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 1131 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1132 if (status == -ENOSPC) { 1133 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 1134 OCFS2_LA_DISABLED) 1135 goto bail; 1136 1137 ocfs2_free_ac_resource(*ac); 1138 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 1139 goto retry_enospc; 1140 } 1141 if (status < 0) { 1142 mlog_errno(status); 1143 goto bail; 1144 } 1145 1146 *bitmap_inode = (*ac)->ac_inode; 1147 igrab(*bitmap_inode); 1148 *bitmap_bh = (*ac)->ac_bh; 1149 get_bh(*bitmap_bh); 1150 status = 0; 1151 bail: 1152 if ((status < 0) && *ac) { 1153 ocfs2_free_alloc_context(*ac); 1154 *ac = NULL; 1155 } 1156 1157 if (status) 1158 mlog_errno(status); 1159 return status; 1160 } 1161 1162 /* 1163 * pass it the bitmap lock in lock_bh if you have it. 
1164 */ 1165 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1166 handle_t *handle, 1167 struct ocfs2_alloc_context *ac) 1168 { 1169 int status = 0; 1170 u32 cluster_off, cluster_count; 1171 struct ocfs2_dinode *alloc = NULL; 1172 struct ocfs2_local_alloc *la; 1173 1174 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1175 la = OCFS2_LOCAL_ALLOC(alloc); 1176 1177 trace_ocfs2_local_alloc_new_window( 1178 le32_to_cpu(alloc->id1.bitmap1.i_total), 1179 osb->local_alloc_bits); 1180 1181 /* Instruct the allocation code to try the most recently used 1182 * cluster group. We'll re-record the group used this pass 1183 * below. */ 1184 ac->ac_last_group = osb->la_last_gd; 1185 1186 /* we used the generic suballoc reserve function, but we set 1187 * everything up nicely, so there's no reason why we can't use 1188 * the more specific cluster api to claim bits. */ 1189 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, 1190 &cluster_off, &cluster_count); 1191 if (status == -ENOSPC) { 1192 retry_enospc: 1193 /* 1194 * Note: We could also try syncing the journal here to 1195 * allow use of any free bits which the current 1196 * transaction can't give us access to. --Mark 1197 */ 1198 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1199 OCFS2_LA_DISABLED) 1200 goto bail; 1201 1202 ac->ac_bits_wanted = osb->local_alloc_bits; 1203 status = ocfs2_claim_clusters(handle, ac, 1204 osb->local_alloc_bits, 1205 &cluster_off, 1206 &cluster_count); 1207 if (status == -ENOSPC) 1208 goto retry_enospc; 1209 /* 1210 * We only shrunk the *minimum* number of in our 1211 * request - it's entirely possible that the allocator 1212 * might give us more than we asked for. 
1213 */ 1214 if (status == 0) { 1215 spin_lock(&osb->osb_lock); 1216 osb->local_alloc_bits = cluster_count; 1217 spin_unlock(&osb->osb_lock); 1218 } 1219 } 1220 if (status < 0) { 1221 if (status != -ENOSPC) 1222 mlog_errno(status); 1223 goto bail; 1224 } 1225 1226 osb->la_last_gd = ac->ac_last_group; 1227 1228 la->la_bm_off = cpu_to_le32(cluster_off); 1229 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1230 /* just in case... In the future when we find space ourselves, 1231 * we don't have to get all contiguous -- but we'll have to 1232 * set all previously used bits in bitmap and update 1233 * la_bits_set before setting the bits in the main bitmap. */ 1234 alloc->id1.bitmap1.i_used = 0; 1235 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1236 le16_to_cpu(la->la_size)); 1237 1238 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count, 1239 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); 1240 1241 trace_ocfs2_local_alloc_new_window_result( 1242 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off, 1243 le32_to_cpu(alloc->id1.bitmap1.i_total)); 1244 1245 bail: 1246 if (status) 1247 mlog_errno(status); 1248 return status; 1249 } 1250 1251 /* Note that we do *NOT* lock the local alloc inode here as 1252 * it's been locked already for us. */ 1253 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1254 struct inode *local_alloc_inode) 1255 { 1256 int status = 0; 1257 struct buffer_head *main_bm_bh = NULL; 1258 struct inode *main_bm_inode = NULL; 1259 handle_t *handle = NULL; 1260 struct ocfs2_dinode *alloc; 1261 struct ocfs2_dinode *alloc_copy = NULL; 1262 struct ocfs2_alloc_context *ac = NULL; 1263 1264 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1265 1266 /* This will lock the main bitmap for us. 
*/ 1267 status = ocfs2_local_alloc_reserve_for_window(osb, 1268 &ac, 1269 &main_bm_inode, 1270 &main_bm_bh); 1271 if (status < 0) { 1272 if (status != -ENOSPC) 1273 mlog_errno(status); 1274 goto bail; 1275 } 1276 1277 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1278 if (IS_ERR(handle)) { 1279 status = PTR_ERR(handle); 1280 handle = NULL; 1281 mlog_errno(status); 1282 goto bail; 1283 } 1284 1285 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1286 1287 /* We want to clear the local alloc before doing anything 1288 * else, so that if we error later during this operation, 1289 * local alloc shutdown won't try to double free main bitmap 1290 * bits. Make a copy so the sync function knows which bits to 1291 * free. */ 1292 alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS); 1293 if (!alloc_copy) { 1294 status = -ENOMEM; 1295 mlog_errno(status); 1296 goto bail; 1297 } 1298 1299 status = ocfs2_journal_access_di(handle, 1300 INODE_CACHE(local_alloc_inode), 1301 osb->local_alloc_bh, 1302 OCFS2_JOURNAL_ACCESS_WRITE); 1303 if (status < 0) { 1304 mlog_errno(status); 1305 goto bail; 1306 } 1307 1308 ocfs2_clear_local_alloc(alloc); 1309 ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1310 1311 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1312 main_bm_inode, main_bm_bh); 1313 if (status < 0) { 1314 mlog_errno(status); 1315 goto bail; 1316 } 1317 1318 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1319 if (status < 0) { 1320 if (status != -ENOSPC) 1321 mlog_errno(status); 1322 goto bail; 1323 } 1324 1325 atomic_inc(&osb->alloc_stats.moves); 1326 1327 bail: 1328 if (handle) 1329 ocfs2_commit_trans(osb, handle); 1330 1331 brelse(main_bm_bh); 1332 1333 iput(main_bm_inode); 1334 kfree(alloc_copy); 1335 1336 if (ac) 1337 ocfs2_free_alloc_context(ac); 1338 1339 if (status) 1340 mlog_errno(status); 1341 return status; 1342 } 1343 1344