1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * localalloc.c 5 * 6 * Node local data allocation 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/highmem.h> 30 #include <linux/bitops.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "blockcheck.h" 39 #include "dlmglue.h" 40 #include "inode.h" 41 #include "journal.h" 42 #include "localalloc.h" 43 #include "suballoc.h" 44 #include "super.h" 45 #include "sysfile.h" 46 47 #include "buffer_head_io.h" 48 49 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 50 51 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 52 53 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 54 struct ocfs2_dinode *alloc, 55 u32 numbits); 56 57 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58 59 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 60 handle_t *handle, 61 struct ocfs2_dinode *alloc, 62 struct inode *main_bm_inode, 63 struct buffer_head *main_bm_bh); 64 65 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 66 struct ocfs2_alloc_context **ac, 67 struct inode **bitmap_inode, 68 struct buffer_head **bitmap_bh); 69 70 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 71 handle_t *handle, 72 struct ocfs2_alloc_context *ac); 73 74 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75 struct inode *local_alloc_inode); 76 77 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 78 { 79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 80 osb->local_alloc_state == OCFS2_LA_ENABLED); 81 } 82 83 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 84 unsigned int num_clusters) 85 { 86 spin_lock(&osb->osb_lock); 87 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 88 osb->local_alloc_state == OCFS2_LA_THROTTLED) 89 if (num_clusters >= osb->local_alloc_default_bits) { 90 cancel_delayed_work(&osb->la_enable_wq); 91 osb->local_alloc_state = OCFS2_LA_ENABLED; 92 } 93 spin_unlock(&osb->osb_lock); 94 } 95 96 void ocfs2_la_enable_worker(struct work_struct *work) 97 { 98 struct ocfs2_super *osb = 99 container_of(work, struct ocfs2_super, 100 la_enable_wq.work); 101 spin_lock(&osb->osb_lock); 102 osb->local_alloc_state = OCFS2_LA_ENABLED; 103 spin_unlock(&osb->osb_lock); 104 } 105 106 /* 107 * Tell us whether a given allocation should use the local alloc 108 * file. Otherwise, it has to go to the main bitmap. 109 * 110 * This function does semi-dirty reads of local alloc size and state! 111 * This is ok however, as the values are re-checked once under mutex. 112 */ 113 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 114 { 115 int ret = 0; 116 int la_bits; 117 118 spin_lock(&osb->osb_lock); 119 la_bits = osb->local_alloc_bits; 120 121 if (!ocfs2_la_state_enabled(osb)) 122 goto bail; 123 124 /* la_bits should be at least twice the size (in clusters) of 125 * a new block group. We want to be sure block group 126 * allocations go through the local alloc, so allow an 127 * allocation to take up to half the bitmap. */ 128 if (bits > (la_bits / 2)) 129 goto bail; 130 131 ret = 1; 132 bail: 133 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 134 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 135 spin_unlock(&osb->osb_lock); 136 return ret; 137 } 138 139 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 140 { 141 int status = 0; 142 struct ocfs2_dinode *alloc = NULL; 143 struct buffer_head *alloc_bh = NULL; 144 u32 num_used; 145 struct inode *inode = NULL; 146 struct ocfs2_local_alloc *la; 147 148 mlog_entry_void(); 149 150 if (osb->local_alloc_bits == 0) 151 goto bail; 152 153 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 154 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 155 "than max possible %u. Using defaults.\n", 156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 157 osb->local_alloc_bits = 158 ocfs2_megabytes_to_clusters(osb->sb, 159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 160 } 161 162 /* read the alloc off disk */ 163 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 164 osb->slot_num); 165 if (!inode) { 166 status = -EINVAL; 167 mlog_errno(status); 168 goto bail; 169 } 170 171 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 172 OCFS2_BH_IGNORE_CACHE); 173 if (status < 0) { 174 mlog_errno(status); 175 goto bail; 176 } 177 178 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 179 la = OCFS2_LOCAL_ALLOC(alloc); 180 181 if (!(le32_to_cpu(alloc->i_flags) & 182 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 183 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 184 (unsigned long long)OCFS2_I(inode)->ip_blkno); 185 status = -EINVAL; 186 goto bail; 187 } 188 189 if ((la->la_size == 0) || 190 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 191 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 192 le16_to_cpu(la->la_size)); 193 status = -EINVAL; 194 goto bail; 195 } 196 197 /* do a little verification. */ 198 num_used = ocfs2_local_alloc_count_bits(alloc); 199 200 /* hopefully the local alloc has always been recovered before 201 * we load it. */ 202 if (num_used 203 || alloc->id1.bitmap1.i_used 204 || alloc->id1.bitmap1.i_total 205 || la->la_bm_off) 206 mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" 207 "found = %u, set = %u, taken = %u, off = %u\n", 208 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 209 le32_to_cpu(alloc->id1.bitmap1.i_total), 210 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 211 212 osb->local_alloc_bh = alloc_bh; 213 osb->local_alloc_state = OCFS2_LA_ENABLED; 214 215 bail: 216 if (status < 0) 217 brelse(alloc_bh); 218 if (inode) 219 iput(inode); 220 221 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 222 223 mlog_exit(status); 224 return status; 225 } 226 227 /* 228 * return any unused bits to the bitmap and write out a clean 229 * local_alloc. 230 * 231 * local_alloc_bh is optional. If not passed, we will simply use the 232 * one off osb. If you do pass it however, be warned that it *will* be 233 * returned brelse'd and NULL'd out.*/ 234 void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) 235 { 236 int status; 237 handle_t *handle; 238 struct inode *local_alloc_inode = NULL; 239 struct buffer_head *bh = NULL; 240 struct buffer_head *main_bm_bh = NULL; 241 struct inode *main_bm_inode = NULL; 242 struct ocfs2_dinode *alloc_copy = NULL; 243 struct ocfs2_dinode *alloc = NULL; 244 245 mlog_entry_void(); 246 247 cancel_delayed_work(&osb->la_enable_wq); 248 flush_workqueue(ocfs2_wq); 249 250 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 251 goto out; 252 253 local_alloc_inode = 254 ocfs2_get_system_file_inode(osb, 255 LOCAL_ALLOC_SYSTEM_INODE, 256 osb->slot_num); 257 if (!local_alloc_inode) { 258 status = -ENOENT; 259 mlog_errno(status); 260 goto out; 261 } 262 263 osb->local_alloc_state = OCFS2_LA_DISABLED; 264 265 main_bm_inode = ocfs2_get_system_file_inode(osb, 266 GLOBAL_BITMAP_SYSTEM_INODE, 267 OCFS2_INVALID_SLOT); 268 if (!main_bm_inode) { 269 status = -EINVAL; 270 mlog_errno(status); 271 goto out; 272 } 273 274 mutex_lock(&main_bm_inode->i_mutex); 275 276 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 277 if (status < 0) { 278 mlog_errno(status); 279 goto out_mutex; 280 } 281 282 /* WINDOW_MOVE_CREDITS is a bit heavy... */ 283 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 284 if (IS_ERR(handle)) { 285 mlog_errno(PTR_ERR(handle)); 286 handle = NULL; 287 goto out_unlock; 288 } 289 290 bh = osb->local_alloc_bh; 291 alloc = (struct ocfs2_dinode *) bh->b_data; 292 293 alloc_copy = kmalloc(bh->b_size, GFP_NOFS); 294 if (!alloc_copy) { 295 status = -ENOMEM; 296 goto out_commit; 297 } 298 memcpy(alloc_copy, alloc, bh->b_size); 299 300 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode), 301 bh, OCFS2_JOURNAL_ACCESS_WRITE); 302 if (status < 0) { 303 mlog_errno(status); 304 goto out_commit; 305 } 306 307 ocfs2_clear_local_alloc(alloc); 308 309 status = ocfs2_journal_dirty(handle, bh); 310 if (status < 0) { 311 mlog_errno(status); 312 goto out_commit; 313 } 314 315 brelse(bh); 316 osb->local_alloc_bh = NULL; 317 osb->local_alloc_state = OCFS2_LA_UNUSED; 318 319 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 320 main_bm_inode, main_bm_bh); 321 if (status < 0) 322 mlog_errno(status); 323 324 out_commit: 325 ocfs2_commit_trans(osb, handle); 326 327 out_unlock: 328 brelse(main_bm_bh); 329 330 ocfs2_inode_unlock(main_bm_inode, 1); 331 332 out_mutex: 333 mutex_unlock(&main_bm_inode->i_mutex); 334 iput(main_bm_inode); 335 336 out: 337 if (local_alloc_inode) 338 iput(local_alloc_inode); 339 340 if (alloc_copy) 341 kfree(alloc_copy); 342 343 mlog_exit_void(); 344 } 345 346 /* 347 * We want to free the bitmap bits outside of any recovery context as 348 * we'll need a cluster lock to do so, but we must clear the local 349 * alloc before giving up the recovered nodes journal. To solve this, 350 * we kmalloc a copy of the local alloc before it's change for the 351 * caller to process with ocfs2_complete_local_alloc_recovery 352 */ 353 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 354 int slot_num, 355 struct ocfs2_dinode **alloc_copy) 356 { 357 int status = 0; 358 struct buffer_head *alloc_bh = NULL; 359 struct inode *inode = NULL; 360 struct ocfs2_dinode *alloc; 361 362 mlog_entry("(slot_num = %d)\n", slot_num); 363 364 *alloc_copy = NULL; 365 366 inode = ocfs2_get_system_file_inode(osb, 367 LOCAL_ALLOC_SYSTEM_INODE, 368 slot_num); 369 if (!inode) { 370 status = -EINVAL; 371 mlog_errno(status); 372 goto bail; 373 } 374 375 mutex_lock(&inode->i_mutex); 376 377 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 378 OCFS2_BH_IGNORE_CACHE); 379 if (status < 0) { 380 mlog_errno(status); 381 goto bail; 382 } 383 384 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 385 if (!(*alloc_copy)) { 386 status = -ENOMEM; 387 goto bail; 388 } 389 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 390 391 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 392 ocfs2_clear_local_alloc(alloc); 393 394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 395 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode)); 396 if (status < 0) 397 mlog_errno(status); 398 399 bail: 400 if ((status < 0) && (*alloc_copy)) { 401 kfree(*alloc_copy); 402 *alloc_copy = NULL; 403 } 404 405 brelse(alloc_bh); 406 407 if (inode) { 408 mutex_unlock(&inode->i_mutex); 409 iput(inode); 410 } 411 412 mlog_exit(status); 413 return status; 414 } 415 416 /* 417 * Step 2: By now, we've completed the journal recovery, we've stamped 418 * a clean local alloc on disk and dropped the node out of the 419 * recovery map. Dlm locks will no longer stall, so lets clear out the 420 * main bitmap. 421 */ 422 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 423 struct ocfs2_dinode *alloc) 424 { 425 int status; 426 handle_t *handle; 427 struct buffer_head *main_bm_bh = NULL; 428 struct inode *main_bm_inode; 429 430 mlog_entry_void(); 431 432 main_bm_inode = ocfs2_get_system_file_inode(osb, 433 GLOBAL_BITMAP_SYSTEM_INODE, 434 OCFS2_INVALID_SLOT); 435 if (!main_bm_inode) { 436 status = -EINVAL; 437 mlog_errno(status); 438 goto out; 439 } 440 441 mutex_lock(&main_bm_inode->i_mutex); 442 443 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 444 if (status < 0) { 445 mlog_errno(status); 446 goto out_mutex; 447 } 448 449 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 450 if (IS_ERR(handle)) { 451 status = PTR_ERR(handle); 452 handle = NULL; 453 mlog_errno(status); 454 goto out_unlock; 455 } 456 457 /* we want the bitmap change to be recorded on disk asap */ 458 handle->h_sync = 1; 459 460 status = ocfs2_sync_local_to_main(osb, handle, alloc, 461 main_bm_inode, main_bm_bh); 462 if (status < 0) 463 mlog_errno(status); 464 465 ocfs2_commit_trans(osb, handle); 466 467 out_unlock: 468 ocfs2_inode_unlock(main_bm_inode, 1); 469 470 out_mutex: 471 mutex_unlock(&main_bm_inode->i_mutex); 472 473 brelse(main_bm_bh); 474 475 iput(main_bm_inode); 476 477 out: 478 if (!status) 479 ocfs2_init_inode_steal_slot(osb); 480 mlog_exit(status); 481 return status; 482 } 483 484 /* Check to see if the local alloc window is within ac->ac_max_block */ 485 static int ocfs2_local_alloc_in_range(struct inode *inode, 486 struct ocfs2_alloc_context *ac, 487 u32 bits_wanted) 488 { 489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 490 struct ocfs2_dinode *alloc; 491 struct ocfs2_local_alloc *la; 492 int start; 493 u64 block_off; 494 495 if (!ac->ac_max_block) 496 return 1; 497 498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 499 la = OCFS2_LOCAL_ALLOC(alloc); 500 501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 502 if (start == -1) { 503 mlog_errno(-ENOSPC); 504 return 0; 505 } 506 507 /* 508 * Converting (bm_off + start + bits_wanted) to blocks gives us 509 * the blkno just past our actual allocation. This is perfect 510 * to compare with ac_max_block. 511 */ 512 block_off = ocfs2_clusters_to_blocks(inode->i_sb, 513 le32_to_cpu(la->la_bm_off) + 514 start + bits_wanted); 515 mlog(0, "Checking %llu against %llu\n", 516 (unsigned long long)block_off, 517 (unsigned long long)ac->ac_max_block); 518 if (block_off > ac->ac_max_block) 519 return 0; 520 521 return 1; 522 } 523 524 /* 525 * make sure we've got at least bits_wanted contiguous bits in the 526 * local alloc. You lose them when you drop i_mutex. 527 * 528 * We will add ourselves to the transaction passed in, but may start 529 * our own in order to shift windows. 530 */ 531 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 532 u32 bits_wanted, 533 struct ocfs2_alloc_context *ac) 534 { 535 int status; 536 struct ocfs2_dinode *alloc; 537 struct inode *local_alloc_inode; 538 unsigned int free_bits; 539 540 mlog_entry_void(); 541 542 BUG_ON(!ac); 543 544 local_alloc_inode = 545 ocfs2_get_system_file_inode(osb, 546 LOCAL_ALLOC_SYSTEM_INODE, 547 osb->slot_num); 548 if (!local_alloc_inode) { 549 status = -ENOENT; 550 mlog_errno(status); 551 goto bail; 552 } 553 554 mutex_lock(&local_alloc_inode->i_mutex); 555 556 /* 557 * We must double check state and allocator bits because 558 * another process may have changed them while holding i_mutex. 559 */ 560 spin_lock(&osb->osb_lock); 561 if (!ocfs2_la_state_enabled(osb) || 562 (bits_wanted > osb->local_alloc_bits)) { 563 spin_unlock(&osb->osb_lock); 564 status = -ENOSPC; 565 goto bail; 566 } 567 spin_unlock(&osb->osb_lock); 568 569 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 570 571 #ifdef CONFIG_OCFS2_DEBUG_FS 572 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 573 ocfs2_local_alloc_count_bits(alloc)) { 574 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 575 "%u free bits, but a count shows %u", 576 (unsigned long long)le64_to_cpu(alloc->i_blkno), 577 le32_to_cpu(alloc->id1.bitmap1.i_used), 578 ocfs2_local_alloc_count_bits(alloc)); 579 status = -EIO; 580 goto bail; 581 } 582 #endif 583 584 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 585 le32_to_cpu(alloc->id1.bitmap1.i_used); 586 if (bits_wanted > free_bits) { 587 /* uhoh, window change time. */ 588 status = 589 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 590 if (status < 0) { 591 if (status != -ENOSPC) 592 mlog_errno(status); 593 goto bail; 594 } 595 596 /* 597 * Under certain conditions, the window slide code 598 * might have reduced the number of bits available or 599 * disabled the the local alloc entirely. Re-check 600 * here and return -ENOSPC if necessary. 601 */ 602 status = -ENOSPC; 603 if (!ocfs2_la_state_enabled(osb)) 604 goto bail; 605 606 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 607 le32_to_cpu(alloc->id1.bitmap1.i_used); 608 if (bits_wanted > free_bits) 609 goto bail; 610 } 611 612 if (ac->ac_max_block) 613 mlog(0, "Calling in_range for max block %llu\n", 614 (unsigned long long)ac->ac_max_block); 615 616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, 617 bits_wanted)) { 618 /* 619 * The window is outside ac->ac_max_block. 620 * This errno tells the caller to keep localalloc enabled 621 * but to get the allocation from the main bitmap. 622 */ 623 status = -EFBIG; 624 goto bail; 625 } 626 627 ac->ac_inode = local_alloc_inode; 628 /* We should never use localalloc from another slot */ 629 ac->ac_alloc_slot = osb->slot_num; 630 ac->ac_which = OCFS2_AC_USE_LOCAL; 631 get_bh(osb->local_alloc_bh); 632 ac->ac_bh = osb->local_alloc_bh; 633 status = 0; 634 bail: 635 if (status < 0 && local_alloc_inode) { 636 mutex_unlock(&local_alloc_inode->i_mutex); 637 iput(local_alloc_inode); 638 } 639 640 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 641 status); 642 643 mlog_exit(status); 644 return status; 645 } 646 647 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 648 handle_t *handle, 649 struct ocfs2_alloc_context *ac, 650 u32 bits_wanted, 651 u32 *bit_off, 652 u32 *num_bits) 653 { 654 int status, start; 655 struct inode *local_alloc_inode; 656 void *bitmap; 657 struct ocfs2_dinode *alloc; 658 struct ocfs2_local_alloc *la; 659 660 mlog_entry_void(); 661 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 662 663 local_alloc_inode = ac->ac_inode; 664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 665 la = OCFS2_LOCAL_ALLOC(alloc); 666 667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 668 if (start == -1) { 669 /* TODO: Shouldn't we just BUG here? */ 670 status = -ENOSPC; 671 mlog_errno(status); 672 goto bail; 673 } 674 675 bitmap = la->la_bitmap; 676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 677 /* local alloc is always contiguous by nature -- we never 678 * delete bits from it! */ 679 *num_bits = bits_wanted; 680 681 status = ocfs2_journal_access_di(handle, 682 INODE_CACHE(local_alloc_inode), 683 osb->local_alloc_bh, 684 OCFS2_JOURNAL_ACCESS_WRITE); 685 if (status < 0) { 686 mlog_errno(status); 687 goto bail; 688 } 689 690 while(bits_wanted--) 691 ocfs2_set_bit(start++, bitmap); 692 693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 694 695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 696 if (status < 0) { 697 mlog_errno(status); 698 goto bail; 699 } 700 701 status = 0; 702 bail: 703 mlog_exit(status); 704 return status; 705 } 706 707 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 708 { 709 int i; 710 u8 *buffer; 711 u32 count = 0; 712 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 713 714 mlog_entry_void(); 715 716 buffer = la->la_bitmap; 717 for (i = 0; i < le16_to_cpu(la->la_size); i++) 718 count += hweight8(buffer[i]); 719 720 mlog_exit(count); 721 return count; 722 } 723 724 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 725 struct ocfs2_dinode *alloc, 726 u32 numbits) 727 { 728 int numfound, bitoff, left, startoff, lastzero; 729 void *bitmap = NULL; 730 731 mlog_entry("(numbits wanted = %u)\n", numbits); 732 733 if (!alloc->id1.bitmap1.i_total) { 734 mlog(0, "No bits in my window!\n"); 735 bitoff = -1; 736 goto bail; 737 } 738 739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 740 741 numfound = bitoff = startoff = 0; 742 lastzero = -1; 743 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 744 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 745 if (bitoff == left) { 746 /* mlog(0, "bitoff (%d) == left", bitoff); */ 747 break; 748 } 749 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 750 "numfound = %d\n", bitoff, startoff, numfound);*/ 751 752 /* Ok, we found a zero bit... is it contig. or do we 753 * start over?*/ 754 if (bitoff == startoff) { 755 /* we found a zero */ 756 numfound++; 757 startoff++; 758 } else { 759 /* got a zero after some ones */ 760 numfound = 1; 761 startoff = bitoff+1; 762 } 763 /* we got everything we needed */ 764 if (numfound == numbits) { 765 /* mlog(0, "Found it all!\n"); */ 766 break; 767 } 768 } 769 770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 771 numfound); 772 773 if (numfound == numbits) 774 bitoff = startoff - numfound; 775 else 776 bitoff = -1; 777 778 bail: 779 mlog_exit(bitoff); 780 return bitoff; 781 } 782 783 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 784 { 785 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 786 int i; 787 mlog_entry_void(); 788 789 alloc->id1.bitmap1.i_total = 0; 790 alloc->id1.bitmap1.i_used = 0; 791 la->la_bm_off = 0; 792 for(i = 0; i < le16_to_cpu(la->la_size); i++) 793 la->la_bitmap[i] = 0; 794 795 mlog_exit_void(); 796 } 797 798 #if 0 799 /* turn this on and uncomment below to aid debugging window shifts. */ 800 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 801 unsigned int start, 802 unsigned int count) 803 { 804 unsigned int tmp = count; 805 while(tmp--) { 806 if (ocfs2_test_bit(start + tmp, bitmap)) { 807 printk("ocfs2_verify_zero_bits: start = %u, count = " 808 "%u\n", start, count); 809 printk("ocfs2_verify_zero_bits: bit %u is set!", 810 start + tmp); 811 BUG(); 812 } 813 } 814 } 815 #endif 816 817 /* 818 * sync the local alloc to main bitmap. 819 * 820 * assumes you've already locked the main bitmap -- the bitmap inode 821 * passed is used for caching. 822 */ 823 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 824 handle_t *handle, 825 struct ocfs2_dinode *alloc, 826 struct inode *main_bm_inode, 827 struct buffer_head *main_bm_bh) 828 { 829 int status = 0; 830 int bit_off, left, count, start; 831 u64 la_start_blk; 832 u64 blkno; 833 void *bitmap; 834 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 835 836 mlog_entry("total = %u, used = %u\n", 837 le32_to_cpu(alloc->id1.bitmap1.i_total), 838 le32_to_cpu(alloc->id1.bitmap1.i_used)); 839 840 if (!alloc->id1.bitmap1.i_total) { 841 mlog(0, "nothing to sync!\n"); 842 goto bail; 843 } 844 845 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 846 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 847 mlog(0, "all bits were taken!\n"); 848 goto bail; 849 } 850 851 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 852 le32_to_cpu(la->la_bm_off)); 853 bitmap = la->la_bitmap; 854 start = count = bit_off = 0; 855 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 856 857 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 858 != -1) { 859 if ((bit_off < left) && (bit_off == start)) { 860 count++; 861 start++; 862 continue; 863 } 864 if (count) { 865 blkno = la_start_blk + 866 ocfs2_clusters_to_blocks(osb->sb, 867 start - count); 868 869 mlog(0, "freeing %u bits starting at local alloc bit " 870 "%u (la_start_blk = %llu, blkno = %llu)\n", 871 count, start - count, 872 (unsigned long long)la_start_blk, 873 (unsigned long long)blkno); 874 875 status = ocfs2_free_clusters(handle, main_bm_inode, 876 main_bm_bh, blkno, count); 877 if (status < 0) { 878 mlog_errno(status); 879 goto bail; 880 } 881 } 882 if (bit_off >= left) 883 break; 884 count = 1; 885 start = bit_off + 1; 886 } 887 888 bail: 889 mlog_exit(status); 890 return status; 891 } 892 893 enum ocfs2_la_event { 894 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ 895 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 896 * enough bits theoretically 897 * free, but a contiguous 898 * allocation could not be 899 * found. */ 900 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 901 * enough bits free to satisfy 902 * our request. */ 903 }; 904 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 905 /* 906 * Given an event, calculate the size of our next local alloc window. 907 * 908 * This should always be called under i_mutex of the local alloc inode 909 * so that local alloc disabling doesn't race with processes trying to 910 * use the allocator. 911 * 912 * Returns the state which the local alloc was left in. This value can 913 * be ignored by some paths. 914 */ 915 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 916 enum ocfs2_la_event event) 917 { 918 unsigned int bits; 919 int state; 920 921 spin_lock(&osb->osb_lock); 922 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 923 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 924 goto out_unlock; 925 } 926 927 /* 928 * ENOSPC and fragmentation are treated similarly for now. 929 */ 930 if (event == OCFS2_LA_EVENT_ENOSPC || 931 event == OCFS2_LA_EVENT_FRAGMENTED) { 932 /* 933 * We ran out of contiguous space in the primary 934 * bitmap. Drastically reduce the number of bits used 935 * by local alloc until we have to disable it. 936 */ 937 bits = osb->local_alloc_bits >> 1; 938 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 939 /* 940 * By setting state to THROTTLED, we'll keep 941 * the number of local alloc bits used down 942 * until an event occurs which would give us 943 * reason to assume the bitmap situation might 944 * have changed. 945 */ 946 osb->local_alloc_state = OCFS2_LA_THROTTLED; 947 osb->local_alloc_bits = bits; 948 } else { 949 osb->local_alloc_state = OCFS2_LA_DISABLED; 950 } 951 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, 952 OCFS2_LA_ENABLE_INTERVAL); 953 goto out_unlock; 954 } 955 956 /* 957 * Don't increase the size of the local alloc window until we 958 * know we might be able to fulfill the request. Otherwise, we 959 * risk bouncing around the global bitmap during periods of 960 * low space. 961 */ 962 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 963 osb->local_alloc_bits = osb->local_alloc_default_bits; 964 965 out_unlock: 966 state = osb->local_alloc_state; 967 spin_unlock(&osb->osb_lock); 968 969 return state; 970 } 971 972 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 973 struct ocfs2_alloc_context **ac, 974 struct inode **bitmap_inode, 975 struct buffer_head **bitmap_bh) 976 { 977 int status; 978 979 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 980 if (!(*ac)) { 981 status = -ENOMEM; 982 mlog_errno(status); 983 goto bail; 984 } 985 986 retry_enospc: 987 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 988 989 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 990 if (status == -ENOSPC) { 991 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 992 OCFS2_LA_DISABLED) 993 goto bail; 994 995 ocfs2_free_ac_resource(*ac); 996 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 997 goto retry_enospc; 998 } 999 if (status < 0) { 1000 mlog_errno(status); 1001 goto bail; 1002 } 1003 1004 *bitmap_inode = (*ac)->ac_inode; 1005 igrab(*bitmap_inode); 1006 *bitmap_bh = (*ac)->ac_bh; 1007 get_bh(*bitmap_bh); 1008 status = 0; 1009 bail: 1010 if ((status < 0) && *ac) { 1011 ocfs2_free_alloc_context(*ac); 1012 *ac = NULL; 1013 } 1014 1015 mlog_exit(status); 1016 return status; 1017 } 1018 1019 /* 1020 * pass it the bitmap lock in lock_bh if you have it. 1021 */ 1022 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1023 handle_t *handle, 1024 struct ocfs2_alloc_context *ac) 1025 { 1026 int status = 0; 1027 u32 cluster_off, cluster_count; 1028 struct ocfs2_dinode *alloc = NULL; 1029 struct ocfs2_local_alloc *la; 1030 1031 mlog_entry_void(); 1032 1033 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1034 la = OCFS2_LOCAL_ALLOC(alloc); 1035 1036 if (alloc->id1.bitmap1.i_total) 1037 mlog(0, "asking me to alloc a new window over a non-empty " 1038 "one\n"); 1039 1040 mlog(0, "Allocating %u clusters for a new window.\n", 1041 osb->local_alloc_bits); 1042 1043 /* Instruct the allocation code to try the most recently used 1044 * cluster group. We'll re-record the group used this pass 1045 * below. */ 1046 ac->ac_last_group = osb->la_last_gd; 1047 1048 /* we used the generic suballoc reserve function, but we set 1049 * everything up nicely, so there's no reason why we can't use 1050 * the more specific cluster api to claim bits. */ 1051 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1052 &cluster_off, &cluster_count); 1053 if (status == -ENOSPC) { 1054 retry_enospc: 1055 /* 1056 * Note: We could also try syncing the journal here to 1057 * allow use of any free bits which the current 1058 * transaction can't give us access to. --Mark 1059 */ 1060 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1061 OCFS2_LA_DISABLED) 1062 goto bail; 1063 1064 status = ocfs2_claim_clusters(osb, handle, ac, 1065 osb->local_alloc_bits, 1066 &cluster_off, 1067 &cluster_count); 1068 if (status == -ENOSPC) 1069 goto retry_enospc; 1070 /* 1071 * We only shrunk the *minimum* number of in our 1072 * request - it's entirely possible that the allocator 1073 * might give us more than we asked for. 1074 */ 1075 if (status == 0) { 1076 spin_lock(&osb->osb_lock); 1077 osb->local_alloc_bits = cluster_count; 1078 spin_unlock(&osb->osb_lock); 1079 } 1080 } 1081 if (status < 0) { 1082 if (status != -ENOSPC) 1083 mlog_errno(status); 1084 goto bail; 1085 } 1086 1087 osb->la_last_gd = ac->ac_last_group; 1088 1089 la->la_bm_off = cpu_to_le32(cluster_off); 1090 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1091 /* just in case... In the future when we find space ourselves, 1092 * we don't have to get all contiguous -- but we'll have to 1093 * set all previously used bits in bitmap and update 1094 * la_bits_set before setting the bits in the main bitmap. */ 1095 alloc->id1.bitmap1.i_used = 0; 1096 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1097 le16_to_cpu(la->la_size)); 1098 1099 mlog(0, "New window allocated:\n"); 1100 mlog(0, "window la_bm_off = %u\n", 1101 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1102 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total)); 1103 1104 bail: 1105 mlog_exit(status); 1106 return status; 1107 } 1108 1109 /* Note that we do *NOT* lock the local alloc inode here as 1110 * it's been locked already for us. */ 1111 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1112 struct inode *local_alloc_inode) 1113 { 1114 int status = 0; 1115 struct buffer_head *main_bm_bh = NULL; 1116 struct inode *main_bm_inode = NULL; 1117 handle_t *handle = NULL; 1118 struct ocfs2_dinode *alloc; 1119 struct ocfs2_dinode *alloc_copy = NULL; 1120 struct ocfs2_alloc_context *ac = NULL; 1121 1122 mlog_entry_void(); 1123 1124 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1125 1126 /* This will lock the main bitmap for us. */ 1127 status = ocfs2_local_alloc_reserve_for_window(osb, 1128 &ac, 1129 &main_bm_inode, 1130 &main_bm_bh); 1131 if (status < 0) { 1132 if (status != -ENOSPC) 1133 mlog_errno(status); 1134 goto bail; 1135 } 1136 1137 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1138 if (IS_ERR(handle)) { 1139 status = PTR_ERR(handle); 1140 handle = NULL; 1141 mlog_errno(status); 1142 goto bail; 1143 } 1144 1145 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1146 1147 /* We want to clear the local alloc before doing anything 1148 * else, so that if we error later during this operation, 1149 * local alloc shutdown won't try to double free main bitmap 1150 * bits. Make a copy so the sync function knows which bits to 1151 * free. */ 1152 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); 1153 if (!alloc_copy) { 1154 status = -ENOMEM; 1155 mlog_errno(status); 1156 goto bail; 1157 } 1158 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1159 1160 status = ocfs2_journal_access_di(handle, 1161 INODE_CACHE(local_alloc_inode), 1162 osb->local_alloc_bh, 1163 OCFS2_JOURNAL_ACCESS_WRITE); 1164 if (status < 0) { 1165 mlog_errno(status); 1166 goto bail; 1167 } 1168 1169 ocfs2_clear_local_alloc(alloc); 1170 1171 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1172 if (status < 0) { 1173 mlog_errno(status); 1174 goto bail; 1175 } 1176 1177 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1178 main_bm_inode, main_bm_bh); 1179 if (status < 0) { 1180 mlog_errno(status); 1181 goto bail; 1182 } 1183 1184 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1185 if (status < 0) { 1186 if (status != -ENOSPC) 1187 mlog_errno(status); 1188 goto bail; 1189 } 1190 1191 atomic_inc(&osb->alloc_stats.moves); 1192 1193 status = 0; 1194 bail: 1195 if (handle) 1196 ocfs2_commit_trans(osb, handle); 1197 1198 brelse(main_bm_bh); 1199 1200 if (main_bm_inode) 1201 iput(main_bm_inode); 1202 1203 if (alloc_copy) 1204 kfree(alloc_copy); 1205 1206 if (ac) 1207 ocfs2_free_alloc_context(ac); 1208 1209 mlog_exit(status); 1210 return status; 1211 } 1212 1213