1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * localalloc.c 5 * 6 * Node local data allocation 7 * 8 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This program is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * You should have received a copy of the GNU General Public 21 * License along with this program; if not, write to the 22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 23 * Boston, MA 021110-1307, USA. 24 */ 25 26 #include <linux/fs.h> 27 #include <linux/types.h> 28 #include <linux/slab.h> 29 #include <linux/highmem.h> 30 #include <linux/bitops.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "blockcheck.h" 39 #include "dlmglue.h" 40 #include "inode.h" 41 #include "journal.h" 42 #include "localalloc.h" 43 #include "suballoc.h" 44 #include "super.h" 45 #include "sysfile.h" 46 47 #include "buffer_head_io.h" 48 49 #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 50 51 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 52 53 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 54 struct ocfs2_dinode *alloc, 55 u32 numbits); 56 57 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58 59 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 60 handle_t *handle, 61 struct ocfs2_dinode *alloc, 62 struct inode *main_bm_inode, 63 struct buffer_head *main_bm_bh); 64 65 static int 
ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 66 struct ocfs2_alloc_context **ac, 67 struct inode **bitmap_inode, 68 struct buffer_head **bitmap_bh); 69 70 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 71 handle_t *handle, 72 struct ocfs2_alloc_context *ac); 73 74 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75 struct inode *local_alloc_inode); 76 77 static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 78 { 79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 80 osb->local_alloc_state == OCFS2_LA_ENABLED); 81 } 82 83 void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, 84 unsigned int num_clusters) 85 { 86 spin_lock(&osb->osb_lock); 87 if (osb->local_alloc_state == OCFS2_LA_DISABLED || 88 osb->local_alloc_state == OCFS2_LA_THROTTLED) 89 if (num_clusters >= osb->local_alloc_default_bits) { 90 cancel_delayed_work(&osb->la_enable_wq); 91 osb->local_alloc_state = OCFS2_LA_ENABLED; 92 } 93 spin_unlock(&osb->osb_lock); 94 } 95 96 void ocfs2_la_enable_worker(struct work_struct *work) 97 { 98 struct ocfs2_super *osb = 99 container_of(work, struct ocfs2_super, 100 la_enable_wq.work); 101 spin_lock(&osb->osb_lock); 102 osb->local_alloc_state = OCFS2_LA_ENABLED; 103 spin_unlock(&osb->osb_lock); 104 } 105 106 /* 107 * Tell us whether a given allocation should use the local alloc 108 * file. Otherwise, it has to go to the main bitmap. 109 * 110 * This function does semi-dirty reads of local alloc size and state! 111 * This is ok however, as the values are re-checked once under mutex. 112 */ 113 int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 114 { 115 int ret = 0; 116 int la_bits; 117 118 spin_lock(&osb->osb_lock); 119 la_bits = osb->local_alloc_bits; 120 121 if (!ocfs2_la_state_enabled(osb)) 122 goto bail; 123 124 /* la_bits should be at least twice the size (in clusters) of 125 * a new block group. 
We want to be sure block group 126 * allocations go through the local alloc, so allow an 127 * allocation to take up to half the bitmap. */ 128 if (bits > (la_bits / 2)) 129 goto bail; 130 131 ret = 1; 132 bail: 133 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 134 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 135 spin_unlock(&osb->osb_lock); 136 return ret; 137 } 138 139 int ocfs2_load_local_alloc(struct ocfs2_super *osb) 140 { 141 int status = 0; 142 struct ocfs2_dinode *alloc = NULL; 143 struct buffer_head *alloc_bh = NULL; 144 u32 num_used; 145 struct inode *inode = NULL; 146 struct ocfs2_local_alloc *la; 147 148 mlog_entry_void(); 149 150 if (osb->local_alloc_bits == 0) 151 goto bail; 152 153 if (osb->local_alloc_bits >= osb->bitmap_cpg) { 154 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 155 "than max possible %u. Using defaults.\n", 156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 157 osb->local_alloc_bits = 158 ocfs2_megabytes_to_clusters(osb->sb, 159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 160 } 161 162 /* read the alloc off disk */ 163 inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE, 164 osb->slot_num); 165 if (!inode) { 166 status = -EINVAL; 167 mlog_errno(status); 168 goto bail; 169 } 170 171 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 172 OCFS2_BH_IGNORE_CACHE); 173 if (status < 0) { 174 mlog_errno(status); 175 goto bail; 176 } 177 178 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 179 la = OCFS2_LOCAL_ALLOC(alloc); 180 181 if (!(le32_to_cpu(alloc->i_flags) & 182 (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) { 183 mlog(ML_ERROR, "Invalid local alloc inode, %llu\n", 184 (unsigned long long)OCFS2_I(inode)->ip_blkno); 185 status = -EINVAL; 186 goto bail; 187 } 188 189 if ((la->la_size == 0) || 190 (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) { 191 mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n", 192 le16_to_cpu(la->la_size)); 193 status = -EINVAL; 194 goto 
bail; 195 } 196 197 /* do a little verification. */ 198 num_used = ocfs2_local_alloc_count_bits(alloc); 199 200 /* hopefully the local alloc has always been recovered before 201 * we load it. */ 202 if (num_used 203 || alloc->id1.bitmap1.i_used 204 || alloc->id1.bitmap1.i_total 205 || la->la_bm_off) 206 mlog(ML_ERROR, "Local alloc hasn't been recovered!\n" 207 "found = %u, set = %u, taken = %u, off = %u\n", 208 num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), 209 le32_to_cpu(alloc->id1.bitmap1.i_total), 210 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 211 212 osb->local_alloc_bh = alloc_bh; 213 osb->local_alloc_state = OCFS2_LA_ENABLED; 214 215 bail: 216 if (status < 0) 217 brelse(alloc_bh); 218 if (inode) 219 iput(inode); 220 221 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); 222 223 mlog_exit(status); 224 return status; 225 } 226 227 /* 228 * return any unused bits to the bitmap and write out a clean 229 * local_alloc. 230 * 231 * local_alloc_bh is optional. If not passed, we will simply use the 232 * one off osb. 
If you do pass it however, be warned that it *will* be
 * returned brelse'd and NULL'd out.*/
/*
 * NOTE: despite the historical wording above, this function takes no
 * buffer_head argument; it always works on osb->local_alloc_bh.
 *
 * Sequence: cancel/flush the re-enable work, mark the local alloc
 * DISABLED, take the global bitmap i_mutex and cluster lock, journal
 * a cleared local alloc, drop osb->local_alloc_bh and mark the state
 * UNUSED, then hand the pre-clear copy to ocfs2_sync_local_to_main()
 * so the unused bits go back to the main bitmap.
 *
 * Nothing is returned; failures are only logged.
 */
void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
{
	int status;
	handle_t *handle;
	struct inode *local_alloc_inode = NULL;
	struct buffer_head *bh = NULL;
	struct buffer_head *main_bm_bh = NULL;
	struct inode *main_bm_inode = NULL;
	struct ocfs2_dinode *alloc_copy = NULL;
	struct ocfs2_dinode *alloc = NULL;

	mlog_entry_void();

	cancel_delayed_work(&osb->la_enable_wq);
	flush_workqueue(ocfs2_wq);

	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
		goto out;

	local_alloc_inode =
		ocfs2_get_system_file_inode(osb,
					    LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!local_alloc_inode) {
		status = -ENOENT;
		mlog_errno(status);
		goto out;
	}

	osb->local_alloc_state = OCFS2_LA_DISABLED;

	main_bm_inode = ocfs2_get_system_file_inode(osb,
						    GLOBAL_BITMAP_SYSTEM_INODE,
						    OCFS2_INVALID_SLOT);
	if (!main_bm_inode) {
		status = -EINVAL;
		mlog_errno(status);
		goto out;
	}

	mutex_lock(&main_bm_inode->i_mutex);

	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
	if (status < 0) {
		mlog_errno(status);
		goto out_mutex;
	}

	/* WINDOW_MOVE_CREDITS is a bit heavy... */
	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
	if (IS_ERR(handle)) {
		/* Record the error in status like the other callers of
		 * ocfs2_start_trans() in this file do. */
		status = PTR_ERR(handle);
		handle = NULL;
		mlog_errno(status);
		goto out_unlock;
	}

	bh = osb->local_alloc_bh;
	alloc = (struct ocfs2_dinode *) bh->b_data;

	/* Snapshot the window before it is wiped, so the sync below
	 * still knows which bits to give back. */
	alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
	if (!alloc_copy) {
		status = -ENOMEM;
		goto out_commit;
	}
	memcpy(alloc_copy, alloc, bh->b_size);

	status = ocfs2_journal_access_di(handle, local_alloc_inode, bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto out_commit;
	}

	ocfs2_clear_local_alloc(alloc);

	status = ocfs2_journal_dirty(handle, bh);
	if (status < 0) {
		mlog_errno(status);
		goto out_commit;
	}

	brelse(bh);
	osb->local_alloc_bh = NULL;
	osb->local_alloc_state = OCFS2_LA_UNUSED;

	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
					  main_bm_inode, main_bm_bh);
	if (status < 0)
		mlog_errno(status);

out_commit:
	ocfs2_commit_trans(osb, handle);

out_unlock:
	brelse(main_bm_bh);

	ocfs2_inode_unlock(main_bm_inode, 1);

out_mutex:
	mutex_unlock(&main_bm_inode->i_mutex);
	iput(main_bm_inode);

out:
	if (local_alloc_inode)
		iput(local_alloc_inode);

	/* kfree(NULL) is a no-op, so no guard is needed. */
	kfree(alloc_copy);

	mlog_exit_void();
}

/*
 * We want to free the bitmap bits outside of any recovery context as
 * we'll need a cluster lock to do so, but we must clear the local
 * alloc before giving up the recovered nodes journal.
To solve this, 350 * we kmalloc a copy of the local alloc before it's change for the 351 * caller to process with ocfs2_complete_local_alloc_recovery 352 */ 353 int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 354 int slot_num, 355 struct ocfs2_dinode **alloc_copy) 356 { 357 int status = 0; 358 struct buffer_head *alloc_bh = NULL; 359 struct inode *inode = NULL; 360 struct ocfs2_dinode *alloc; 361 362 mlog_entry("(slot_num = %d)\n", slot_num); 363 364 *alloc_copy = NULL; 365 366 inode = ocfs2_get_system_file_inode(osb, 367 LOCAL_ALLOC_SYSTEM_INODE, 368 slot_num); 369 if (!inode) { 370 status = -EINVAL; 371 mlog_errno(status); 372 goto bail; 373 } 374 375 mutex_lock(&inode->i_mutex); 376 377 status = ocfs2_read_inode_block_full(inode, &alloc_bh, 378 OCFS2_BH_IGNORE_CACHE); 379 if (status < 0) { 380 mlog_errno(status); 381 goto bail; 382 } 383 384 *alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL); 385 if (!(*alloc_copy)) { 386 status = -ENOMEM; 387 goto bail; 388 } 389 memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size); 390 391 alloc = (struct ocfs2_dinode *) alloc_bh->b_data; 392 ocfs2_clear_local_alloc(alloc); 393 394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 395 status = ocfs2_write_block(osb, alloc_bh, inode); 396 if (status < 0) 397 mlog_errno(status); 398 399 bail: 400 if ((status < 0) && (*alloc_copy)) { 401 kfree(*alloc_copy); 402 *alloc_copy = NULL; 403 } 404 405 brelse(alloc_bh); 406 407 if (inode) { 408 mutex_unlock(&inode->i_mutex); 409 iput(inode); 410 } 411 412 mlog_exit(status); 413 return status; 414 } 415 416 /* 417 * Step 2: By now, we've completed the journal recovery, we've stamped 418 * a clean local alloc on disk and dropped the node out of the 419 * recovery map. Dlm locks will no longer stall, so lets clear out the 420 * main bitmap. 
421 */ 422 int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb, 423 struct ocfs2_dinode *alloc) 424 { 425 int status; 426 handle_t *handle; 427 struct buffer_head *main_bm_bh = NULL; 428 struct inode *main_bm_inode; 429 430 mlog_entry_void(); 431 432 main_bm_inode = ocfs2_get_system_file_inode(osb, 433 GLOBAL_BITMAP_SYSTEM_INODE, 434 OCFS2_INVALID_SLOT); 435 if (!main_bm_inode) { 436 status = -EINVAL; 437 mlog_errno(status); 438 goto out; 439 } 440 441 mutex_lock(&main_bm_inode->i_mutex); 442 443 status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1); 444 if (status < 0) { 445 mlog_errno(status); 446 goto out_mutex; 447 } 448 449 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 450 if (IS_ERR(handle)) { 451 status = PTR_ERR(handle); 452 handle = NULL; 453 mlog_errno(status); 454 goto out_unlock; 455 } 456 457 /* we want the bitmap change to be recorded on disk asap */ 458 handle->h_sync = 1; 459 460 status = ocfs2_sync_local_to_main(osb, handle, alloc, 461 main_bm_inode, main_bm_bh); 462 if (status < 0) 463 mlog_errno(status); 464 465 ocfs2_commit_trans(osb, handle); 466 467 out_unlock: 468 ocfs2_inode_unlock(main_bm_inode, 1); 469 470 out_mutex: 471 mutex_unlock(&main_bm_inode->i_mutex); 472 473 brelse(main_bm_bh); 474 475 iput(main_bm_inode); 476 477 out: 478 if (!status) 479 ocfs2_init_inode_steal_slot(osb); 480 mlog_exit(status); 481 return status; 482 } 483 484 /* Check to see if the local alloc window is within ac->ac_max_block */ 485 static int ocfs2_local_alloc_in_range(struct inode *inode, 486 struct ocfs2_alloc_context *ac, 487 u32 bits_wanted) 488 { 489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 490 struct ocfs2_dinode *alloc; 491 struct ocfs2_local_alloc *la; 492 int start; 493 u64 block_off; 494 495 if (!ac->ac_max_block) 496 return 1; 497 498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 499 la = OCFS2_LOCAL_ALLOC(alloc); 500 501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 502 if 
(start == -1) { 503 mlog_errno(-ENOSPC); 504 return 0; 505 } 506 507 /* 508 * Converting (bm_off + start + bits_wanted) to blocks gives us 509 * the blkno just past our actual allocation. This is perfect 510 * to compare with ac_max_block. 511 */ 512 block_off = ocfs2_clusters_to_blocks(inode->i_sb, 513 le32_to_cpu(la->la_bm_off) + 514 start + bits_wanted); 515 mlog(0, "Checking %llu against %llu\n", 516 (unsigned long long)block_off, 517 (unsigned long long)ac->ac_max_block); 518 if (block_off > ac->ac_max_block) 519 return 0; 520 521 return 1; 522 } 523 524 /* 525 * make sure we've got at least bits_wanted contiguous bits in the 526 * local alloc. You lose them when you drop i_mutex. 527 * 528 * We will add ourselves to the transaction passed in, but may start 529 * our own in order to shift windows. 530 */ 531 int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, 532 u32 bits_wanted, 533 struct ocfs2_alloc_context *ac) 534 { 535 int status; 536 struct ocfs2_dinode *alloc; 537 struct inode *local_alloc_inode; 538 unsigned int free_bits; 539 540 mlog_entry_void(); 541 542 BUG_ON(!ac); 543 544 local_alloc_inode = 545 ocfs2_get_system_file_inode(osb, 546 LOCAL_ALLOC_SYSTEM_INODE, 547 osb->slot_num); 548 if (!local_alloc_inode) { 549 status = -ENOENT; 550 mlog_errno(status); 551 goto bail; 552 } 553 554 mutex_lock(&local_alloc_inode->i_mutex); 555 556 /* 557 * We must double check state and allocator bits because 558 * another process may have changed them while holding i_mutex. 
559 */ 560 spin_lock(&osb->osb_lock); 561 if (!ocfs2_la_state_enabled(osb) || 562 (bits_wanted > osb->local_alloc_bits)) { 563 spin_unlock(&osb->osb_lock); 564 status = -ENOSPC; 565 goto bail; 566 } 567 spin_unlock(&osb->osb_lock); 568 569 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 570 571 #ifdef CONFIG_OCFS2_DEBUG_FS 572 if (le32_to_cpu(alloc->id1.bitmap1.i_used) != 573 ocfs2_local_alloc_count_bits(alloc)) { 574 ocfs2_error(osb->sb, "local alloc inode %llu says it has " 575 "%u free bits, but a count shows %u", 576 (unsigned long long)le64_to_cpu(alloc->i_blkno), 577 le32_to_cpu(alloc->id1.bitmap1.i_used), 578 ocfs2_local_alloc_count_bits(alloc)); 579 status = -EIO; 580 goto bail; 581 } 582 #endif 583 584 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 585 le32_to_cpu(alloc->id1.bitmap1.i_used); 586 if (bits_wanted > free_bits) { 587 /* uhoh, window change time. */ 588 status = 589 ocfs2_local_alloc_slide_window(osb, local_alloc_inode); 590 if (status < 0) { 591 if (status != -ENOSPC) 592 mlog_errno(status); 593 goto bail; 594 } 595 596 /* 597 * Under certain conditions, the window slide code 598 * might have reduced the number of bits available or 599 * disabled the the local alloc entirely. Re-check 600 * here and return -ENOSPC if necessary. 601 */ 602 status = -ENOSPC; 603 if (!ocfs2_la_state_enabled(osb)) 604 goto bail; 605 606 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - 607 le32_to_cpu(alloc->id1.bitmap1.i_used); 608 if (bits_wanted > free_bits) 609 goto bail; 610 } 611 612 if (ac->ac_max_block) 613 mlog(0, "Calling in_range for max block %llu\n", 614 (unsigned long long)ac->ac_max_block); 615 616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, 617 bits_wanted)) { 618 /* 619 * The window is outside ac->ac_max_block. 620 * This errno tells the caller to keep localalloc enabled 621 * but to get the allocation from the main bitmap. 
622 */ 623 status = -EFBIG; 624 goto bail; 625 } 626 627 ac->ac_inode = local_alloc_inode; 628 /* We should never use localalloc from another slot */ 629 ac->ac_alloc_slot = osb->slot_num; 630 ac->ac_which = OCFS2_AC_USE_LOCAL; 631 get_bh(osb->local_alloc_bh); 632 ac->ac_bh = osb->local_alloc_bh; 633 status = 0; 634 bail: 635 if (status < 0 && local_alloc_inode) { 636 mutex_unlock(&local_alloc_inode->i_mutex); 637 iput(local_alloc_inode); 638 } 639 640 mlog(0, "bits=%d, slot=%d, ret=%d\n", bits_wanted, osb->slot_num, 641 status); 642 643 mlog_exit(status); 644 return status; 645 } 646 647 int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, 648 handle_t *handle, 649 struct ocfs2_alloc_context *ac, 650 u32 bits_wanted, 651 u32 *bit_off, 652 u32 *num_bits) 653 { 654 int status, start; 655 struct inode *local_alloc_inode; 656 void *bitmap; 657 struct ocfs2_dinode *alloc; 658 struct ocfs2_local_alloc *la; 659 660 mlog_entry_void(); 661 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); 662 663 local_alloc_inode = ac->ac_inode; 664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 665 la = OCFS2_LOCAL_ALLOC(alloc); 666 667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 668 if (start == -1) { 669 /* TODO: Shouldn't we just BUG here? */ 670 status = -ENOSPC; 671 mlog_errno(status); 672 goto bail; 673 } 674 675 bitmap = la->la_bitmap; 676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 677 /* local alloc is always contiguous by nature -- we never 678 * delete bits from it! 
*/ 679 *num_bits = bits_wanted; 680 681 status = ocfs2_journal_access_di(handle, local_alloc_inode, 682 osb->local_alloc_bh, 683 OCFS2_JOURNAL_ACCESS_WRITE); 684 if (status < 0) { 685 mlog_errno(status); 686 goto bail; 687 } 688 689 while(bits_wanted--) 690 ocfs2_set_bit(start++, bitmap); 691 692 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 693 694 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 695 if (status < 0) { 696 mlog_errno(status); 697 goto bail; 698 } 699 700 status = 0; 701 bail: 702 mlog_exit(status); 703 return status; 704 } 705 706 static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc) 707 { 708 int i; 709 u8 *buffer; 710 u32 count = 0; 711 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 712 713 mlog_entry_void(); 714 715 buffer = la->la_bitmap; 716 for (i = 0; i < le16_to_cpu(la->la_size); i++) 717 count += hweight8(buffer[i]); 718 719 mlog_exit(count); 720 return count; 721 } 722 723 static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 724 struct ocfs2_dinode *alloc, 725 u32 numbits) 726 { 727 int numfound, bitoff, left, startoff, lastzero; 728 void *bitmap = NULL; 729 730 mlog_entry("(numbits wanted = %u)\n", numbits); 731 732 if (!alloc->id1.bitmap1.i_total) { 733 mlog(0, "No bits in my window!\n"); 734 bitoff = -1; 735 goto bail; 736 } 737 738 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 739 740 numfound = bitoff = startoff = 0; 741 lastzero = -1; 742 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 743 while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { 744 if (bitoff == left) { 745 /* mlog(0, "bitoff (%d) == left", bitoff); */ 746 break; 747 } 748 /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " 749 "numfound = %d\n", bitoff, startoff, numfound);*/ 750 751 /* Ok, we found a zero bit... is it contig. 
or do we 752 * start over?*/ 753 if (bitoff == startoff) { 754 /* we found a zero */ 755 numfound++; 756 startoff++; 757 } else { 758 /* got a zero after some ones */ 759 numfound = 1; 760 startoff = bitoff+1; 761 } 762 /* we got everything we needed */ 763 if (numfound == numbits) { 764 /* mlog(0, "Found it all!\n"); */ 765 break; 766 } 767 } 768 769 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 770 numfound); 771 772 if (numfound == numbits) 773 bitoff = startoff - numfound; 774 else 775 bitoff = -1; 776 777 bail: 778 mlog_exit(bitoff); 779 return bitoff; 780 } 781 782 static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc) 783 { 784 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 785 int i; 786 mlog_entry_void(); 787 788 alloc->id1.bitmap1.i_total = 0; 789 alloc->id1.bitmap1.i_used = 0; 790 la->la_bm_off = 0; 791 for(i = 0; i < le16_to_cpu(la->la_size); i++) 792 la->la_bitmap[i] = 0; 793 794 mlog_exit_void(); 795 } 796 797 #if 0 798 /* turn this on and uncomment below to aid debugging window shifts. */ 799 static void ocfs2_verify_zero_bits(unsigned long *bitmap, 800 unsigned int start, 801 unsigned int count) 802 { 803 unsigned int tmp = count; 804 while(tmp--) { 805 if (ocfs2_test_bit(start + tmp, bitmap)) { 806 printk("ocfs2_verify_zero_bits: start = %u, count = " 807 "%u\n", start, count); 808 printk("ocfs2_verify_zero_bits: bit %u is set!", 809 start + tmp); 810 BUG(); 811 } 812 } 813 } 814 #endif 815 816 /* 817 * sync the local alloc to main bitmap. 818 * 819 * assumes you've already locked the main bitmap -- the bitmap inode 820 * passed is used for caching. 
821 */ 822 static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, 823 handle_t *handle, 824 struct ocfs2_dinode *alloc, 825 struct inode *main_bm_inode, 826 struct buffer_head *main_bm_bh) 827 { 828 int status = 0; 829 int bit_off, left, count, start; 830 u64 la_start_blk; 831 u64 blkno; 832 void *bitmap; 833 struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc); 834 835 mlog_entry("total = %u, used = %u\n", 836 le32_to_cpu(alloc->id1.bitmap1.i_total), 837 le32_to_cpu(alloc->id1.bitmap1.i_used)); 838 839 if (!alloc->id1.bitmap1.i_total) { 840 mlog(0, "nothing to sync!\n"); 841 goto bail; 842 } 843 844 if (le32_to_cpu(alloc->id1.bitmap1.i_used) == 845 le32_to_cpu(alloc->id1.bitmap1.i_total)) { 846 mlog(0, "all bits were taken!\n"); 847 goto bail; 848 } 849 850 la_start_blk = ocfs2_clusters_to_blocks(osb->sb, 851 le32_to_cpu(la->la_bm_off)); 852 bitmap = la->la_bitmap; 853 start = count = bit_off = 0; 854 left = le32_to_cpu(alloc->id1.bitmap1.i_total); 855 856 while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) 857 != -1) { 858 if ((bit_off < left) && (bit_off == start)) { 859 count++; 860 start++; 861 continue; 862 } 863 if (count) { 864 blkno = la_start_blk + 865 ocfs2_clusters_to_blocks(osb->sb, 866 start - count); 867 868 mlog(0, "freeing %u bits starting at local alloc bit " 869 "%u (la_start_blk = %llu, blkno = %llu)\n", 870 count, start - count, 871 (unsigned long long)la_start_blk, 872 (unsigned long long)blkno); 873 874 status = ocfs2_free_clusters(handle, main_bm_inode, 875 main_bm_bh, blkno, count); 876 if (status < 0) { 877 mlog_errno(status); 878 goto bail; 879 } 880 } 881 if (bit_off >= left) 882 break; 883 count = 1; 884 start = bit_off + 1; 885 } 886 887 bail: 888 mlog_exit(status); 889 return status; 890 } 891 892 enum ocfs2_la_event { 893 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. 
*/ 894 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has 895 * enough bits theoretically 896 * free, but a contiguous 897 * allocation could not be 898 * found. */ 899 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have 900 * enough bits free to satisfy 901 * our request. */ 902 }; 903 #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) 904 /* 905 * Given an event, calculate the size of our next local alloc window. 906 * 907 * This should always be called under i_mutex of the local alloc inode 908 * so that local alloc disabling doesn't race with processes trying to 909 * use the allocator. 910 * 911 * Returns the state which the local alloc was left in. This value can 912 * be ignored by some paths. 913 */ 914 static int ocfs2_recalc_la_window(struct ocfs2_super *osb, 915 enum ocfs2_la_event event) 916 { 917 unsigned int bits; 918 int state; 919 920 spin_lock(&osb->osb_lock); 921 if (osb->local_alloc_state == OCFS2_LA_DISABLED) { 922 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); 923 goto out_unlock; 924 } 925 926 /* 927 * ENOSPC and fragmentation are treated similarly for now. 928 */ 929 if (event == OCFS2_LA_EVENT_ENOSPC || 930 event == OCFS2_LA_EVENT_FRAGMENTED) { 931 /* 932 * We ran out of contiguous space in the primary 933 * bitmap. Drastically reduce the number of bits used 934 * by local alloc until we have to disable it. 935 */ 936 bits = osb->local_alloc_bits >> 1; 937 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { 938 /* 939 * By setting state to THROTTLED, we'll keep 940 * the number of local alloc bits used down 941 * until an event occurs which would give us 942 * reason to assume the bitmap situation might 943 * have changed. 
944 */ 945 osb->local_alloc_state = OCFS2_LA_THROTTLED; 946 osb->local_alloc_bits = bits; 947 } else { 948 osb->local_alloc_state = OCFS2_LA_DISABLED; 949 } 950 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, 951 OCFS2_LA_ENABLE_INTERVAL); 952 goto out_unlock; 953 } 954 955 /* 956 * Don't increase the size of the local alloc window until we 957 * know we might be able to fulfill the request. Otherwise, we 958 * risk bouncing around the global bitmap during periods of 959 * low space. 960 */ 961 if (osb->local_alloc_state != OCFS2_LA_THROTTLED) 962 osb->local_alloc_bits = osb->local_alloc_default_bits; 963 964 out_unlock: 965 state = osb->local_alloc_state; 966 spin_unlock(&osb->osb_lock); 967 968 return state; 969 } 970 971 static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 972 struct ocfs2_alloc_context **ac, 973 struct inode **bitmap_inode, 974 struct buffer_head **bitmap_bh) 975 { 976 int status; 977 978 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 979 if (!(*ac)) { 980 status = -ENOMEM; 981 mlog_errno(status); 982 goto bail; 983 } 984 985 retry_enospc: 986 (*ac)->ac_bits_wanted = osb->local_alloc_bits; 987 988 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 989 if (status == -ENOSPC) { 990 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == 991 OCFS2_LA_DISABLED) 992 goto bail; 993 994 ocfs2_free_ac_resource(*ac); 995 memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); 996 goto retry_enospc; 997 } 998 if (status < 0) { 999 mlog_errno(status); 1000 goto bail; 1001 } 1002 1003 *bitmap_inode = (*ac)->ac_inode; 1004 igrab(*bitmap_inode); 1005 *bitmap_bh = (*ac)->ac_bh; 1006 get_bh(*bitmap_bh); 1007 status = 0; 1008 bail: 1009 if ((status < 0) && *ac) { 1010 ocfs2_free_alloc_context(*ac); 1011 *ac = NULL; 1012 } 1013 1014 mlog_exit(status); 1015 return status; 1016 } 1017 1018 /* 1019 * pass it the bitmap lock in lock_bh if you have it. 
1020 */ 1021 static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, 1022 handle_t *handle, 1023 struct ocfs2_alloc_context *ac) 1024 { 1025 int status = 0; 1026 u32 cluster_off, cluster_count; 1027 struct ocfs2_dinode *alloc = NULL; 1028 struct ocfs2_local_alloc *la; 1029 1030 mlog_entry_void(); 1031 1032 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1033 la = OCFS2_LOCAL_ALLOC(alloc); 1034 1035 if (alloc->id1.bitmap1.i_total) 1036 mlog(0, "asking me to alloc a new window over a non-empty " 1037 "one\n"); 1038 1039 mlog(0, "Allocating %u clusters for a new window.\n", 1040 osb->local_alloc_bits); 1041 1042 /* Instruct the allocation code to try the most recently used 1043 * cluster group. We'll re-record the group used this pass 1044 * below. */ 1045 ac->ac_last_group = osb->la_last_gd; 1046 1047 /* we used the generic suballoc reserve function, but we set 1048 * everything up nicely, so there's no reason why we can't use 1049 * the more specific cluster api to claim bits. */ 1050 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1051 &cluster_off, &cluster_count); 1052 if (status == -ENOSPC) { 1053 retry_enospc: 1054 /* 1055 * Note: We could also try syncing the journal here to 1056 * allow use of any free bits which the current 1057 * transaction can't give us access to. --Mark 1058 */ 1059 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == 1060 OCFS2_LA_DISABLED) 1061 goto bail; 1062 1063 status = ocfs2_claim_clusters(osb, handle, ac, 1064 osb->local_alloc_bits, 1065 &cluster_off, 1066 &cluster_count); 1067 if (status == -ENOSPC) 1068 goto retry_enospc; 1069 /* 1070 * We only shrunk the *minimum* number of in our 1071 * request - it's entirely possible that the allocator 1072 * might give us more than we asked for. 
1073 */ 1074 if (status == 0) { 1075 spin_lock(&osb->osb_lock); 1076 osb->local_alloc_bits = cluster_count; 1077 spin_unlock(&osb->osb_lock); 1078 } 1079 } 1080 if (status < 0) { 1081 if (status != -ENOSPC) 1082 mlog_errno(status); 1083 goto bail; 1084 } 1085 1086 osb->la_last_gd = ac->ac_last_group; 1087 1088 la->la_bm_off = cpu_to_le32(cluster_off); 1089 alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count); 1090 /* just in case... In the future when we find space ourselves, 1091 * we don't have to get all contiguous -- but we'll have to 1092 * set all previously used bits in bitmap and update 1093 * la_bits_set before setting the bits in the main bitmap. */ 1094 alloc->id1.bitmap1.i_used = 0; 1095 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1096 le16_to_cpu(la->la_size)); 1097 1098 mlog(0, "New window allocated:\n"); 1099 mlog(0, "window la_bm_off = %u\n", 1100 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1101 mlog(0, "window bits = %u\n", le32_to_cpu(alloc->id1.bitmap1.i_total)); 1102 1103 bail: 1104 mlog_exit(status); 1105 return status; 1106 } 1107 1108 /* Note that we do *NOT* lock the local alloc inode here as 1109 * it's been locked already for us. */ 1110 static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 1111 struct inode *local_alloc_inode) 1112 { 1113 int status = 0; 1114 struct buffer_head *main_bm_bh = NULL; 1115 struct inode *main_bm_inode = NULL; 1116 handle_t *handle = NULL; 1117 struct ocfs2_dinode *alloc; 1118 struct ocfs2_dinode *alloc_copy = NULL; 1119 struct ocfs2_alloc_context *ac = NULL; 1120 1121 mlog_entry_void(); 1122 1123 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); 1124 1125 /* This will lock the main bitmap for us. 
*/ 1126 status = ocfs2_local_alloc_reserve_for_window(osb, 1127 &ac, 1128 &main_bm_inode, 1129 &main_bm_bh); 1130 if (status < 0) { 1131 if (status != -ENOSPC) 1132 mlog_errno(status); 1133 goto bail; 1134 } 1135 1136 handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS); 1137 if (IS_ERR(handle)) { 1138 status = PTR_ERR(handle); 1139 handle = NULL; 1140 mlog_errno(status); 1141 goto bail; 1142 } 1143 1144 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 1145 1146 /* We want to clear the local alloc before doing anything 1147 * else, so that if we error later during this operation, 1148 * local alloc shutdown won't try to double free main bitmap 1149 * bits. Make a copy so the sync function knows which bits to 1150 * free. */ 1151 alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS); 1152 if (!alloc_copy) { 1153 status = -ENOMEM; 1154 mlog_errno(status); 1155 goto bail; 1156 } 1157 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1158 1159 status = ocfs2_journal_access_di(handle, local_alloc_inode, 1160 osb->local_alloc_bh, 1161 OCFS2_JOURNAL_ACCESS_WRITE); 1162 if (status < 0) { 1163 mlog_errno(status); 1164 goto bail; 1165 } 1166 1167 ocfs2_clear_local_alloc(alloc); 1168 1169 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh); 1170 if (status < 0) { 1171 mlog_errno(status); 1172 goto bail; 1173 } 1174 1175 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1176 main_bm_inode, main_bm_bh); 1177 if (status < 0) { 1178 mlog_errno(status); 1179 goto bail; 1180 } 1181 1182 status = ocfs2_local_alloc_new_window(osb, handle, ac); 1183 if (status < 0) { 1184 if (status != -ENOSPC) 1185 mlog_errno(status); 1186 goto bail; 1187 } 1188 1189 atomic_inc(&osb->alloc_stats.moves); 1190 1191 status = 0; 1192 bail: 1193 if (handle) 1194 ocfs2_commit_trans(osb, handle); 1195 1196 brelse(main_bm_bh); 1197 1198 if (main_bm_inode) 1199 iput(main_bm_inode); 1200 1201 if (alloc_copy) 1202 kfree(alloc_copy); 1203 1204 if (ac) 1205 
ocfs2_free_alloc_context(ac); 1206 1207 mlog_exit(status); 1208 return status; 1209 } 1210 1211