1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * suballoc.c 5 * 6 * metadata alloc and free 7 * Inspired by ext3 block groups. 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public 22 * License along with this program; if not, write to the 23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 24 * Boston, MA 021110-1307, USA. 25 */ 26 27 #include <linux/fs.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/highmem.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "dlmglue.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "localalloc.h" 42 #include "suballoc.h" 43 #include "super.h" 44 #include "sysfile.h" 45 #include "uptodate.h" 46 47 #include "buffer_head_io.h" 48 49 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 50 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 51 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 52 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 53 struct inode *alloc_inode, 54 struct buffer_head *bg_bh, 55 u64 group_blkno, 56 u16 my_chain, 57 struct ocfs2_chain_list *cl); 58 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 59 struct inode *alloc_inode, 60 struct buffer_head *bh); 61 62 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 63 struct ocfs2_alloc_context *ac); 64 65 static int ocfs2_cluster_group_search(struct inode *inode, 66 struct buffer_head *group_bh, 67 u32 bits_wanted, u32 min_bits, 68 u16 *bit_off, u16 *bits_found); 69 static int ocfs2_block_group_search(struct inode *inode, 70 struct buffer_head *group_bh, 71 u32 bits_wanted, u32 min_bits, 72 u16 *bit_off, u16 *bits_found); 73 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 74 u32 bits_wanted, 75 u32 min_bits, 76 u16 *bit_off, 77 unsigned int *num_bits, 78 u64 *bg_blkno); 79 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 80 struct ocfs2_alloc_context *ac, 81 u32 bits_wanted, 82 u32 min_bits, 83 u16 *bit_off, 84 unsigned int *num_bits, 85 u64 *bg_blkno); 86 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 87 int nr); 88 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 89 struct buffer_head *bg_bh, 90 unsigned int bits_wanted, 91 u16 *bit_off, 92 u16 *bits_found); 93 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 94 struct inode *alloc_inode, 95 struct ocfs2_group_desc *bg, 96 struct buffer_head *group_bh, 97 unsigned int bit_off, 98 unsigned int num_bits); 99 static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 100 struct inode *alloc_inode, 101 struct ocfs2_group_desc *bg, 102 struct buffer_head *group_bh, 103 unsigned int bit_off, 104 unsigned int num_bits); 105 106 static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 107 struct inode *alloc_inode, 108 struct buffer_head *fe_bh, 109 struct buffer_head *bg_bh, 110 struct buffer_head *prev_bg_bh, 111 u16 chain); 112 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 113 u32 wanted); 114 static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 115 struct inode *alloc_inode, 116 struct buffer_head *alloc_bh, 117 unsigned int start_bit, 118 u64 bg_blkno, 119 unsigned int count); 120 static inline u64 ocfs2_which_suballoc_group(u64 block, 121 unsigned int bit); 122 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 123 u64 bg_blkno, 124 u16 bg_bit_off); 125 static inline u64 ocfs2_which_cluster_group(struct inode *inode, 126 u32 cluster); 127 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 128 u64 data_blkno, 129 u64 *bg_blkno, 130 u16 *bg_bit_off); 131 132 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 133 { 134 if (ac->ac_inode) 135 iput(ac->ac_inode); 136 if (ac->ac_bh) 137 brelse(ac->ac_bh); 138 kfree(ac); 139 } 140 141 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) 142 { 143 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 144 } 145 146 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, 147 struct inode *alloc_inode, 148 struct buffer_head *bg_bh, 149 u64 group_blkno, 150 u16 my_chain, 151 struct ocfs2_chain_list *cl) 152 { 153 int status = 0; 154 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 155 struct super_block * sb = alloc_inode->i_sb; 156 157 mlog_entry_void(); 158 159 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 160 ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") " 161 "!= b_blocknr (%llu)", group_blkno, 162 (unsigned long long) bg_bh->b_blocknr); 163 status = -EIO; 164 goto bail; 165 } 166 167 status = ocfs2_journal_access(handle, 168 alloc_inode, 169 bg_bh, 170 OCFS2_JOURNAL_ACCESS_CREATE); 171 if (status < 0) { 172 mlog_errno(status); 173 goto bail; 174 } 175 176 memset(bg, 0, sb->s_blocksize); 177 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 178 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 179 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 180 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 181 bg->bg_chain = cpu_to_le16(my_chain); 182 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 183 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 184 bg->bg_blkno = cpu_to_le64(group_blkno); 185 /* set the 1st bit in the bitmap to account for the descriptor block */ 186 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 187 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 188 189 status = ocfs2_journal_dirty(handle, bg_bh); 190 if (status < 0) 191 mlog_errno(status); 192 193 /* There is no need to zero out or otherwise initialize the 194 * other blocks in a group - All valid FS metadata in a block 195 * group stores the superblock fs_generation value at 196 * allocation time. */ 197 198 bail: 199 mlog_exit(status); 200 return status; 201 } 202 203 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) 204 { 205 u16 curr, best; 206 207 best = curr = 0; 208 while (curr < le16_to_cpu(cl->cl_count)) { 209 if (le32_to_cpu(cl->cl_recs[best].c_total) > 210 le32_to_cpu(cl->cl_recs[curr].c_total)) 211 best = curr; 212 curr++; 213 } 214 return best; 215 } 216 217 /* 218 * We expect the block group allocator to already be locked. 219 */ 220 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 221 struct inode *alloc_inode, 222 struct buffer_head *bh) 223 { 224 int status, credits; 225 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 226 struct ocfs2_chain_list *cl; 227 struct ocfs2_alloc_context *ac = NULL; 228 struct ocfs2_journal_handle *handle = NULL; 229 u32 bit_off, num_bits; 230 u16 alloc_rec; 231 u64 bg_blkno; 232 struct buffer_head *bg_bh = NULL; 233 struct ocfs2_group_desc *bg; 234 235 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 236 237 mlog_entry_void(); 238 239 handle = ocfs2_alloc_handle(osb); 240 if (!handle) { 241 status = -ENOMEM; 242 mlog_errno(status); 243 goto bail; 244 } 245 246 cl = &fe->id2.i_chain; 247 status = ocfs2_reserve_clusters(osb, 248 handle, 249 le16_to_cpu(cl->cl_cpg), 250 &ac); 251 if (status < 0) { 252 if (status != -ENOSPC) 253 mlog_errno(status); 254 goto bail; 255 } 256 257 credits = ocfs2_calc_group_alloc_credits(osb->sb, 258 le16_to_cpu(cl->cl_cpg)); 259 handle = ocfs2_start_trans(osb, handle, credits); 260 if (IS_ERR(handle)) { 261 status = PTR_ERR(handle); 262 handle = NULL; 263 mlog_errno(status); 264 goto bail; 265 } 266 267 status = ocfs2_claim_clusters(osb, 268 handle, 269 ac, 270 le16_to_cpu(cl->cl_cpg), 271 &bit_off, 272 &num_bits); 273 if (status < 0) { 274 if (status != -ENOSPC) 275 mlog_errno(status); 276 goto bail; 277 } 278 279 alloc_rec = ocfs2_find_smallest_chain(cl); 280 281 /* setup the group */ 282 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 283 mlog(0, "new descriptor, record %u, at block %"MLFu64"\n", 284 alloc_rec, bg_blkno); 285 286 bg_bh = sb_getblk(osb->sb, bg_blkno); 287 if (!bg_bh) { 288 status = -EIO; 289 mlog_errno(status); 290 goto bail; 291 } 292 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 293 294 status = ocfs2_block_group_fill(handle, 295 alloc_inode, 296 bg_bh, 297 bg_blkno, 298 alloc_rec, 299 cl); 300 if (status < 0) { 301 mlog_errno(status); 302 goto bail; 303 } 304 305 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 306 307 status = ocfs2_journal_access(handle, alloc_inode, 308 bh, OCFS2_JOURNAL_ACCESS_WRITE); 309 if (status < 0) { 310 mlog_errno(status); 311 goto bail; 312 } 313 314 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 315 le16_to_cpu(bg->bg_free_bits_count)); 316 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 317 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 318 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 319 le16_add_cpu(&cl->cl_next_free_rec, 1); 320 321 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - 322 le16_to_cpu(bg->bg_free_bits_count)); 323 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 324 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 325 326 status = ocfs2_journal_dirty(handle, bh); 327 if (status < 0) { 328 mlog_errno(status); 329 goto bail; 330 } 331 332 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 333 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 334 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, 335 le32_to_cpu(fe->i_clusters))); 336 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); 337 i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); 338 alloc_inode->i_blocks = 339 ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode)); 340 341 status = 0; 342 bail: 343 if (handle) 344 ocfs2_commit_trans(handle); 345 346 if (ac) 347 ocfs2_free_alloc_context(ac); 348 349 if (bg_bh) 350 brelse(bg_bh); 351 352 mlog_exit(status); 353 return status; 354 } 355 356 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 357 struct ocfs2_alloc_context *ac) 358 { 359 int status; 360 u32 bits_wanted = ac->ac_bits_wanted; 361 struct inode *alloc_inode = ac->ac_inode; 362 struct buffer_head *bh = NULL; 363 struct ocfs2_journal_handle *handle = ac->ac_handle; 364 struct ocfs2_dinode *fe; 365 u32 free_bits; 366 367 mlog_entry_void(); 368 369 BUG_ON(handle->flags & OCFS2_HANDLE_STARTED); 370 371 ocfs2_handle_add_inode(handle, alloc_inode); 372 status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1); 373 if (status < 0) { 374 mlog_errno(status); 375 goto bail; 376 } 377 378 fe = (struct ocfs2_dinode *) bh->b_data; 379 if (!OCFS2_IS_VALID_DINODE(fe)) { 380 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 381 status = -EIO; 382 goto bail; 383 } 384 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { 385 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator " 386 "# %"MLFu64, le64_to_cpu(fe->i_blkno)); 387 status = -EIO; 388 goto bail; 389 } 390 391 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - 392 le32_to_cpu(fe->id1.bitmap1.i_used); 393 394 if (bits_wanted > free_bits) { 395 /* cluster bitmap never grows */ 396 if (ocfs2_is_cluster_bitmap(alloc_inode)) { 397 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 398 bits_wanted, free_bits); 399 status = -ENOSPC; 400 goto bail; 401 } 402 403 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 404 if (status < 0) { 405 if (status != -ENOSPC) 406 mlog_errno(status); 407 goto bail; 408 } 409 atomic_inc(&osb->alloc_stats.bg_extends); 410 411 /* You should never ask for this much metadata */ 412 BUG_ON(bits_wanted > 413 (le32_to_cpu(fe->id1.bitmap1.i_total) 414 - le32_to_cpu(fe->id1.bitmap1.i_used))); 415 } 416 417 get_bh(bh); 418 ac->ac_bh = bh; 419 bail: 420 if (bh) 421 brelse(bh); 422 423 mlog_exit(status); 424 return status; 425 } 426 427 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 428 struct ocfs2_journal_handle *handle, 429 struct ocfs2_dinode *fe, 430 struct ocfs2_alloc_context **ac) 431 { 432 int status; 433 struct inode *alloc_inode = NULL; 434 435 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 436 if (!(*ac)) { 437 status = -ENOMEM; 438 mlog_errno(status); 439 goto bail; 440 } 441 442 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 443 (*ac)->ac_handle = handle; 444 (*ac)->ac_which = OCFS2_AC_USE_META; 445 446 #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS 447 alloc_inode = ocfs2_get_system_file_inode(osb, 448 EXTENT_ALLOC_SYSTEM_INODE, 449 0); 450 #else 451 alloc_inode = ocfs2_get_system_file_inode(osb, 452 EXTENT_ALLOC_SYSTEM_INODE, 453 osb->slot_num); 454 #endif 455 if (!alloc_inode) { 456 status = -ENOMEM; 457 mlog_errno(status); 458 goto bail; 459 } 460 461 (*ac)->ac_inode = igrab(alloc_inode); 462 (*ac)->ac_group_search = ocfs2_block_group_search; 463 464 status = ocfs2_reserve_suballoc_bits(osb, (*ac)); 465 if (status < 0) { 466 if (status != -ENOSPC) 467 mlog_errno(status); 468 goto bail; 469 } 470 471 status = 0; 472 bail: 473 if ((status < 0) && *ac) { 474 ocfs2_free_alloc_context(*ac); 475 *ac = NULL; 476 } 477 478 if (alloc_inode) 479 iput(alloc_inode); 480 481 mlog_exit(status); 482 return status; 483 } 484 485 int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 486 struct ocfs2_journal_handle *handle, 487 struct ocfs2_alloc_context **ac) 488 { 489 int status; 490 struct inode *alloc_inode = NULL; 491 492 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 493 if (!(*ac)) { 494 status = -ENOMEM; 495 mlog_errno(status); 496 goto bail; 497 } 498 499 (*ac)->ac_bits_wanted = 1; 500 (*ac)->ac_handle = handle; 501 (*ac)->ac_which = OCFS2_AC_USE_INODE; 502 503 alloc_inode = ocfs2_get_system_file_inode(osb, 504 INODE_ALLOC_SYSTEM_INODE, 505 osb->slot_num); 506 if (!alloc_inode) { 507 status = -ENOMEM; 508 mlog_errno(status); 509 goto bail; 510 } 511 512 (*ac)->ac_inode = igrab(alloc_inode); 513 (*ac)->ac_group_search = ocfs2_block_group_search; 514 515 status = ocfs2_reserve_suballoc_bits(osb, *ac); 516 if (status < 0) { 517 if (status != -ENOSPC) 518 mlog_errno(status); 519 goto bail; 520 } 521 522 status = 0; 523 bail: 524 if ((status < 0) && *ac) { 525 ocfs2_free_alloc_context(*ac); 526 *ac = NULL; 527 } 528 529 if (alloc_inode) 530 iput(alloc_inode); 531 532 mlog_exit(status); 533 return status; 534 } 535 536 /* local alloc code has to do the same thing, so rather than do this 537 * twice.. */ 538 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, 539 struct ocfs2_alloc_context *ac) 540 { 541 int status; 542 543 ac->ac_inode = ocfs2_get_system_file_inode(osb, 544 GLOBAL_BITMAP_SYSTEM_INODE, 545 OCFS2_INVALID_SLOT); 546 if (!ac->ac_inode) { 547 status = -EINVAL; 548 mlog(ML_ERROR, "Could not get bitmap inode!\n"); 549 goto bail; 550 } 551 ac->ac_which = OCFS2_AC_USE_MAIN; 552 ac->ac_group_search = ocfs2_cluster_group_search; 553 554 status = ocfs2_reserve_suballoc_bits(osb, ac); 555 if (status < 0 && status != -ENOSPC) 556 mlog_errno(status); 557 bail: 558 return status; 559 } 560 561 /* Callers don't need to care which bitmap (local alloc or main) to 562 * use so we figure it out for them, but unfortunately this clutters 563 * things a bit. */ 564 int ocfs2_reserve_clusters(struct ocfs2_super *osb, 565 struct ocfs2_journal_handle *handle, 566 u32 bits_wanted, 567 struct ocfs2_alloc_context **ac) 568 { 569 int status; 570 571 mlog_entry_void(); 572 573 BUG_ON(!handle); 574 575 *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 576 if (!(*ac)) { 577 status = -ENOMEM; 578 mlog_errno(status); 579 goto bail; 580 } 581 582 (*ac)->ac_bits_wanted = bits_wanted; 583 (*ac)->ac_handle = handle; 584 585 status = -ENOSPC; 586 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 587 status = ocfs2_reserve_local_alloc_bits(osb, 588 handle, 589 bits_wanted, 590 *ac); 591 if ((status < 0) && (status != -ENOSPC)) { 592 mlog_errno(status); 593 goto bail; 594 } else if (status == -ENOSPC) { 595 /* reserve_local_bits will return enospc with 596 * the local alloc inode still locked, so we 597 * can change this safely here. */ 598 mlog(0, "Disabling local alloc\n"); 599 /* We set to OCFS2_LA_DISABLED so that umount 600 * can clean up what's left of the local 601 * allocation */ 602 osb->local_alloc_state = OCFS2_LA_DISABLED; 603 } 604 } 605 606 if (status == -ENOSPC) { 607 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 608 if (status < 0) { 609 if (status != -ENOSPC) 610 mlog_errno(status); 611 goto bail; 612 } 613 } 614 615 status = 0; 616 bail: 617 if ((status < 0) && *ac) { 618 ocfs2_free_alloc_context(*ac); 619 *ac = NULL; 620 } 621 622 mlog_exit(status); 623 return status; 624 } 625 626 /* 627 * More or less lifted from ext3. I'll leave their description below: 628 * 629 * "For ext3 allocations, we must not reuse any blocks which are 630 * allocated in the bitmap buffer's "last committed data" copy. This 631 * prevents deletes from freeing up the page for reuse until we have 632 * committed the delete transaction. 633 * 634 * If we didn't do this, then deleting something and reallocating it as 635 * data would allow the old block to be overwritten before the 636 * transaction committed (because we force data to disk before commit). 637 * This would lead to corruption if we crashed between overwriting the 638 * data and committing the delete. 639 * 640 * @@@ We may want to make this allocation behaviour conditional on 641 * data-writes at some point, and disable it for metadata allocations or 642 * sync-data inodes." 643 * 644 * Note: OCFS2 already does this differently for metadata vs data 645 * allocations, as those bitmaps are seperate and undo access is never 646 * called on a metadata group descriptor. 647 */ 648 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 649 int nr) 650 { 651 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 652 653 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 654 return 0; 655 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 656 return 1; 657 658 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 659 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 660 } 661 662 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 663 struct buffer_head *bg_bh, 664 unsigned int bits_wanted, 665 u16 *bit_off, 666 u16 *bits_found) 667 { 668 void *bitmap; 669 u16 best_offset, best_size; 670 int offset, start, found, status = 0; 671 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 672 673 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 674 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); 675 return -EIO; 676 } 677 678 found = start = best_offset = best_size = 0; 679 bitmap = bg->bg_bitmap; 680 681 while((offset = ocfs2_find_next_zero_bit(bitmap, 682 le16_to_cpu(bg->bg_bits), 683 start)) != -1) { 684 if (offset == le16_to_cpu(bg->bg_bits)) 685 break; 686 687 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 688 /* We found a zero, but we can't use it as it 689 * hasn't been put to disk yet! */ 690 found = 0; 691 start = offset + 1; 692 } else if (offset == start) { 693 /* we found a zero */ 694 found++; 695 /* move start to the next bit to test */ 696 start++; 697 } else { 698 /* got a zero after some ones */ 699 found = 1; 700 start = offset + 1; 701 } 702 if (found > best_size) { 703 best_size = found; 704 best_offset = start - found; 705 } 706 /* we got everything we needed */ 707 if (found == bits_wanted) { 708 /* mlog(0, "Found it all!\n"); */ 709 break; 710 } 711 } 712 713 /* XXX: I think the first clause is equivalent to the second 714 * - jlbec */ 715 if (found == bits_wanted) { 716 *bit_off = start - found; 717 *bits_found = found; 718 } else if (best_size) { 719 *bit_off = best_offset; 720 *bits_found = best_size; 721 } else { 722 status = -ENOSPC; 723 /* No error log here -- see the comment above 724 * ocfs2_test_bg_bit_allocatable */ 725 } 726 727 return status; 728 } 729 730 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, 731 struct inode *alloc_inode, 732 struct ocfs2_group_desc *bg, 733 struct buffer_head *group_bh, 734 unsigned int bit_off, 735 unsigned int num_bits) 736 { 737 int status; 738 void *bitmap = bg->bg_bitmap; 739 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 740 741 mlog_entry_void(); 742 743 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 744 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 745 status = -EIO; 746 goto bail; 747 } 748 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 749 750 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 751 num_bits); 752 753 if (ocfs2_is_cluster_bitmap(alloc_inode)) 754 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 755 756 status = ocfs2_journal_access(handle, 757 alloc_inode, 758 group_bh, 759 journal_type); 760 if (status < 0) { 761 mlog_errno(status); 762 goto bail; 763 } 764 765 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 766 767 while(num_bits--) 768 ocfs2_set_bit(bit_off++, bitmap); 769 770 status = ocfs2_journal_dirty(handle, 771 group_bh); 772 if (status < 0) { 773 mlog_errno(status); 774 goto bail; 775 } 776 777 bail: 778 mlog_exit(status); 779 return status; 780 } 781 782 /* find the one with the most empty bits */ 783 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) 784 { 785 u16 curr, best; 786 787 BUG_ON(!cl->cl_next_free_rec); 788 789 best = curr = 0; 790 while (curr < le16_to_cpu(cl->cl_next_free_rec)) { 791 if (le32_to_cpu(cl->cl_recs[curr].c_free) > 792 le32_to_cpu(cl->cl_recs[best].c_free)) 793 best = curr; 794 curr++; 795 } 796 797 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); 798 return best; 799 } 800 801 static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, 802 struct inode *alloc_inode, 803 struct buffer_head *fe_bh, 804 struct buffer_head *bg_bh, 805 struct buffer_head *prev_bg_bh, 806 u16 chain) 807 { 808 int status; 809 /* there is a really tiny chance the journal calls could fail, 810 * but we wouldn't want inconsistent blocks in *any* case. */ 811 u64 fe_ptr, bg_ptr, prev_bg_ptr; 812 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 813 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 814 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; 815 816 if (!OCFS2_IS_VALID_DINODE(fe)) { 817 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 818 status = -EIO; 819 goto out; 820 } 821 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 822 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 823 status = -EIO; 824 goto out; 825 } 826 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { 827 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); 828 status = -EIO; 829 goto out; 830 } 831 832 mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to " 833 "top, prev = %"MLFu64"\n", 834 fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno); 835 836 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 837 bg_ptr = le64_to_cpu(bg->bg_next_group); 838 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 839 840 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, 841 OCFS2_JOURNAL_ACCESS_WRITE); 842 if (status < 0) { 843 mlog_errno(status); 844 goto out_rollback; 845 } 846 847 prev_bg->bg_next_group = bg->bg_next_group; 848 849 status = ocfs2_journal_dirty(handle, prev_bg_bh); 850 if (status < 0) { 851 mlog_errno(status); 852 goto out_rollback; 853 } 854 855 status = ocfs2_journal_access(handle, alloc_inode, bg_bh, 856 OCFS2_JOURNAL_ACCESS_WRITE); 857 if (status < 0) { 858 mlog_errno(status); 859 goto out_rollback; 860 } 861 862 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 863 864 status = ocfs2_journal_dirty(handle, bg_bh); 865 if (status < 0) { 866 mlog_errno(status); 867 goto out_rollback; 868 } 869 870 status = ocfs2_journal_access(handle, alloc_inode, fe_bh, 871 OCFS2_JOURNAL_ACCESS_WRITE); 872 if (status < 0) { 873 mlog_errno(status); 874 goto out_rollback; 875 } 876 877 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 878 879 status = ocfs2_journal_dirty(handle, fe_bh); 880 if (status < 0) { 881 mlog_errno(status); 882 goto out_rollback; 883 } 884 885 status = 0; 886 out_rollback: 887 if (status < 0) { 888 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 889 bg->bg_next_group = cpu_to_le64(bg_ptr); 890 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 891 } 892 out: 893 mlog_exit(status); 894 return status; 895 } 896 897 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 898 u32 wanted) 899 { 900 return le16_to_cpu(bg->bg_free_bits_count) > wanted; 901 } 902 903 /* return 0 on success, -ENOSPC to keep searching and any other < 0 904 * value on error. */ 905 static int ocfs2_cluster_group_search(struct inode *inode, 906 struct buffer_head *group_bh, 907 u32 bits_wanted, u32 min_bits, 908 u16 *bit_off, u16 *bits_found) 909 { 910 int search = -ENOSPC; 911 int ret; 912 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 913 u16 tmp_off, tmp_found; 914 915 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 916 917 if (bg->bg_free_bits_count) { 918 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 919 group_bh, bits_wanted, 920 &tmp_off, &tmp_found); 921 if (ret) 922 return ret; 923 924 /* ocfs2_block_group_find_clear_bits() might 925 * return success, but we still want to return 926 * -ENOSPC unless it found the minimum number 927 * of bits. */ 928 if (min_bits <= tmp_found) { 929 *bit_off = tmp_off; 930 *bits_found = tmp_found; 931 search = 0; /* success */ 932 } 933 } 934 935 return search; 936 } 937 938 static int ocfs2_block_group_search(struct inode *inode, 939 struct buffer_head *group_bh, 940 u32 bits_wanted, u32 min_bits, 941 u16 *bit_off, u16 *bits_found) 942 { 943 int ret = -ENOSPC; 944 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 945 946 BUG_ON(min_bits != 1); 947 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 948 949 if (bg->bg_free_bits_count) 950 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 951 group_bh, bits_wanted, 952 bit_off, bits_found); 953 954 return ret; 955 } 956 957 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 958 u32 bits_wanted, 959 u32 min_bits, 960 u16 *bit_off, 961 unsigned int *num_bits, 962 u64 *bg_blkno) 963 { 964 int status; 965 u16 chain, tmp_bits; 966 u32 tmp_used; 967 u64 next_group; 968 struct ocfs2_journal_handle *handle = ac->ac_handle; 969 struct inode *alloc_inode = ac->ac_inode; 970 struct buffer_head *group_bh = NULL; 971 struct buffer_head *prev_group_bh = NULL; 972 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 973 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 974 struct ocfs2_group_desc *bg; 975 976 chain = ac->ac_chain; 977 mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n", 978 bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno); 979 980 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 981 le64_to_cpu(cl->cl_recs[chain].c_blkno), 982 &group_bh, OCFS2_BH_CACHED, alloc_inode); 983 if (status < 0) { 984 mlog_errno(status); 985 goto bail; 986 } 987 bg = (struct ocfs2_group_desc *) group_bh->b_data; 988 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 989 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 990 status = -EIO; 991 goto bail; 992 } 993 994 status = -ENOSPC; 995 /* for now, the chain search is a bit simplistic. We just use 996 * the 1st group with any empty bits. */ 997 while ((status = ac->ac_group_search(alloc_inode, group_bh, 998 bits_wanted, min_bits, bit_off, 999 &tmp_bits)) == -ENOSPC) { 1000 if (!bg->bg_next_group) 1001 break; 1002 1003 if (prev_group_bh) { 1004 brelse(prev_group_bh); 1005 prev_group_bh = NULL; 1006 } 1007 next_group = le64_to_cpu(bg->bg_next_group); 1008 prev_group_bh = group_bh; 1009 group_bh = NULL; 1010 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1011 next_group, &group_bh, 1012 OCFS2_BH_CACHED, alloc_inode); 1013 if (status < 0) { 1014 mlog_errno(status); 1015 goto bail; 1016 } 1017 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1018 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1019 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1020 status = -EIO; 1021 goto bail; 1022 } 1023 } 1024 if (status < 0) { 1025 if (status != -ENOSPC) 1026 mlog_errno(status); 1027 goto bail; 1028 } 1029 1030 mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n", 1031 tmp_bits, bg->bg_blkno); 1032 1033 *num_bits = tmp_bits; 1034 1035 BUG_ON(*num_bits == 0); 1036 1037 /* 1038 * Keep track of previous block descriptor read. When 1039 * we find a target, if we have read more than X 1040 * number of descriptors, and the target is reasonably 1041 * empty, relink him to top of his chain. 1042 * 1043 * We've read 0 extra blocks and only send one more to 1044 * the transaction, yet the next guy to search has a 1045 * much easier time. 1046 * 1047 * Do this *after* figuring out how many bits we're taking out 1048 * of our target group. 1049 */ 1050 if (ac->ac_allow_chain_relink && 1051 (prev_group_bh) && 1052 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1053 status = ocfs2_relink_block_group(handle, alloc_inode, 1054 ac->ac_bh, group_bh, 1055 prev_group_bh, chain); 1056 if (status < 0) { 1057 mlog_errno(status); 1058 goto bail; 1059 } 1060 } 1061 1062 /* Ok, claim our bits now: set the info on dinode, chainlist 1063 * and then the group */ 1064 status = ocfs2_journal_access(handle, 1065 alloc_inode, 1066 ac->ac_bh, 1067 OCFS2_JOURNAL_ACCESS_WRITE); 1068 if (status < 0) { 1069 mlog_errno(status); 1070 goto bail; 1071 } 1072 1073 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1074 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1075 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1076 1077 status = ocfs2_journal_dirty(handle, 1078 ac->ac_bh); 1079 if (status < 0) { 1080 mlog_errno(status); 1081 goto bail; 1082 } 1083 1084 status = ocfs2_block_group_set_bits(handle, 1085 alloc_inode, 1086 bg, 1087 group_bh, 1088 *bit_off, 1089 *num_bits); 1090 if (status < 0) { 1091 mlog_errno(status); 1092 goto bail; 1093 } 1094 1095 mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n", 1096 *num_bits, fe->i_blkno); 1097 1098 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1099 bail: 1100 if (group_bh) 1101 brelse(group_bh); 1102 if (prev_group_bh) 1103 brelse(prev_group_bh); 1104 1105 mlog_exit(status); 1106 return status; 1107 } 1108 1109 /* will give out up to bits_wanted contiguous bits. */ 1110 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1111 struct ocfs2_alloc_context *ac, 1112 u32 bits_wanted, 1113 u32 min_bits, 1114 u16 *bit_off, 1115 unsigned int *num_bits, 1116 u64 *bg_blkno) 1117 { 1118 int status; 1119 u16 victim, i; 1120 struct ocfs2_chain_list *cl; 1121 struct ocfs2_dinode *fe; 1122 1123 mlog_entry_void(); 1124 1125 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1126 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1127 BUG_ON(!ac->ac_bh); 1128 1129 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1130 if (!OCFS2_IS_VALID_DINODE(fe)) { 1131 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1132 status = -EIO; 1133 goto bail; 1134 } 1135 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1136 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1137 ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u" 1138 "used bits but only %u total.", 1139 le64_to_cpu(fe->i_blkno), 1140 le32_to_cpu(fe->id1.bitmap1.i_used), 1141 le32_to_cpu(fe->id1.bitmap1.i_total)); 1142 status = -EIO; 1143 goto bail; 1144 } 1145 1146 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1147 1148 victim = ocfs2_find_victim_chain(cl); 1149 ac->ac_chain = victim; 1150 ac->ac_allow_chain_relink = 1; 1151 1152 status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, 1153 num_bits, bg_blkno); 1154 if (!status) 1155 goto bail; 1156 if (status < 0 && status != -ENOSPC) { 1157 mlog_errno(status); 1158 goto bail; 1159 } 1160 1161 mlog(0, "Search of victim chain %u came up with nothing, " 1162 "trying all chains now.\n", victim); 1163 1164 /* If we didn't pick a good victim, then just default to 1165 * searching each chain in order. Don't allow chain relinking 1166 * because we only calculate enough journal credits for one 1167 * relink per alloc. */ 1168 ac->ac_allow_chain_relink = 0; 1169 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1170 if (i == victim) 1171 continue; 1172 if (!cl->cl_recs[i].c_free) 1173 continue; 1174 1175 ac->ac_chain = i; 1176 status = ocfs2_search_chain(ac, bits_wanted, min_bits, 1177 bit_off, num_bits, 1178 bg_blkno); 1179 if (!status) 1180 break; 1181 if (status < 0 && status != -ENOSPC) { 1182 mlog_errno(status); 1183 goto bail; 1184 } 1185 } 1186 bail: 1187 1188 mlog_exit(status); 1189 return status; 1190 } 1191 1192 int ocfs2_claim_metadata(struct ocfs2_super *osb, 1193 struct ocfs2_journal_handle *handle, 1194 struct ocfs2_alloc_context *ac, 1195 u32 bits_wanted, 1196 u16 *suballoc_bit_start, 1197 unsigned int *num_bits, 1198 u64 *blkno_start) 1199 { 1200 int status; 1201 u64 bg_blkno; 1202 1203 BUG_ON(!ac); 1204 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1205 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1206 BUG_ON(ac->ac_handle != handle); 1207 1208 status = ocfs2_claim_suballoc_bits(osb, 1209 ac, 1210 bits_wanted, 1211 1, 1212 suballoc_bit_start, 1213 num_bits, 1214 &bg_blkno); 1215 if (status < 0) { 1216 mlog_errno(status); 1217 goto bail; 1218 } 1219 atomic_inc(&osb->alloc_stats.bg_allocs); 1220 1221 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1222 ac->ac_bits_given += (*num_bits); 1223 status = 0; 1224 bail: 1225 mlog_exit(status); 1226 return status; 1227 } 1228 1229 int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1230 struct ocfs2_journal_handle *handle, 1231 struct ocfs2_alloc_context *ac, 1232 u16 *suballoc_bit, 1233 u64 *fe_blkno) 1234 { 1235 int status; 1236 unsigned int num_bits; 1237 u64 bg_blkno; 1238 1239 mlog_entry_void(); 1240 1241 BUG_ON(!ac); 1242 BUG_ON(ac->ac_bits_given != 0); 1243 BUG_ON(ac->ac_bits_wanted != 1); 1244 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1245 BUG_ON(ac->ac_handle != handle); 1246 1247 status = ocfs2_claim_suballoc_bits(osb, 1248 ac, 1249 1, 1250 1, 1251 suballoc_bit, 1252 &num_bits, 1253 &bg_blkno); 1254 if (status < 0) { 1255 mlog_errno(status); 1256 goto bail; 1257 } 1258 atomic_inc(&osb->alloc_stats.bg_allocs); 1259 1260 BUG_ON(num_bits != 1); 1261 1262 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1263 ac->ac_bits_given++; 1264 status = 0; 1265 bail: 1266 mlog_exit(status); 1267 return status; 1268 } 1269 1270 /* translate a group desc. blkno and it's bitmap offset into 1271 * disk cluster offset. */ 1272 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 1273 u64 bg_blkno, 1274 u16 bg_bit_off) 1275 { 1276 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1277 u32 cluster = 0; 1278 1279 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1280 1281 if (bg_blkno != osb->first_cluster_group_blkno) 1282 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno); 1283 cluster += (u32) bg_bit_off; 1284 return cluster; 1285 } 1286 1287 /* given a cluster offset, calculate which block group it belongs to 1288 * and return that block offset. */ 1289 static inline u64 ocfs2_which_cluster_group(struct inode *inode, 1290 u32 cluster) 1291 { 1292 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1293 u32 group_no; 1294 1295 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1296 1297 group_no = cluster / osb->bitmap_cpg; 1298 if (!group_no) 1299 return osb->first_cluster_group_blkno; 1300 return ocfs2_clusters_to_blocks(inode->i_sb, 1301 group_no * osb->bitmap_cpg); 1302 } 1303 1304 /* given the block number of a cluster start, calculate which cluster 1305 * group and descriptor bitmap offset that corresponds to. */ 1306 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 1307 u64 data_blkno, 1308 u64 *bg_blkno, 1309 u16 *bg_bit_off) 1310 { 1311 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1312 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno); 1313 1314 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1315 1316 *bg_blkno = ocfs2_which_cluster_group(inode, 1317 data_cluster); 1318 1319 if (*bg_blkno == osb->first_cluster_group_blkno) 1320 *bg_bit_off = (u16) data_cluster; 1321 else 1322 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb, 1323 data_blkno - *bg_blkno); 1324 } 1325 1326 /* 1327 * min_bits - minimum contiguous chunk from this total allocation we 1328 * can handle. set to what we asked for originally for a full 1329 * contig. allocation, set to '1' to indicate we can deal with extents 1330 * of any size. 1331 */ 1332 int ocfs2_claim_clusters(struct ocfs2_super *osb, 1333 struct ocfs2_journal_handle *handle, 1334 struct ocfs2_alloc_context *ac, 1335 u32 min_clusters, 1336 u32 *cluster_start, 1337 u32 *num_clusters) 1338 { 1339 int status; 1340 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1341 u64 bg_blkno; 1342 u16 bg_bit_off; 1343 1344 mlog_entry_void(); 1345 1346 BUG_ON(!ac); 1347 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1348 1349 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 1350 && ac->ac_which != OCFS2_AC_USE_MAIN); 1351 BUG_ON(ac->ac_handle != handle); 1352 1353 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 1354 status = ocfs2_claim_local_alloc_bits(osb, 1355 handle, 1356 ac, 1357 bits_wanted, 1358 cluster_start, 1359 num_clusters); 1360 if (!status) 1361 atomic_inc(&osb->alloc_stats.local_data); 1362 } else { 1363 if (min_clusters > (osb->bitmap_cpg - 1)) { 1364 /* The only paths asking for contiguousness 1365 * should know about this already. */ 1366 mlog(ML_ERROR, "minimum allocation requested exceeds " 1367 "group bitmap size!"); 1368 status = -ENOSPC; 1369 goto bail; 1370 } 1371 /* clamp the current request down to a realistic size. */ 1372 if (bits_wanted > (osb->bitmap_cpg - 1)) 1373 bits_wanted = osb->bitmap_cpg - 1; 1374 1375 status = ocfs2_claim_suballoc_bits(osb, 1376 ac, 1377 bits_wanted, 1378 min_clusters, 1379 &bg_bit_off, 1380 num_clusters, 1381 &bg_blkno); 1382 if (!status) { 1383 *cluster_start = 1384 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 1385 bg_blkno, 1386 bg_bit_off); 1387 atomic_inc(&osb->alloc_stats.bitmap_data); 1388 } 1389 } 1390 if (status < 0) { 1391 if (status != -ENOSPC) 1392 mlog_errno(status); 1393 goto bail; 1394 } 1395 1396 ac->ac_bits_given += *num_clusters; 1397 1398 bail: 1399 mlog_exit(status); 1400 return status; 1401 } 1402 1403 static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, 1404 struct inode *alloc_inode, 1405 struct ocfs2_group_desc *bg, 1406 struct buffer_head *group_bh, 1407 unsigned int bit_off, 1408 unsigned int num_bits) 1409 { 1410 int status; 1411 unsigned int tmp; 1412 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1413 struct ocfs2_group_desc *undo_bg = NULL; 1414 1415 mlog_entry_void(); 1416 1417 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1418 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1419 status = -EIO; 1420 goto bail; 1421 } 1422 1423 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1424 1425 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1426 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1427 1428 status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1429 journal_type); 1430 if (status < 0) { 1431 mlog_errno(status); 1432 goto bail; 1433 } 1434 1435 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1436 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1437 1438 tmp = num_bits; 1439 while(tmp--) { 1440 ocfs2_clear_bit((bit_off + tmp), 1441 (unsigned long *) bg->bg_bitmap); 1442 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1443 ocfs2_set_bit(bit_off + tmp, 1444 (unsigned long *) undo_bg->bg_bitmap); 1445 } 1446 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1447 1448 status = ocfs2_journal_dirty(handle, group_bh); 1449 if (status < 0) 1450 mlog_errno(status); 1451 bail: 1452 return status; 1453 } 1454 1455 /* 1456 * expects the suballoc inode to already be locked. 1457 */ 1458 static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, 1459 struct inode *alloc_inode, 1460 struct buffer_head *alloc_bh, 1461 unsigned int start_bit, 1462 u64 bg_blkno, 1463 unsigned int count) 1464 { 1465 int status = 0; 1466 u32 tmp_used; 1467 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); 1468 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1469 struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1470 struct buffer_head *group_bh = NULL; 1471 struct ocfs2_group_desc *group; 1472 1473 mlog_entry_void(); 1474 1475 if (!OCFS2_IS_VALID_DINODE(fe)) { 1476 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1477 status = -EIO; 1478 goto bail; 1479 } 1480 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1481 1482 mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64 1483 ", starting at %u\n", 1484 OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno, 1485 start_bit); 1486 1487 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 1488 alloc_inode); 1489 if (status < 0) { 1490 mlog_errno(status); 1491 goto bail; 1492 } 1493 1494 group = (struct ocfs2_group_desc *) group_bh->b_data; 1495 if (!OCFS2_IS_VALID_GROUP_DESC(group)) { 1496 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group); 1497 status = -EIO; 1498 goto bail; 1499 } 1500 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1501 1502 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1503 group, group_bh, 1504 start_bit, count); 1505 if (status < 0) { 1506 mlog_errno(status); 1507 goto bail; 1508 } 1509 1510 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1511 OCFS2_JOURNAL_ACCESS_WRITE); 1512 if (status < 0) { 1513 mlog_errno(status); 1514 goto bail; 1515 } 1516 1517 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, 1518 count); 1519 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1520 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 1521 1522 status = ocfs2_journal_dirty(handle, alloc_bh); 1523 if (status < 0) { 1524 mlog_errno(status); 1525 goto bail; 1526 } 1527 1528 bail: 1529 if (group_bh) 1530 brelse(group_bh); 1531 1532 mlog_exit(status); 1533 return status; 1534 } 1535 1536 static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) 1537 { 1538 u64 group = block - (u64) bit; 1539 1540 return group; 1541 } 1542 1543 int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, 1544 struct inode *inode_alloc_inode, 1545 struct buffer_head *inode_alloc_bh, 1546 struct ocfs2_dinode *di) 1547 { 1548 u64 blk = le64_to_cpu(di->i_blkno); 1549 u16 bit = le16_to_cpu(di->i_suballoc_bit); 1550 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1551 1552 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 1553 inode_alloc_bh, bit, bg_blkno, 1); 1554 } 1555 1556 int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, 1557 struct inode *eb_alloc_inode, 1558 struct buffer_head *eb_alloc_bh, 1559 struct ocfs2_extent_block *eb) 1560 { 1561 u64 blk = le64_to_cpu(eb->h_blkno); 1562 u16 bit = le16_to_cpu(eb->h_suballoc_bit); 1563 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1564 1565 return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, 1566 bit, bg_blkno, 1); 1567 } 1568 1569 int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, 1570 struct inode *bitmap_inode, 1571 struct buffer_head *bitmap_bh, 1572 u64 start_blk, 1573 unsigned int num_clusters) 1574 { 1575 int status; 1576 u16 bg_start_bit; 1577 u64 bg_blkno; 1578 struct ocfs2_dinode *fe; 1579 1580 /* You can't ever have a contiguous set of clusters 1581 * bigger than a block group bitmap so we never have to worry 1582 * about looping on them. */ 1583 1584 mlog_entry_void(); 1585 1586 /* This is expensive. We can safely remove once this stuff has 1587 * gotten tested really well. */ 1588 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 1589 1590 fe = (struct ocfs2_dinode *) bitmap_bh->b_data; 1591 1592 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 1593 &bg_start_bit); 1594 1595 mlog(0, "want to free %u clusters starting at block %"MLFu64"\n", 1596 num_clusters, start_blk); 1597 mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n", 1598 bg_blkno, bg_start_bit); 1599 1600 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1601 bg_start_bit, bg_blkno, 1602 num_clusters); 1603 if (status < 0) 1604 mlog_errno(status); 1605 1606 mlog_exit(status); 1607 return status; 1608 } 1609 1610 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) 1611 { 1612 printk("Block Group:\n"); 1613 printk("bg_signature: %s\n", bg->bg_signature); 1614 printk("bg_size: %u\n", bg->bg_size); 1615 printk("bg_bits: %u\n", bg->bg_bits); 1616 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); 1617 printk("bg_chain: %u\n", bg->bg_chain); 1618 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); 1619 printk("bg_next_group: %"MLFu64"\n", bg->bg_next_group); 1620 printk("bg_parent_dinode: %"MLFu64"\n", bg->bg_parent_dinode); 1621 printk("bg_blkno: %"MLFu64"\n", bg->bg_blkno); 1622 } 1623 1624 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) 1625 { 1626 int i; 1627 1628 printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno); 1629 printk("i_signature: %s\n", fe->i_signature); 1630 printk("i_size: %"MLFu64"\n", fe->i_size); 1631 printk("i_clusters: %u\n", fe->i_clusters); 1632 printk("i_generation: %u\n", 1633 le32_to_cpu(fe->i_generation)); 1634 printk("id1.bitmap1.i_used: %u\n", 1635 le32_to_cpu(fe->id1.bitmap1.i_used)); 1636 printk("id1.bitmap1.i_total: %u\n", 1637 le32_to_cpu(fe->id1.bitmap1.i_total)); 1638 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); 1639 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); 1640 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); 1641 printk("id2.i_chain.cl_next_free_rec: %u\n", 1642 fe->id2.i_chain.cl_next_free_rec); 1643 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { 1644 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, 1645 fe->id2.i_chain.cl_recs[i].c_free); 1646 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, 1647 fe->id2.i_chain.cl_recs[i].c_total); 1648 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i, 1649 fe->id2.i_chain.cl_recs[i].c_blkno); 1650 } 1651 } 1652