1 /* -*- mode: c; c-basic-offset: 8; -*- 2 * vim: noexpandtab sw=8 ts=8 sts=0: 3 * 4 * suballoc.c 5 * 6 * metadata alloc and free 7 * Inspired by ext3 block groups. 8 * 9 * Copyright (C) 2002, 2004 Oracle. All rights reserved. 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public 22 * License along with this program; if not, write to the 23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 24 * Boston, MA 021110-1307, USA. 25 */ 26 27 #include <linux/fs.h> 28 #include <linux/types.h> 29 #include <linux/slab.h> 30 #include <linux/highmem.h> 31 32 #define MLOG_MASK_PREFIX ML_DISK_ALLOC 33 #include <cluster/masklog.h> 34 35 #include "ocfs2.h" 36 37 #include "alloc.h" 38 #include "dlmglue.h" 39 #include "inode.h" 40 #include "journal.h" 41 #include "localalloc.h" 42 #include "suballoc.h" 43 #include "super.h" 44 #include "sysfile.h" 45 #include "uptodate.h" 46 47 #include "buffer_head_io.h" 48 49 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 50 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 51 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 52 static int ocfs2_block_group_fill(handle_t *handle, 53 struct inode *alloc_inode, 54 struct buffer_head *bg_bh, 55 u64 group_blkno, 56 u16 my_chain, 57 struct ocfs2_chain_list *cl); 58 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 59 struct inode *alloc_inode, 60 struct buffer_head *bh); 61 62 static int ocfs2_cluster_group_search(struct inode *inode, 63 struct buffer_head *group_bh, 64 u32 bits_wanted, u32 min_bits, 65 u16 *bit_off, u16 *bits_found); 66 static int ocfs2_block_group_search(struct inode *inode, 67 struct buffer_head *group_bh, 68 u32 bits_wanted, u32 min_bits, 69 u16 *bit_off, u16 *bits_found); 70 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 71 struct ocfs2_alloc_context *ac, 72 handle_t *handle, 73 u32 bits_wanted, 74 u32 min_bits, 75 u16 *bit_off, 76 unsigned int *num_bits, 77 u64 *bg_blkno); 78 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 79 int nr); 80 static inline int ocfs2_block_group_set_bits(handle_t *handle, 81 struct inode *alloc_inode, 82 struct ocfs2_group_desc *bg, 83 struct buffer_head *group_bh, 84 unsigned int bit_off, 85 unsigned int num_bits); 86 static inline int ocfs2_block_group_clear_bits(handle_t *handle, 87 struct inode *alloc_inode, 88 struct ocfs2_group_desc *bg, 89 struct buffer_head *group_bh, 90 unsigned int bit_off, 91 unsigned int num_bits); 92 93 static int ocfs2_relink_block_group(handle_t *handle, 94 struct inode *alloc_inode, 95 struct buffer_head *fe_bh, 96 struct buffer_head *bg_bh, 97 struct buffer_head *prev_bg_bh, 98 u16 chain); 99 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 100 u32 wanted); 101 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 102 u64 bg_blkno, 103 u16 bg_bit_off); 104 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 105 u64 data_blkno, 106 u64 *bg_blkno, 107 u16 *bg_bit_off); 108 109 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 110 { 111 struct inode *inode = ac->ac_inode; 112 113 if (inode) { 114 if (ac->ac_which != OCFS2_AC_USE_LOCAL) 115 ocfs2_inode_unlock(inode, 1); 116 117 mutex_unlock(&inode->i_mutex); 118 119 iput(inode); 120 } 121 if (ac->ac_bh) 122 brelse(ac->ac_bh); 123 kfree(ac); 124 } 125 126 static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) 127 { 128 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); 129 } 130 131 /* somewhat more expensive than our other checks, so use sparingly. */ 132 int ocfs2_check_group_descriptor(struct super_block *sb, 133 struct ocfs2_dinode *di, 134 struct ocfs2_group_desc *gd) 135 { 136 unsigned int max_bits; 137 138 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { 139 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); 140 return -EIO; 141 } 142 143 if (di->i_blkno != gd->bg_parent_dinode) { 144 ocfs2_error(sb, "Group descriptor # %llu has bad parent " 145 "pointer (%llu, expected %llu)", 146 (unsigned long long)le64_to_cpu(gd->bg_blkno), 147 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), 148 (unsigned long long)le64_to_cpu(di->i_blkno)); 149 return -EIO; 150 } 151 152 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); 153 if (le16_to_cpu(gd->bg_bits) > max_bits) { 154 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", 155 (unsigned long long)le64_to_cpu(gd->bg_blkno), 156 le16_to_cpu(gd->bg_bits)); 157 return -EIO; 158 } 159 160 if (le16_to_cpu(gd->bg_chain) >= 161 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { 162 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", 163 (unsigned long long)le64_to_cpu(gd->bg_blkno), 164 le16_to_cpu(gd->bg_chain)); 165 return -EIO; 166 } 167 168 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { 169 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " 170 "claims that %u are free", 171 (unsigned long long)le64_to_cpu(gd->bg_blkno), 172 le16_to_cpu(gd->bg_bits), 173 le16_to_cpu(gd->bg_free_bits_count)); 174 return -EIO; 175 } 176 177 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { 178 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " 179 "max bitmap bits of %u", 180 (unsigned long long)le64_to_cpu(gd->bg_blkno), 181 le16_to_cpu(gd->bg_bits), 182 8 * le16_to_cpu(gd->bg_size)); 183 return -EIO; 184 } 185 186 return 0; 187 } 188 189 static int ocfs2_block_group_fill(handle_t *handle, 190 struct inode *alloc_inode, 191 struct buffer_head *bg_bh, 192 u64 group_blkno, 193 u16 my_chain, 194 struct ocfs2_chain_list *cl) 195 { 196 int status = 0; 197 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 198 struct super_block * sb = alloc_inode->i_sb; 199 200 mlog_entry_void(); 201 202 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { 203 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " 204 "b_blocknr (%llu)", 205 (unsigned long long)group_blkno, 206 (unsigned long long) bg_bh->b_blocknr); 207 status = -EIO; 208 goto bail; 209 } 210 211 status = ocfs2_journal_access(handle, 212 alloc_inode, 213 bg_bh, 214 OCFS2_JOURNAL_ACCESS_CREATE); 215 if (status < 0) { 216 mlog_errno(status); 217 goto bail; 218 } 219 220 memset(bg, 0, sb->s_blocksize); 221 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 222 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 223 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 224 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 225 bg->bg_chain = cpu_to_le16(my_chain); 226 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 227 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 228 bg->bg_blkno = cpu_to_le64(group_blkno); 229 /* set the 1st bit in the bitmap to account for the descriptor block */ 230 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 231 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 232 233 status = ocfs2_journal_dirty(handle, bg_bh); 234 if (status < 0) 235 mlog_errno(status); 236 237 /* There is no need to zero out or otherwise initialize the 238 * other blocks in a group - All valid FS metadata in a block 239 * group stores the superblock fs_generation value at 240 * allocation time. */ 241 242 bail: 243 mlog_exit(status); 244 return status; 245 } 246 247 static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) 248 { 249 u16 curr, best; 250 251 best = curr = 0; 252 while (curr < le16_to_cpu(cl->cl_count)) { 253 if (le32_to_cpu(cl->cl_recs[best].c_total) > 254 le32_to_cpu(cl->cl_recs[curr].c_total)) 255 best = curr; 256 curr++; 257 } 258 return best; 259 } 260 261 /* 262 * We expect the block group allocator to already be locked. 263 */ 264 static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 265 struct inode *alloc_inode, 266 struct buffer_head *bh) 267 { 268 int status, credits; 269 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 270 struct ocfs2_chain_list *cl; 271 struct ocfs2_alloc_context *ac = NULL; 272 handle_t *handle = NULL; 273 u32 bit_off, num_bits; 274 u16 alloc_rec; 275 u64 bg_blkno; 276 struct buffer_head *bg_bh = NULL; 277 struct ocfs2_group_desc *bg; 278 279 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); 280 281 mlog_entry_void(); 282 283 cl = &fe->id2.i_chain; 284 status = ocfs2_reserve_clusters(osb, 285 le16_to_cpu(cl->cl_cpg), 286 &ac); 287 if (status < 0) { 288 if (status != -ENOSPC) 289 mlog_errno(status); 290 goto bail; 291 } 292 293 credits = ocfs2_calc_group_alloc_credits(osb->sb, 294 le16_to_cpu(cl->cl_cpg)); 295 handle = ocfs2_start_trans(osb, credits); 296 if (IS_ERR(handle)) { 297 status = PTR_ERR(handle); 298 handle = NULL; 299 mlog_errno(status); 300 goto bail; 301 } 302 303 status = ocfs2_claim_clusters(osb, 304 handle, 305 ac, 306 le16_to_cpu(cl->cl_cpg), 307 &bit_off, 308 &num_bits); 309 if (status < 0) { 310 if (status != -ENOSPC) 311 mlog_errno(status); 312 goto bail; 313 } 314 315 alloc_rec = ocfs2_find_smallest_chain(cl); 316 317 /* setup the group */ 318 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); 319 mlog(0, "new descriptor, record %u, at block %llu\n", 320 alloc_rec, (unsigned long long)bg_blkno); 321 322 bg_bh = sb_getblk(osb->sb, bg_blkno); 323 if (!bg_bh) { 324 status = -EIO; 325 mlog_errno(status); 326 goto bail; 327 } 328 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 329 330 status = ocfs2_block_group_fill(handle, 331 alloc_inode, 332 bg_bh, 333 bg_blkno, 334 alloc_rec, 335 cl); 336 if (status < 0) { 337 mlog_errno(status); 338 goto bail; 339 } 340 341 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 342 343 status = ocfs2_journal_access(handle, alloc_inode, 344 bh, OCFS2_JOURNAL_ACCESS_WRITE); 345 if (status < 0) { 346 mlog_errno(status); 347 goto bail; 348 } 349 350 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 351 le16_to_cpu(bg->bg_free_bits_count)); 352 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 353 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 354 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 355 le16_add_cpu(&cl->cl_next_free_rec, 1); 356 357 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - 358 le16_to_cpu(bg->bg_free_bits_count)); 359 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 360 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 361 362 status = ocfs2_journal_dirty(handle, bh); 363 if (status < 0) { 364 mlog_errno(status); 365 goto bail; 366 } 367 368 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 369 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 370 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, 371 le32_to_cpu(fe->i_clusters))); 372 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); 373 i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); 374 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); 375 376 status = 0; 377 bail: 378 if (handle) 379 ocfs2_commit_trans(osb, handle); 380 381 if (ac) 382 ocfs2_free_alloc_context(ac); 383 384 if (bg_bh) 385 brelse(bg_bh); 386 387 mlog_exit(status); 388 return status; 389 } 390 391 static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, 392 struct ocfs2_alloc_context *ac, 393 int type, 394 u32 slot) 395 { 396 int status; 397 u32 bits_wanted = ac->ac_bits_wanted; 398 struct inode *alloc_inode; 399 struct buffer_head *bh = NULL; 400 struct ocfs2_dinode *fe; 401 u32 free_bits; 402 403 mlog_entry_void(); 404 405 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot); 406 if (!alloc_inode) { 407 mlog_errno(-EINVAL); 408 return -EINVAL; 409 } 410 411 mutex_lock(&alloc_inode->i_mutex); 412 413 status = ocfs2_inode_lock(alloc_inode, &bh, 1); 414 if (status < 0) { 415 mutex_unlock(&alloc_inode->i_mutex); 416 iput(alloc_inode); 417 418 mlog_errno(status); 419 return status; 420 } 421 422 ac->ac_inode = alloc_inode; 423 424 fe = (struct ocfs2_dinode *) bh->b_data; 425 if (!OCFS2_IS_VALID_DINODE(fe)) { 426 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 427 status = -EIO; 428 goto bail; 429 } 430 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { 431 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", 432 (unsigned long long)le64_to_cpu(fe->i_blkno)); 433 status = -EIO; 434 goto bail; 435 } 436 437 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - 438 le32_to_cpu(fe->id1.bitmap1.i_used); 439 440 if (bits_wanted > free_bits) { 441 /* cluster bitmap never grows */ 442 if (ocfs2_is_cluster_bitmap(alloc_inode)) { 443 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", 444 bits_wanted, free_bits); 445 status = -ENOSPC; 446 goto bail; 447 } 448 449 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 450 if (status < 0) { 451 if (status != -ENOSPC) 452 mlog_errno(status); 453 goto bail; 454 } 455 atomic_inc(&osb->alloc_stats.bg_extends); 456 457 /* You should never ask for this much metadata */ 458 BUG_ON(bits_wanted > 459 (le32_to_cpu(fe->id1.bitmap1.i_total) 460 - le32_to_cpu(fe->id1.bitmap1.i_used))); 461 } 462 463 get_bh(bh); 464 ac->ac_bh = bh; 465 bail: 466 if (bh) 467 brelse(bh); 468 469 mlog_exit(status); 470 return status; 471 } 472 473 int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 474 struct ocfs2_dinode *fe, 475 struct ocfs2_alloc_context **ac) 476 { 477 int status; 478 u32 slot; 479 480 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 481 if (!(*ac)) { 482 status = -ENOMEM; 483 mlog_errno(status); 484 goto bail; 485 } 486 487 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 488 (*ac)->ac_which = OCFS2_AC_USE_META; 489 slot = osb->slot_num; 490 (*ac)->ac_group_search = ocfs2_block_group_search; 491 492 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 493 EXTENT_ALLOC_SYSTEM_INODE, slot); 494 if (status < 0) { 495 if (status != -ENOSPC) 496 mlog_errno(status); 497 goto bail; 498 } 499 500 status = 0; 501 bail: 502 if ((status < 0) && *ac) { 503 ocfs2_free_alloc_context(*ac); 504 *ac = NULL; 505 } 506 507 mlog_exit(status); 508 return status; 509 } 510 511 int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 512 struct ocfs2_alloc_context **ac) 513 { 514 int status; 515 516 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 517 if (!(*ac)) { 518 status = -ENOMEM; 519 mlog_errno(status); 520 goto bail; 521 } 522 523 (*ac)->ac_bits_wanted = 1; 524 (*ac)->ac_which = OCFS2_AC_USE_INODE; 525 526 (*ac)->ac_group_search = ocfs2_block_group_search; 527 528 status = ocfs2_reserve_suballoc_bits(osb, *ac, 529 INODE_ALLOC_SYSTEM_INODE, 530 osb->slot_num); 531 if (status < 0) { 532 if (status != -ENOSPC) 533 mlog_errno(status); 534 goto bail; 535 } 536 537 status = 0; 538 bail: 539 if ((status < 0) && *ac) { 540 ocfs2_free_alloc_context(*ac); 541 *ac = NULL; 542 } 543 544 mlog_exit(status); 545 return status; 546 } 547 548 /* local alloc code has to do the same thing, so rather than do this 549 * twice.. */ 550 int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, 551 struct ocfs2_alloc_context *ac) 552 { 553 int status; 554 555 ac->ac_which = OCFS2_AC_USE_MAIN; 556 ac->ac_group_search = ocfs2_cluster_group_search; 557 558 status = ocfs2_reserve_suballoc_bits(osb, ac, 559 GLOBAL_BITMAP_SYSTEM_INODE, 560 OCFS2_INVALID_SLOT); 561 if (status < 0 && status != -ENOSPC) { 562 mlog_errno(status); 563 goto bail; 564 } 565 566 bail: 567 return status; 568 } 569 570 /* Callers don't need to care which bitmap (local alloc or main) to 571 * use so we figure it out for them, but unfortunately this clutters 572 * things a bit. */ 573 int ocfs2_reserve_clusters(struct ocfs2_super *osb, 574 u32 bits_wanted, 575 struct ocfs2_alloc_context **ac) 576 { 577 int status; 578 579 mlog_entry_void(); 580 581 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL); 582 if (!(*ac)) { 583 status = -ENOMEM; 584 mlog_errno(status); 585 goto bail; 586 } 587 588 (*ac)->ac_bits_wanted = bits_wanted; 589 590 status = -ENOSPC; 591 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 592 status = ocfs2_reserve_local_alloc_bits(osb, 593 bits_wanted, 594 *ac); 595 if ((status < 0) && (status != -ENOSPC)) { 596 mlog_errno(status); 597 goto bail; 598 } else if (status == -ENOSPC) { 599 /* reserve_local_bits will return enospc with 600 * the local alloc inode still locked, so we 601 * can change this safely here. */ 602 mlog(0, "Disabling local alloc\n"); 603 /* We set to OCFS2_LA_DISABLED so that umount 604 * can clean up what's left of the local 605 * allocation */ 606 osb->local_alloc_state = OCFS2_LA_DISABLED; 607 } 608 } 609 610 if (status == -ENOSPC) { 611 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 612 if (status < 0) { 613 if (status != -ENOSPC) 614 mlog_errno(status); 615 goto bail; 616 } 617 } 618 619 status = 0; 620 bail: 621 if ((status < 0) && *ac) { 622 ocfs2_free_alloc_context(*ac); 623 *ac = NULL; 624 } 625 626 mlog_exit(status); 627 return status; 628 } 629 630 /* 631 * More or less lifted from ext3. I'll leave their description below: 632 * 633 * "For ext3 allocations, we must not reuse any blocks which are 634 * allocated in the bitmap buffer's "last committed data" copy. This 635 * prevents deletes from freeing up the page for reuse until we have 636 * committed the delete transaction. 637 * 638 * If we didn't do this, then deleting something and reallocating it as 639 * data would allow the old block to be overwritten before the 640 * transaction committed (because we force data to disk before commit). 641 * This would lead to corruption if we crashed between overwriting the 642 * data and committing the delete. 643 * 644 * @@@ We may want to make this allocation behaviour conditional on 645 * data-writes at some point, and disable it for metadata allocations or 646 * sync-data inodes." 647 * 648 * Note: OCFS2 already does this differently for metadata vs data 649 * allocations, as those bitmaps are separate and undo access is never 650 * called on a metadata group descriptor. 651 */ 652 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 653 int nr) 654 { 655 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 656 657 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) 658 return 0; 659 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) 660 return 1; 661 662 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; 663 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); 664 } 665 666 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, 667 struct buffer_head *bg_bh, 668 unsigned int bits_wanted, 669 unsigned int total_bits, 670 u16 *bit_off, 671 u16 *bits_found) 672 { 673 void *bitmap; 674 u16 best_offset, best_size; 675 int offset, start, found, status = 0; 676 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 677 678 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 679 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); 680 return -EIO; 681 } 682 683 found = start = best_offset = best_size = 0; 684 bitmap = bg->bg_bitmap; 685 686 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { 687 if (offset == total_bits) 688 break; 689 690 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { 691 /* We found a zero, but we can't use it as it 692 * hasn't been put to disk yet! */ 693 found = 0; 694 start = offset + 1; 695 } else if (offset == start) { 696 /* we found a zero */ 697 found++; 698 /* move start to the next bit to test */ 699 start++; 700 } else { 701 /* got a zero after some ones */ 702 found = 1; 703 start = offset + 1; 704 } 705 if (found > best_size) { 706 best_size = found; 707 best_offset = start - found; 708 } 709 /* we got everything we needed */ 710 if (found == bits_wanted) { 711 /* mlog(0, "Found it all!\n"); */ 712 break; 713 } 714 } 715 716 /* XXX: I think the first clause is equivalent to the second 717 * - jlbec */ 718 if (found == bits_wanted) { 719 *bit_off = start - found; 720 *bits_found = found; 721 } else if (best_size) { 722 *bit_off = best_offset; 723 *bits_found = best_size; 724 } else { 725 status = -ENOSPC; 726 /* No error log here -- see the comment above 727 * ocfs2_test_bg_bit_allocatable */ 728 } 729 730 return status; 731 } 732 733 static inline int ocfs2_block_group_set_bits(handle_t *handle, 734 struct inode *alloc_inode, 735 struct ocfs2_group_desc *bg, 736 struct buffer_head *group_bh, 737 unsigned int bit_off, 738 unsigned int num_bits) 739 { 740 int status; 741 void *bitmap = bg->bg_bitmap; 742 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 743 744 mlog_entry_void(); 745 746 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 747 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 748 status = -EIO; 749 goto bail; 750 } 751 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); 752 753 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, 754 num_bits); 755 756 if (ocfs2_is_cluster_bitmap(alloc_inode)) 757 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 758 759 status = ocfs2_journal_access(handle, 760 alloc_inode, 761 group_bh, 762 journal_type); 763 if (status < 0) { 764 mlog_errno(status); 765 goto bail; 766 } 767 768 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 769 770 while(num_bits--) 771 ocfs2_set_bit(bit_off++, bitmap); 772 773 status = ocfs2_journal_dirty(handle, 774 group_bh); 775 if (status < 0) { 776 mlog_errno(status); 777 goto bail; 778 } 779 780 bail: 781 mlog_exit(status); 782 return status; 783 } 784 785 /* find the one with the most empty bits */ 786 static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) 787 { 788 u16 curr, best; 789 790 BUG_ON(!cl->cl_next_free_rec); 791 792 best = curr = 0; 793 while (curr < le16_to_cpu(cl->cl_next_free_rec)) { 794 if (le32_to_cpu(cl->cl_recs[curr].c_free) > 795 le32_to_cpu(cl->cl_recs[best].c_free)) 796 best = curr; 797 curr++; 798 } 799 800 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); 801 return best; 802 } 803 804 static int ocfs2_relink_block_group(handle_t *handle, 805 struct inode *alloc_inode, 806 struct buffer_head *fe_bh, 807 struct buffer_head *bg_bh, 808 struct buffer_head *prev_bg_bh, 809 u16 chain) 810 { 811 int status; 812 /* there is a really tiny chance the journal calls could fail, 813 * but we wouldn't want inconsistent blocks in *any* case. */ 814 u64 fe_ptr, bg_ptr, prev_bg_ptr; 815 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; 816 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 817 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; 818 819 if (!OCFS2_IS_VALID_DINODE(fe)) { 820 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 821 status = -EIO; 822 goto out; 823 } 824 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 825 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 826 status = -EIO; 827 goto out; 828 } 829 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { 830 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); 831 status = -EIO; 832 goto out; 833 } 834 835 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", 836 (unsigned long long)le64_to_cpu(fe->i_blkno), chain, 837 (unsigned long long)le64_to_cpu(bg->bg_blkno), 838 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno)); 839 840 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); 841 bg_ptr = le64_to_cpu(bg->bg_next_group); 842 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 843 844 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, 845 OCFS2_JOURNAL_ACCESS_WRITE); 846 if (status < 0) { 847 mlog_errno(status); 848 goto out_rollback; 849 } 850 851 prev_bg->bg_next_group = bg->bg_next_group; 852 853 status = ocfs2_journal_dirty(handle, prev_bg_bh); 854 if (status < 0) { 855 mlog_errno(status); 856 goto out_rollback; 857 } 858 859 status = ocfs2_journal_access(handle, alloc_inode, bg_bh, 860 OCFS2_JOURNAL_ACCESS_WRITE); 861 if (status < 0) { 862 mlog_errno(status); 863 goto out_rollback; 864 } 865 866 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 867 868 status = ocfs2_journal_dirty(handle, bg_bh); 869 if (status < 0) { 870 mlog_errno(status); 871 goto out_rollback; 872 } 873 874 status = ocfs2_journal_access(handle, alloc_inode, fe_bh, 875 OCFS2_JOURNAL_ACCESS_WRITE); 876 if (status < 0) { 877 mlog_errno(status); 878 goto out_rollback; 879 } 880 881 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 882 883 status = ocfs2_journal_dirty(handle, fe_bh); 884 if (status < 0) { 885 mlog_errno(status); 886 goto out_rollback; 887 } 888 889 status = 0; 890 out_rollback: 891 if (status < 0) { 892 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 893 bg->bg_next_group = cpu_to_le64(bg_ptr); 894 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); 895 } 896 out: 897 mlog_exit(status); 898 return status; 899 } 900 901 static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, 902 u32 wanted) 903 { 904 return le16_to_cpu(bg->bg_free_bits_count) > wanted; 905 } 906 907 /* return 0 on success, -ENOSPC to keep searching and any other < 0 908 * value on error. */ 909 static int ocfs2_cluster_group_search(struct inode *inode, 910 struct buffer_head *group_bh, 911 u32 bits_wanted, u32 min_bits, 912 u16 *bit_off, u16 *bits_found) 913 { 914 int search = -ENOSPC; 915 int ret; 916 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 917 u16 tmp_off, tmp_found; 918 unsigned int max_bits, gd_cluster_off; 919 920 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 921 922 if (gd->bg_free_bits_count) { 923 max_bits = le16_to_cpu(gd->bg_bits); 924 925 /* Tail groups in cluster bitmaps which aren't cpg 926 * aligned are prone to partial extention by a failed 927 * fs resize. If the file system resize never got to 928 * update the dinode cluster count, then we don't want 929 * to trust any clusters past it, regardless of what 930 * the group descriptor says. */ 931 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, 932 le64_to_cpu(gd->bg_blkno)); 933 if ((gd_cluster_off + max_bits) > 934 OCFS2_I(inode)->ip_clusters) { 935 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; 936 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", 937 (unsigned long long)le64_to_cpu(gd->bg_blkno), 938 le16_to_cpu(gd->bg_bits), 939 OCFS2_I(inode)->ip_clusters, max_bits); 940 } 941 942 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 943 group_bh, bits_wanted, 944 max_bits, 945 &tmp_off, &tmp_found); 946 if (ret) 947 return ret; 948 949 /* ocfs2_block_group_find_clear_bits() might 950 * return success, but we still want to return 951 * -ENOSPC unless it found the minimum number 952 * of bits. */ 953 if (min_bits <= tmp_found) { 954 *bit_off = tmp_off; 955 *bits_found = tmp_found; 956 search = 0; /* success */ 957 } 958 } 959 960 return search; 961 } 962 963 static int ocfs2_block_group_search(struct inode *inode, 964 struct buffer_head *group_bh, 965 u32 bits_wanted, u32 min_bits, 966 u16 *bit_off, u16 *bits_found) 967 { 968 int ret = -ENOSPC; 969 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 970 971 BUG_ON(min_bits != 1); 972 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 973 974 if (bg->bg_free_bits_count) 975 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 976 group_bh, bits_wanted, 977 le16_to_cpu(bg->bg_bits), 978 bit_off, bits_found); 979 980 return ret; 981 } 982 983 static int ocfs2_alloc_dinode_update_counts(struct inode *inode, 984 handle_t *handle, 985 struct buffer_head *di_bh, 986 u32 num_bits, 987 u16 chain) 988 { 989 int ret; 990 u32 tmp_used; 991 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 992 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; 993 994 ret = ocfs2_journal_access(handle, inode, di_bh, 995 OCFS2_JOURNAL_ACCESS_WRITE); 996 if (ret < 0) { 997 mlog_errno(ret); 998 goto out; 999 } 1000 1001 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); 1002 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); 1003 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); 1004 1005 ret = ocfs2_journal_dirty(handle, di_bh); 1006 if (ret < 0) 1007 mlog_errno(ret); 1008 1009 out: 1010 return ret; 1011 } 1012 1013 static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, 1014 handle_t *handle, 1015 u32 bits_wanted, 1016 u32 min_bits, 1017 u16 *bit_off, 1018 unsigned int *num_bits, 1019 u64 gd_blkno, 1020 u16 *bits_left) 1021 { 1022 int ret; 1023 u16 found; 1024 struct buffer_head *group_bh = NULL; 1025 struct ocfs2_group_desc *gd; 1026 struct inode *alloc_inode = ac->ac_inode; 1027 1028 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, 1029 &group_bh, OCFS2_BH_CACHED, alloc_inode); 1030 if (ret < 0) { 1031 mlog_errno(ret); 1032 return ret; 1033 } 1034 1035 gd = (struct ocfs2_group_desc *) group_bh->b_data; 1036 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { 1037 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); 1038 ret = -EIO; 1039 goto out; 1040 } 1041 1042 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1043 bit_off, &found); 1044 if (ret < 0) { 1045 if (ret != -ENOSPC) 1046 mlog_errno(ret); 1047 goto out; 1048 } 1049 1050 *num_bits = found; 1051 1052 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1053 *num_bits, 1054 le16_to_cpu(gd->bg_chain)); 1055 if (ret < 0) { 1056 mlog_errno(ret); 1057 goto out; 1058 } 1059 1060 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, 1061 *bit_off, *num_bits); 1062 if (ret < 0) 1063 mlog_errno(ret); 1064 1065 *bits_left = le16_to_cpu(gd->bg_free_bits_count); 1066 1067 out: 1068 brelse(group_bh); 1069 1070 return ret; 1071 } 1072 1073 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, 1074 handle_t *handle, 1075 u32 bits_wanted, 1076 u32 min_bits, 1077 u16 *bit_off, 1078 unsigned int *num_bits, 1079 u64 *bg_blkno, 1080 u16 *bits_left) 1081 { 1082 int status; 1083 u16 chain, tmp_bits; 1084 u32 tmp_used; 1085 u64 next_group; 1086 struct inode *alloc_inode = ac->ac_inode; 1087 struct buffer_head *group_bh = NULL; 1088 struct buffer_head *prev_group_bh = NULL; 1089 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1090 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1091 struct ocfs2_group_desc *bg; 1092 1093 chain = ac->ac_chain; 1094 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", 1095 bits_wanted, chain, 1096 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1097 1098 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1099 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1100 &group_bh, OCFS2_BH_CACHED, alloc_inode); 1101 if (status < 0) { 1102 mlog_errno(status); 1103 goto bail; 1104 } 1105 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1106 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); 1107 if (status) { 1108 mlog_errno(status); 1109 goto bail; 1110 } 1111 1112 status = -ENOSPC; 1113 /* for now, the chain search is a bit simplistic. We just use 1114 * the 1st group with any empty bits. */ 1115 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1116 bits_wanted, min_bits, bit_off, 1117 &tmp_bits)) == -ENOSPC) { 1118 if (!bg->bg_next_group) 1119 break; 1120 1121 if (prev_group_bh) { 1122 brelse(prev_group_bh); 1123 prev_group_bh = NULL; 1124 } 1125 next_group = le64_to_cpu(bg->bg_next_group); 1126 prev_group_bh = group_bh; 1127 group_bh = NULL; 1128 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1129 next_group, &group_bh, 1130 OCFS2_BH_CACHED, alloc_inode); 1131 if (status < 0) { 1132 mlog_errno(status); 1133 goto bail; 1134 } 1135 bg = (struct ocfs2_group_desc *) group_bh->b_data; 1136 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); 1137 if (status) { 1138 mlog_errno(status); 1139 goto bail; 1140 } 1141 } 1142 if (status < 0) { 1143 if (status != -ENOSPC) 1144 mlog_errno(status); 1145 goto bail; 1146 } 1147 1148 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1149 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1150 1151 *num_bits = tmp_bits; 1152 1153 BUG_ON(*num_bits == 0); 1154 1155 /* 1156 * Keep track of previous block descriptor read. When 1157 * we find a target, if we have read more than X 1158 * number of descriptors, and the target is reasonably 1159 * empty, relink him to top of his chain. 1160 * 1161 * We've read 0 extra blocks and only send one more to 1162 * the transaction, yet the next guy to search has a 1163 * much easier time. 1164 * 1165 * Do this *after* figuring out how many bits we're taking out 1166 * of our target group. 1167 */ 1168 if (ac->ac_allow_chain_relink && 1169 (prev_group_bh) && 1170 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1171 status = ocfs2_relink_block_group(handle, alloc_inode, 1172 ac->ac_bh, group_bh, 1173 prev_group_bh, chain); 1174 if (status < 0) { 1175 mlog_errno(status); 1176 goto bail; 1177 } 1178 } 1179 1180 /* Ok, claim our bits now: set the info on dinode, chainlist 1181 * and then the group */ 1182 status = ocfs2_journal_access(handle, 1183 alloc_inode, 1184 ac->ac_bh, 1185 OCFS2_JOURNAL_ACCESS_WRITE); 1186 if (status < 0) { 1187 mlog_errno(status); 1188 goto bail; 1189 } 1190 1191 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1192 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1193 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1194 1195 status = ocfs2_journal_dirty(handle, 1196 ac->ac_bh); 1197 if (status < 0) { 1198 mlog_errno(status); 1199 goto bail; 1200 } 1201 1202 status = ocfs2_block_group_set_bits(handle, 1203 alloc_inode, 1204 bg, 1205 group_bh, 1206 *bit_off, 1207 *num_bits); 1208 if (status < 0) { 1209 mlog_errno(status); 1210 goto bail; 1211 } 1212 1213 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1214 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1215 1216 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1217 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1218 bail: 1219 if (group_bh) 1220 brelse(group_bh); 1221 if (prev_group_bh) 1222 brelse(prev_group_bh); 1223 1224 mlog_exit(status); 1225 return status; 1226 } 1227 1228 /* will give out up to bits_wanted contiguous bits. */ 1229 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1230 struct ocfs2_alloc_context *ac, 1231 handle_t *handle, 1232 u32 bits_wanted, 1233 u32 min_bits, 1234 u16 *bit_off, 1235 unsigned int *num_bits, 1236 u64 *bg_blkno) 1237 { 1238 int status; 1239 u16 victim, i; 1240 u16 bits_left = 0; 1241 u64 hint_blkno = ac->ac_last_group; 1242 struct ocfs2_chain_list *cl; 1243 struct ocfs2_dinode *fe; 1244 1245 mlog_entry_void(); 1246 1247 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1248 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); 1249 BUG_ON(!ac->ac_bh); 1250 1251 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; 1252 if (!OCFS2_IS_VALID_DINODE(fe)) { 1253 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); 1254 status = -EIO; 1255 goto bail; 1256 } 1257 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1258 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1259 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1260 "bits but only %u total.", 1261 (unsigned long long)le64_to_cpu(fe->i_blkno), 1262 le32_to_cpu(fe->id1.bitmap1.i_used), 1263 le32_to_cpu(fe->id1.bitmap1.i_total)); 1264 status = -EIO; 1265 goto bail; 1266 } 1267 1268 if (hint_blkno) { 1269 /* Attempt to short-circuit the usual search mechanism 1270 * by jumping straight to the most recently used 1271 * allocation group. This helps us mantain some 1272 * contiguousness across allocations. */ 1273 status = ocfs2_search_one_group(ac, handle, bits_wanted, 1274 min_bits, bit_off, num_bits, 1275 hint_blkno, &bits_left); 1276 if (!status) { 1277 /* Be careful to update *bg_blkno here as the 1278 * caller is expecting it to be filled in, and 1279 * ocfs2_search_one_group() won't do that for 1280 * us. */ 1281 *bg_blkno = hint_blkno; 1282 goto set_hint; 1283 } 1284 if (status < 0 && status != -ENOSPC) { 1285 mlog_errno(status); 1286 goto bail; 1287 } 1288 } 1289 1290 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; 1291 1292 victim = ocfs2_find_victim_chain(cl); 1293 ac->ac_chain = victim; 1294 ac->ac_allow_chain_relink = 1; 1295 1296 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, 1297 num_bits, bg_blkno, &bits_left); 1298 if (!status) 1299 goto set_hint; 1300 if (status < 0 && status != -ENOSPC) { 1301 mlog_errno(status); 1302 goto bail; 1303 } 1304 1305 mlog(0, "Search of victim chain %u came up with nothing, " 1306 "trying all chains now.\n", victim); 1307 1308 /* If we didn't pick a good victim, then just default to 1309 * searching each chain in order. Don't allow chain relinking 1310 * because we only calculate enough journal credits for one 1311 * relink per alloc. */ 1312 ac->ac_allow_chain_relink = 0; 1313 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { 1314 if (i == victim) 1315 continue; 1316 if (!cl->cl_recs[i].c_free) 1317 continue; 1318 1319 ac->ac_chain = i; 1320 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1321 bit_off, num_bits, bg_blkno, 1322 &bits_left); 1323 if (!status) 1324 break; 1325 if (status < 0 && status != -ENOSPC) { 1326 mlog_errno(status); 1327 goto bail; 1328 } 1329 } 1330 1331 set_hint: 1332 if (status != -ENOSPC) { 1333 /* If the next search of this group is not likely to 1334 * yield a suitable extent, then we reset the last 1335 * group hint so as to not waste a disk read */ 1336 if (bits_left < min_bits) 1337 ac->ac_last_group = 0; 1338 else 1339 ac->ac_last_group = *bg_blkno; 1340 } 1341 1342 bail: 1343 mlog_exit(status); 1344 return status; 1345 } 1346 1347 int ocfs2_claim_metadata(struct ocfs2_super *osb, 1348 handle_t *handle, 1349 struct ocfs2_alloc_context *ac, 1350 u32 bits_wanted, 1351 u16 *suballoc_bit_start, 1352 unsigned int *num_bits, 1353 u64 *blkno_start) 1354 { 1355 int status; 1356 u64 bg_blkno; 1357 1358 BUG_ON(!ac); 1359 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1360 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1361 1362 status = ocfs2_claim_suballoc_bits(osb, 1363 ac, 1364 handle, 1365 bits_wanted, 1366 1, 1367 suballoc_bit_start, 1368 num_bits, 1369 &bg_blkno); 1370 if (status < 0) { 1371 mlog_errno(status); 1372 goto bail; 1373 } 1374 atomic_inc(&osb->alloc_stats.bg_allocs); 1375 1376 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1377 ac->ac_bits_given += (*num_bits); 1378 status = 0; 1379 bail: 1380 mlog_exit(status); 1381 return status; 1382 } 1383 1384 int ocfs2_claim_new_inode(struct ocfs2_super *osb, 1385 handle_t *handle, 1386 struct ocfs2_alloc_context *ac, 1387 u16 *suballoc_bit, 1388 u64 *fe_blkno) 1389 { 1390 int status; 1391 unsigned int num_bits; 1392 u64 bg_blkno; 1393 1394 mlog_entry_void(); 1395 1396 BUG_ON(!ac); 1397 BUG_ON(ac->ac_bits_given != 0); 1398 BUG_ON(ac->ac_bits_wanted != 1); 1399 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); 1400 1401 status = ocfs2_claim_suballoc_bits(osb, 1402 ac, 1403 handle, 1404 1, 1405 1, 1406 suballoc_bit, 1407 &num_bits, 1408 &bg_blkno); 1409 if (status < 0) { 1410 mlog_errno(status); 1411 goto bail; 1412 } 1413 atomic_inc(&osb->alloc_stats.bg_allocs); 1414 1415 BUG_ON(num_bits != 1); 1416 1417 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 1418 ac->ac_bits_given++; 1419 status = 0; 1420 bail: 1421 mlog_exit(status); 1422 return status; 1423 } 1424 1425 /* translate a group desc. blkno and it's bitmap offset into 1426 * disk cluster offset. */ 1427 static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, 1428 u64 bg_blkno, 1429 u16 bg_bit_off) 1430 { 1431 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1432 u32 cluster = 0; 1433 1434 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1435 1436 if (bg_blkno != osb->first_cluster_group_blkno) 1437 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno); 1438 cluster += (u32) bg_bit_off; 1439 return cluster; 1440 } 1441 1442 /* given a cluster offset, calculate which block group it belongs to 1443 * and return that block offset. */ 1444 u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster) 1445 { 1446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1447 u32 group_no; 1448 1449 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1450 1451 group_no = cluster / osb->bitmap_cpg; 1452 if (!group_no) 1453 return osb->first_cluster_group_blkno; 1454 return ocfs2_clusters_to_blocks(inode->i_sb, 1455 group_no * osb->bitmap_cpg); 1456 } 1457 1458 /* given the block number of a cluster start, calculate which cluster 1459 * group and descriptor bitmap offset that corresponds to. */ 1460 static inline void ocfs2_block_to_cluster_group(struct inode *inode, 1461 u64 data_blkno, 1462 u64 *bg_blkno, 1463 u16 *bg_bit_off) 1464 { 1465 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1466 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno); 1467 1468 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1469 1470 *bg_blkno = ocfs2_which_cluster_group(inode, 1471 data_cluster); 1472 1473 if (*bg_blkno == osb->first_cluster_group_blkno) 1474 *bg_bit_off = (u16) data_cluster; 1475 else 1476 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb, 1477 data_blkno - *bg_blkno); 1478 } 1479 1480 /* 1481 * min_bits - minimum contiguous chunk from this total allocation we 1482 * can handle. set to what we asked for originally for a full 1483 * contig. allocation, set to '1' to indicate we can deal with extents 1484 * of any size. 1485 */ 1486 int __ocfs2_claim_clusters(struct ocfs2_super *osb, 1487 handle_t *handle, 1488 struct ocfs2_alloc_context *ac, 1489 u32 min_clusters, 1490 u32 max_clusters, 1491 u32 *cluster_start, 1492 u32 *num_clusters) 1493 { 1494 int status; 1495 unsigned int bits_wanted = max_clusters; 1496 u64 bg_blkno = 0; 1497 u16 bg_bit_off; 1498 1499 mlog_entry_void(); 1500 1501 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); 1502 1503 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL 1504 && ac->ac_which != OCFS2_AC_USE_MAIN); 1505 1506 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 1507 status = ocfs2_claim_local_alloc_bits(osb, 1508 handle, 1509 ac, 1510 bits_wanted, 1511 cluster_start, 1512 num_clusters); 1513 if (!status) 1514 atomic_inc(&osb->alloc_stats.local_data); 1515 } else { 1516 if (min_clusters > (osb->bitmap_cpg - 1)) { 1517 /* The only paths asking for contiguousness 1518 * should know about this already. */ 1519 mlog(ML_ERROR, "minimum allocation requested %u exceeds " 1520 "group bitmap size %u!\n", min_clusters, 1521 osb->bitmap_cpg); 1522 status = -ENOSPC; 1523 goto bail; 1524 } 1525 /* clamp the current request down to a realistic size. */ 1526 if (bits_wanted > (osb->bitmap_cpg - 1)) 1527 bits_wanted = osb->bitmap_cpg - 1; 1528 1529 status = ocfs2_claim_suballoc_bits(osb, 1530 ac, 1531 handle, 1532 bits_wanted, 1533 min_clusters, 1534 &bg_bit_off, 1535 num_clusters, 1536 &bg_blkno); 1537 if (!status) { 1538 *cluster_start = 1539 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 1540 bg_blkno, 1541 bg_bit_off); 1542 atomic_inc(&osb->alloc_stats.bitmap_data); 1543 } 1544 } 1545 if (status < 0) { 1546 if (status != -ENOSPC) 1547 mlog_errno(status); 1548 goto bail; 1549 } 1550 1551 ac->ac_bits_given += *num_clusters; 1552 1553 bail: 1554 mlog_exit(status); 1555 return status; 1556 } 1557 1558 int ocfs2_claim_clusters(struct ocfs2_super *osb, 1559 handle_t *handle, 1560 struct ocfs2_alloc_context *ac, 1561 u32 min_clusters, 1562 u32 *cluster_start, 1563 u32 *num_clusters) 1564 { 1565 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 1566 1567 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, 1568 bits_wanted, cluster_start, num_clusters); 1569 } 1570 1571 static inline int ocfs2_block_group_clear_bits(handle_t *handle, 1572 struct inode *alloc_inode, 1573 struct ocfs2_group_desc *bg, 1574 struct buffer_head *group_bh, 1575 unsigned int bit_off, 1576 unsigned int num_bits) 1577 { 1578 int status; 1579 unsigned int tmp; 1580 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; 1581 struct ocfs2_group_desc *undo_bg = NULL; 1582 1583 mlog_entry_void(); 1584 1585 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { 1586 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); 1587 status = -EIO; 1588 goto bail; 1589 } 1590 1591 mlog(0, "off = %u, num = %u\n", bit_off, num_bits); 1592 1593 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1594 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1595 1596 status = ocfs2_journal_access(handle, alloc_inode, group_bh, 1597 journal_type); 1598 if (status < 0) { 1599 mlog_errno(status); 1600 goto bail; 1601 } 1602 1603 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1604 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; 1605 1606 tmp = num_bits; 1607 while(tmp--) { 1608 ocfs2_clear_bit((bit_off + tmp), 1609 (unsigned long *) bg->bg_bitmap); 1610 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1611 ocfs2_set_bit(bit_off + tmp, 1612 (unsigned long *) undo_bg->bg_bitmap); 1613 } 1614 le16_add_cpu(&bg->bg_free_bits_count, num_bits); 1615 1616 status = ocfs2_journal_dirty(handle, group_bh); 1617 if (status < 0) 1618 mlog_errno(status); 1619 bail: 1620 return status; 1621 } 1622 1623 /* 1624 * expects the suballoc inode to already be locked. 1625 */ 1626 int ocfs2_free_suballoc_bits(handle_t *handle, 1627 struct inode *alloc_inode, 1628 struct buffer_head *alloc_bh, 1629 unsigned int start_bit, 1630 u64 bg_blkno, 1631 unsigned int count) 1632 { 1633 int status = 0; 1634 u32 tmp_used; 1635 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); 1636 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1637 struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1638 struct buffer_head *group_bh = NULL; 1639 struct ocfs2_group_desc *group; 1640 1641 mlog_entry_void(); 1642 1643 if (!OCFS2_IS_VALID_DINODE(fe)) { 1644 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); 1645 status = -EIO; 1646 goto bail; 1647 } 1648 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); 1649 1650 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", 1651 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1652 (unsigned long long)bg_blkno, start_bit); 1653 1654 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 1655 alloc_inode); 1656 if (status < 0) { 1657 mlog_errno(status); 1658 goto bail; 1659 } 1660 1661 group = (struct ocfs2_group_desc *) group_bh->b_data; 1662 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); 1663 if (status) { 1664 mlog_errno(status); 1665 goto bail; 1666 } 1667 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); 1668 1669 status = ocfs2_block_group_clear_bits(handle, alloc_inode, 1670 group, group_bh, 1671 start_bit, count); 1672 if (status < 0) { 1673 mlog_errno(status); 1674 goto bail; 1675 } 1676 1677 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, 1678 OCFS2_JOURNAL_ACCESS_WRITE); 1679 if (status < 0) { 1680 mlog_errno(status); 1681 goto bail; 1682 } 1683 1684 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, 1685 count); 1686 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1687 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 1688 1689 status = ocfs2_journal_dirty(handle, alloc_bh); 1690 if (status < 0) { 1691 mlog_errno(status); 1692 goto bail; 1693 } 1694 1695 bail: 1696 if (group_bh) 1697 brelse(group_bh); 1698 1699 mlog_exit(status); 1700 return status; 1701 } 1702 1703 int ocfs2_free_dinode(handle_t *handle, 1704 struct inode *inode_alloc_inode, 1705 struct buffer_head *inode_alloc_bh, 1706 struct ocfs2_dinode *di) 1707 { 1708 u64 blk = le64_to_cpu(di->i_blkno); 1709 u16 bit = le16_to_cpu(di->i_suballoc_bit); 1710 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1711 1712 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 1713 inode_alloc_bh, bit, bg_blkno, 1); 1714 } 1715 1716 int ocfs2_free_clusters(handle_t *handle, 1717 struct inode *bitmap_inode, 1718 struct buffer_head *bitmap_bh, 1719 u64 start_blk, 1720 unsigned int num_clusters) 1721 { 1722 int status; 1723 u16 bg_start_bit; 1724 u64 bg_blkno; 1725 struct ocfs2_dinode *fe; 1726 1727 /* You can't ever have a contiguous set of clusters 1728 * bigger than a block group bitmap so we never have to worry 1729 * about looping on them. */ 1730 1731 mlog_entry_void(); 1732 1733 /* This is expensive. We can safely remove once this stuff has 1734 * gotten tested really well. */ 1735 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); 1736 1737 fe = (struct ocfs2_dinode *) bitmap_bh->b_data; 1738 1739 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, 1740 &bg_start_bit); 1741 1742 mlog(0, "want to free %u clusters starting at block %llu\n", 1743 num_clusters, (unsigned long long)start_blk); 1744 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", 1745 (unsigned long long)bg_blkno, bg_start_bit); 1746 1747 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1748 bg_start_bit, bg_blkno, 1749 num_clusters); 1750 if (status < 0) 1751 mlog_errno(status); 1752 1753 mlog_exit(status); 1754 return status; 1755 } 1756 1757 static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) 1758 { 1759 printk("Block Group:\n"); 1760 printk("bg_signature: %s\n", bg->bg_signature); 1761 printk("bg_size: %u\n", bg->bg_size); 1762 printk("bg_bits: %u\n", bg->bg_bits); 1763 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); 1764 printk("bg_chain: %u\n", bg->bg_chain); 1765 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); 1766 printk("bg_next_group: %llu\n", 1767 (unsigned long long)bg->bg_next_group); 1768 printk("bg_parent_dinode: %llu\n", 1769 (unsigned long long)bg->bg_parent_dinode); 1770 printk("bg_blkno: %llu\n", 1771 (unsigned long long)bg->bg_blkno); 1772 } 1773 1774 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) 1775 { 1776 int i; 1777 1778 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); 1779 printk("i_signature: %s\n", fe->i_signature); 1780 printk("i_size: %llu\n", 1781 (unsigned long long)fe->i_size); 1782 printk("i_clusters: %u\n", fe->i_clusters); 1783 printk("i_generation: %u\n", 1784 le32_to_cpu(fe->i_generation)); 1785 printk("id1.bitmap1.i_used: %u\n", 1786 le32_to_cpu(fe->id1.bitmap1.i_used)); 1787 printk("id1.bitmap1.i_total: %u\n", 1788 le32_to_cpu(fe->id1.bitmap1.i_total)); 1789 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); 1790 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); 1791 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); 1792 printk("id2.i_chain.cl_next_free_rec: %u\n", 1793 fe->id2.i_chain.cl_next_free_rec); 1794 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { 1795 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, 1796 fe->id2.i_chain.cl_recs[i].c_free); 1797 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, 1798 fe->id2.i_chain.cl_recs[i].c_total); 1799 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, 1800 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); 1801 } 1802 } 1803