1 /* 2 * linux/fs/ext4/balloc.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 10 * Big-endian to little-endian byte-swapping/bitmaps by 11 * David S. Miller (davem@caip.rutgers.edu), 1995 12 */ 13 14 #include <linux/time.h> 15 #include <linux/capability.h> 16 #include <linux/fs.h> 17 #include <linux/jbd2.h> 18 #include <linux/quotaops.h> 19 #include <linux/buffer_head.h> 20 #include "ext4.h" 21 #include "ext4_jbd2.h" 22 #include "mballoc.h" 23 24 #include <trace/events/ext4.h> 25 26 /* 27 * balloc.c contains the blocks allocation and deallocation routines 28 */ 29 30 /* 31 * Calculate the block group number and offset, given a block number 32 */ 33 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 34 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) 35 { 36 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 37 ext4_grpblk_t offset; 38 39 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 40 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 41 if (offsetp) 42 *offsetp = offset; 43 if (blockgrpp) 44 *blockgrpp = blocknr; 45 46 } 47 48 static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, 49 ext4_group_t block_group) 50 { 51 ext4_group_t actual_group; 52 ext4_get_group_no_and_offset(sb, block, &actual_group, NULL); 53 if (actual_group == block_group) 54 return 1; 55 return 0; 56 } 57 58 static int ext4_group_used_meta_blocks(struct super_block *sb, 59 ext4_group_t block_group, 60 struct ext4_group_desc *gdp) 61 { 62 ext4_fsblk_t tmp; 63 struct ext4_sb_info *sbi = EXT4_SB(sb); 64 /* block bitmap, inode bitmap, and inode table blocks */ 65 int used_blocks = sbi->s_itb_per_group + 2; 66 67 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 68 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 69 block_group)) 70 used_blocks--; 71 72 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), 73 block_group)) 74 used_blocks--; 75 76 tmp = ext4_inode_table(sb, gdp); 77 for (; tmp < ext4_inode_table(sb, gdp) + 78 sbi->s_itb_per_group; tmp++) { 79 if (!ext4_block_in_group(sb, tmp, block_group)) 80 used_blocks -= 1; 81 } 82 } 83 return used_blocks; 84 } 85 86 /* Initializes an uninitialized block bitmap if given, and returns the 87 * number of blocks free in the group. */ 88 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 89 ext4_group_t block_group, struct ext4_group_desc *gdp) 90 { 91 int bit, bit_max; 92 ext4_group_t ngroups = ext4_get_groups_count(sb); 93 unsigned free_blocks, group_blocks; 94 struct ext4_sb_info *sbi = EXT4_SB(sb); 95 96 if (bh) { 97 J_ASSERT_BH(bh, buffer_locked(bh)); 98 99 /* If checksum is bad mark all blocks used to prevent allocation 100 * essentially implementing a per-group read-only flag. */ 101 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 102 ext4_error(sb, "Checksum bad for group %u", 103 block_group); 104 ext4_free_blks_set(sb, gdp, 0); 105 ext4_free_inodes_set(sb, gdp, 0); 106 ext4_itable_unused_set(sb, gdp, 0); 107 memset(bh->b_data, 0xff, sb->s_blocksize); 108 return 0; 109 } 110 memset(bh->b_data, 0, sb->s_blocksize); 111 } 112 113 /* Check for superblock and gdt backups in this group */ 114 bit_max = ext4_bg_has_super(sb, block_group); 115 116 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 117 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * 118 sbi->s_desc_per_block) { 119 if (bit_max) { 120 bit_max += ext4_bg_num_gdb(sb, block_group); 121 bit_max += 122 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 123 } 124 } else { /* For META_BG_BLOCK_GROUPS */ 125 bit_max += ext4_bg_num_gdb(sb, block_group); 126 } 127 128 if (block_group == ngroups - 1) { 129 /* 130 * Even though mke2fs always initialize first and last group 131 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 132 * to make sure we calculate the right free blocks 133 */ 134 group_blocks = ext4_blocks_count(sbi->s_es) - 135 ext4_group_first_block_no(sb, ngroups - 1); 136 } else { 137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 138 } 139 140 free_blocks = group_blocks - bit_max; 141 142 if (bh) { 143 ext4_fsblk_t start, tmp; 144 int flex_bg = 0; 145 146 for (bit = 0; bit < bit_max; bit++) 147 ext4_set_bit(bit, bh->b_data); 148 149 start = ext4_group_first_block_no(sb, block_group); 150 151 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 152 EXT4_FEATURE_INCOMPAT_FLEX_BG)) 153 flex_bg = 1; 154 155 /* Set bits for block and inode bitmaps, and inode table */ 156 tmp = ext4_block_bitmap(sb, gdp); 157 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 158 ext4_set_bit(tmp - start, bh->b_data); 159 160 tmp = ext4_inode_bitmap(sb, gdp); 161 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 162 ext4_set_bit(tmp - start, bh->b_data); 163 164 tmp = ext4_inode_table(sb, gdp); 165 for (; tmp < ext4_inode_table(sb, gdp) + 166 sbi->s_itb_per_group; tmp++) { 167 if (!flex_bg || 168 ext4_block_in_group(sb, tmp, block_group)) 169 ext4_set_bit(tmp - start, bh->b_data); 170 } 171 /* 172 * Also if the number of blocks within the group is 173 * less than the blocksize * 8 ( which is the size 174 * of bitmap ), set rest of the block bitmap to 1 175 */ 176 ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8, 177 bh->b_data); 178 } 179 return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp); 180 } 181 182 183 /* 184 * The free blocks are managed by bitmaps. A file system contains several 185 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 186 * block for inodes, N blocks for the inode table and data blocks. 187 * 188 * The file system contains group descriptors which are located after the 189 * super block. Each descriptor contains the number of the bitmap block and 190 * the free blocks count in the block. The descriptors are loaded in memory 191 * when a file system is mounted (see ext4_fill_super). 192 */ 193 194 /** 195 * ext4_get_group_desc() -- load group descriptor from disk 196 * @sb: super block 197 * @block_group: given block group 198 * @bh: pointer to the buffer head to store the block 199 * group descriptor 200 */ 201 struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, 202 ext4_group_t block_group, 203 struct buffer_head **bh) 204 { 205 unsigned int group_desc; 206 unsigned int offset; 207 ext4_group_t ngroups = ext4_get_groups_count(sb); 208 struct ext4_group_desc *desc; 209 struct ext4_sb_info *sbi = EXT4_SB(sb); 210 211 if (block_group >= ngroups) { 212 ext4_error(sb, "block_group >= groups_count - block_group = %u," 213 " groups_count = %u", block_group, ngroups); 214 215 return NULL; 216 } 217 218 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 219 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 220 if (!sbi->s_group_desc[group_desc]) { 221 ext4_error(sb, "Group descriptor not loaded - " 222 "block_group = %u, group_desc = %u, desc = %u", 223 block_group, group_desc, offset); 224 return NULL; 225 } 226 227 desc = (struct ext4_group_desc *)( 228 (__u8 *)sbi->s_group_desc[group_desc]->b_data + 229 offset * EXT4_DESC_SIZE(sb)); 230 if (bh) 231 *bh = sbi->s_group_desc[group_desc]; 232 return desc; 233 } 234 235 static int ext4_valid_block_bitmap(struct super_block *sb, 236 struct ext4_group_desc *desc, 237 unsigned int block_group, 238 struct buffer_head *bh) 239 { 240 ext4_grpblk_t offset; 241 ext4_grpblk_t next_zero_bit; 242 ext4_fsblk_t bitmap_blk; 243 ext4_fsblk_t group_first_block; 244 245 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 246 /* with FLEX_BG, the inode/block bitmaps and itable 247 * blocks may not be in the group at all 248 * so the bitmap validation will be skipped for those groups 249 * or it has to also read the block group where the bitmaps 250 * are located to verify they are set. 251 */ 252 return 1; 253 } 254 group_first_block = ext4_group_first_block_no(sb, block_group); 255 256 /* check whether block bitmap block number is set */ 257 bitmap_blk = ext4_block_bitmap(sb, desc); 258 offset = bitmap_blk - group_first_block; 259 if (!ext4_test_bit(offset, bh->b_data)) 260 /* bad block bitmap */ 261 goto err_out; 262 263 /* check whether the inode bitmap block number is set */ 264 bitmap_blk = ext4_inode_bitmap(sb, desc); 265 offset = bitmap_blk - group_first_block; 266 if (!ext4_test_bit(offset, bh->b_data)) 267 /* bad block bitmap */ 268 goto err_out; 269 270 /* check whether the inode table block number is set */ 271 bitmap_blk = ext4_inode_table(sb, desc); 272 offset = bitmap_blk - group_first_block; 273 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, 274 offset + EXT4_SB(sb)->s_itb_per_group, 275 offset); 276 if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) 277 /* good bitmap for inode tables */ 278 return 1; 279 280 err_out: 281 ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu", 282 block_group, bitmap_blk); 283 return 0; 284 } 285 /** 286 * ext4_read_block_bitmap() 287 * @sb: super block 288 * @block_group: given block group 289 * 290 * Read the bitmap for a given block_group,and validate the 291 * bits for block/inode/inode tables are set in the bitmaps 292 * 293 * Return buffer_head on success or NULL in case of failure. 294 */ 295 struct buffer_head * 296 ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 297 { 298 struct ext4_group_desc *desc; 299 struct buffer_head *bh = NULL; 300 ext4_fsblk_t bitmap_blk; 301 302 desc = ext4_get_group_desc(sb, block_group, NULL); 303 if (!desc) 304 return NULL; 305 bitmap_blk = ext4_block_bitmap(sb, desc); 306 bh = sb_getblk(sb, bitmap_blk); 307 if (unlikely(!bh)) { 308 ext4_error(sb, "Cannot read block bitmap - " 309 "block_group = %u, block_bitmap = %llu", 310 block_group, bitmap_blk); 311 return NULL; 312 } 313 314 if (bitmap_uptodate(bh)) 315 return bh; 316 317 lock_buffer(bh); 318 if (bitmap_uptodate(bh)) { 319 unlock_buffer(bh); 320 return bh; 321 } 322 ext4_lock_group(sb, block_group); 323 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 324 ext4_init_block_bitmap(sb, bh, block_group, desc); 325 set_bitmap_uptodate(bh); 326 set_buffer_uptodate(bh); 327 ext4_unlock_group(sb, block_group); 328 unlock_buffer(bh); 329 return bh; 330 } 331 ext4_unlock_group(sb, block_group); 332 if (buffer_uptodate(bh)) { 333 /* 334 * if not uninit if bh is uptodate, 335 * bitmap is also uptodate 336 */ 337 set_bitmap_uptodate(bh); 338 unlock_buffer(bh); 339 return bh; 340 } 341 /* 342 * submit the buffer_head for read. We can 343 * safely mark the bitmap as uptodate now. 344 * We do it here so the bitmap uptodate bit 345 * get set with buffer lock held. 346 */ 347 trace_ext4_read_block_bitmap_load(sb, block_group); 348 set_bitmap_uptodate(bh); 349 if (bh_submit_read(bh) < 0) { 350 put_bh(bh); 351 ext4_error(sb, "Cannot read block bitmap - " 352 "block_group = %u, block_bitmap = %llu", 353 block_group, bitmap_blk); 354 return NULL; 355 } 356 ext4_valid_block_bitmap(sb, desc, block_group, bh); 357 /* 358 * file system mounted not to panic on error, 359 * continue with corrupt bitmap 360 */ 361 return bh; 362 } 363 364 /** 365 * ext4_has_free_blocks() 366 * @sbi: in-core super block structure. 367 * @nblocks: number of needed blocks 368 * 369 * Check if filesystem has nblocks free & available for allocation. 370 * On success return 1, return 0 on failure. 371 */ 372 static int ext4_has_free_blocks(struct ext4_sb_info *sbi, 373 s64 nblocks, unsigned int flags) 374 { 375 s64 free_blocks, dirty_blocks, root_blocks; 376 struct percpu_counter *fbc = &sbi->s_freeblocks_counter; 377 struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; 378 379 free_blocks = percpu_counter_read_positive(fbc); 380 dirty_blocks = percpu_counter_read_positive(dbc); 381 root_blocks = ext4_r_blocks_count(sbi->s_es); 382 383 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 384 EXT4_FREEBLOCKS_WATERMARK) { 385 free_blocks = percpu_counter_sum_positive(fbc); 386 dirty_blocks = percpu_counter_sum_positive(dbc); 387 } 388 /* Check whether we have space after 389 * accounting for current dirty blocks & root reserved blocks. 390 */ 391 if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) 392 return 1; 393 394 /* Hm, nope. Are (enough) root reserved blocks available? */ 395 if (sbi->s_resuid == current_fsuid() || 396 ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || 397 capable(CAP_SYS_RESOURCE) || 398 (flags & EXT4_MB_USE_ROOT_BLOCKS)) { 399 400 if (free_blocks >= (nblocks + dirty_blocks)) 401 return 1; 402 } 403 404 return 0; 405 } 406 407 int ext4_claim_free_blocks(struct ext4_sb_info *sbi, 408 s64 nblocks, unsigned int flags) 409 { 410 if (ext4_has_free_blocks(sbi, nblocks, flags)) { 411 percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); 412 return 0; 413 } else 414 return -ENOSPC; 415 } 416 417 /** 418 * ext4_should_retry_alloc() 419 * @sb: super block 420 * @retries number of attemps has been made 421 * 422 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if 423 * it is profitable to retry the operation, this function will wait 424 * for the current or committing transaction to complete, and then 425 * return TRUE. 426 * 427 * if the total number of retries exceed three times, return FALSE. 428 */ 429 int ext4_should_retry_alloc(struct super_block *sb, int *retries) 430 { 431 if (!ext4_has_free_blocks(EXT4_SB(sb), 1, 0) || 432 (*retries)++ > 3 || 433 !EXT4_SB(sb)->s_journal) 434 return 0; 435 436 jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); 437 438 return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); 439 } 440 441 /* 442 * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks 443 * 444 * @handle: handle to this transaction 445 * @inode: file inode 446 * @goal: given target block(filesystem wide) 447 * @count: pointer to total number of blocks needed 448 * @errp: error code 449 * 450 * Return 1st allocated block number on success, *count stores total account 451 * error stores in errp pointer 452 */ 453 ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, 454 ext4_fsblk_t goal, unsigned int flags, 455 unsigned long *count, int *errp) 456 { 457 struct ext4_allocation_request ar; 458 ext4_fsblk_t ret; 459 460 memset(&ar, 0, sizeof(ar)); 461 /* Fill with neighbour allocated blocks */ 462 ar.inode = inode; 463 ar.goal = goal; 464 ar.len = count ? *count : 1; 465 ar.flags = flags; 466 467 ret = ext4_mb_new_blocks(handle, &ar, errp); 468 if (count) 469 *count = ar.len; 470 /* 471 * Account for the allocated meta blocks. We will never 472 * fail EDQUOT for metdata, but we do account for it. 473 */ 474 if (!(*errp) && 475 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) { 476 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 477 EXT4_I(inode)->i_allocated_meta_blocks += ar.len; 478 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 479 dquot_alloc_block_nofail(inode, ar.len); 480 } 481 return ret; 482 } 483 484 /** 485 * ext4_count_free_blocks() -- count filesystem free blocks 486 * @sb: superblock 487 * 488 * Adds up the number of free blocks from each block group. 489 */ 490 ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) 491 { 492 ext4_fsblk_t desc_count; 493 struct ext4_group_desc *gdp; 494 ext4_group_t i; 495 ext4_group_t ngroups = ext4_get_groups_count(sb); 496 #ifdef EXT4FS_DEBUG 497 struct ext4_super_block *es; 498 ext4_fsblk_t bitmap_count; 499 unsigned int x; 500 struct buffer_head *bitmap_bh = NULL; 501 502 es = EXT4_SB(sb)->s_es; 503 desc_count = 0; 504 bitmap_count = 0; 505 gdp = NULL; 506 507 for (i = 0; i < ngroups; i++) { 508 gdp = ext4_get_group_desc(sb, i, NULL); 509 if (!gdp) 510 continue; 511 desc_count += ext4_free_blks_count(sb, gdp); 512 brelse(bitmap_bh); 513 bitmap_bh = ext4_read_block_bitmap(sb, i); 514 if (bitmap_bh == NULL) 515 continue; 516 517 x = ext4_count_free(bitmap_bh, sb->s_blocksize); 518 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 519 i, ext4_free_blks_count(sb, gdp), x); 520 bitmap_count += x; 521 } 522 brelse(bitmap_bh); 523 printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" 524 ", computed = %llu, %llu\n", ext4_free_blocks_count(es), 525 desc_count, bitmap_count); 526 return bitmap_count; 527 #else 528 desc_count = 0; 529 for (i = 0; i < ngroups; i++) { 530 gdp = ext4_get_group_desc(sb, i, NULL); 531 if (!gdp) 532 continue; 533 desc_count += ext4_free_blks_count(sb, gdp); 534 } 535 536 return desc_count; 537 #endif 538 } 539 540 static inline int test_root(ext4_group_t a, int b) 541 { 542 int num = b; 543 544 while (a > num) 545 num *= b; 546 return num == a; 547 } 548 549 static int ext4_group_sparse(ext4_group_t group) 550 { 551 if (group <= 1) 552 return 1; 553 if (!(group & 1)) 554 return 0; 555 return (test_root(group, 7) || test_root(group, 5) || 556 test_root(group, 3)); 557 } 558 559 /** 560 * ext4_bg_has_super - number of blocks used by the superblock in group 561 * @sb: superblock for filesystem 562 * @group: group number to check 563 * 564 * Return the number of blocks used by the superblock (primary or backup) 565 * in this group. Currently this will be only 0 or 1. 566 */ 567 int ext4_bg_has_super(struct super_block *sb, ext4_group_t group) 568 { 569 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 570 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && 571 !ext4_group_sparse(group)) 572 return 0; 573 return 1; 574 } 575 576 static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, 577 ext4_group_t group) 578 { 579 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); 580 ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb); 581 ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1; 582 583 if (group == first || group == first + 1 || group == last) 584 return 1; 585 return 0; 586 } 587 588 static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 589 ext4_group_t group) 590 { 591 if (!ext4_bg_has_super(sb, group)) 592 return 0; 593 594 if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG)) 595 return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); 596 else 597 return EXT4_SB(sb)->s_gdb_count; 598 } 599 600 /** 601 * ext4_bg_num_gdb - number of blocks used by the group table in group 602 * @sb: superblock for filesystem 603 * @group: group number to check 604 * 605 * Return the number of blocks used by the group descriptor table 606 * (primary or backup) in this group. In the future there may be a 607 * different number of descriptor blocks in each group. 608 */ 609 unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) 610 { 611 unsigned long first_meta_bg = 612 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); 613 unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); 614 615 if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || 616 metagroup < first_meta_bg) 617 return ext4_bg_num_gdb_nometa(sb, group); 618 619 return ext4_bg_num_gdb_meta(sb,group); 620 621 } 622 623