1 /* 2 * linux/fs/ext4/balloc.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 10 * Big-endian to little-endian byte-swapping/bitmaps by 11 * David S. Miller (davem@caip.rutgers.edu), 1995 12 */ 13 14 #include <linux/time.h> 15 #include <linux/capability.h> 16 #include <linux/fs.h> 17 #include <linux/jbd2.h> 18 #include <linux/quotaops.h> 19 #include <linux/buffer_head.h> 20 #include "ext4.h" 21 #include "ext4_jbd2.h" 22 #include "group.h" 23 24 /* 25 * balloc.c contains the blocks allocation and deallocation routines 26 */ 27 28 /* 29 * Calculate the block group number and offset, given a block number 30 */ 31 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, 32 ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp) 33 { 34 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 35 ext4_grpblk_t offset; 36 37 blocknr = blocknr - le32_to_cpu(es->s_first_data_block); 38 offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); 39 if (offsetp) 40 *offsetp = offset; 41 if (blockgrpp) 42 *blockgrpp = blocknr; 43 44 } 45 46 static int ext4_block_in_group(struct super_block *sb, ext4_fsblk_t block, 47 ext4_group_t block_group) 48 { 49 ext4_group_t actual_group; 50 ext4_get_group_no_and_offset(sb, block, &actual_group, NULL); 51 if (actual_group == block_group) 52 return 1; 53 return 0; 54 } 55 56 static int ext4_group_used_meta_blocks(struct super_block *sb, 57 ext4_group_t block_group) 58 { 59 ext4_fsblk_t tmp; 60 struct ext4_sb_info *sbi = EXT4_SB(sb); 61 /* block bitmap, inode bitmap, and inode table blocks */ 62 int used_blocks = sbi->s_itb_per_group + 2; 63 64 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 65 struct ext4_group_desc *gdp; 66 struct buffer_head *bh; 67 68 gdp = ext4_get_group_desc(sb, block_group, &bh); 69 if (!ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), 70 block_group)) 71 used_blocks--; 72 73 if (!ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), 74 block_group)) 75 used_blocks--; 76 77 tmp = ext4_inode_table(sb, gdp); 78 for (; tmp < ext4_inode_table(sb, gdp) + 79 sbi->s_itb_per_group; tmp++) { 80 if (!ext4_block_in_group(sb, tmp, block_group)) 81 used_blocks -= 1; 82 } 83 } 84 return used_blocks; 85 } 86 /* Initializes an uninitialized block bitmap if given, and returns the 87 * number of blocks free in the group. */ 88 unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, 89 ext4_group_t block_group, struct ext4_group_desc *gdp) 90 { 91 int bit, bit_max; 92 unsigned free_blocks, group_blocks; 93 struct ext4_sb_info *sbi = EXT4_SB(sb); 94 95 if (bh) { 96 J_ASSERT_BH(bh, buffer_locked(bh)); 97 98 /* If checksum is bad mark all blocks used to prevent allocation 99 * essentially implementing a per-group read-only flag. 
*/ 100 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { 101 ext4_error(sb, __func__, 102 "Checksum bad for group %lu\n", block_group); 103 gdp->bg_free_blocks_count = 0; 104 gdp->bg_free_inodes_count = 0; 105 gdp->bg_itable_unused = 0; 106 memset(bh->b_data, 0xff, sb->s_blocksize); 107 return 0; 108 } 109 memset(bh->b_data, 0, sb->s_blocksize); 110 } 111 112 /* Check for superblock and gdt backups in this group */ 113 bit_max = ext4_bg_has_super(sb, block_group); 114 115 if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || 116 block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) * 117 sbi->s_desc_per_block) { 118 if (bit_max) { 119 bit_max += ext4_bg_num_gdb(sb, block_group); 120 bit_max += 121 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); 122 } 123 } else { /* For META_BG_BLOCK_GROUPS */ 124 bit_max += ext4_bg_num_gdb(sb, block_group); 125 } 126 127 if (block_group == sbi->s_groups_count - 1) { 128 /* 129 * Even though mke2fs always initialize first and last group 130 * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need 131 * to make sure we calculate the right free blocks 132 */ 133 group_blocks = ext4_blocks_count(sbi->s_es) - 134 le32_to_cpu(sbi->s_es->s_first_data_block) - 135 (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); 136 } else { 137 group_blocks = EXT4_BLOCKS_PER_GROUP(sb); 138 } 139 140 free_blocks = group_blocks - bit_max; 141 142 if (bh) { 143 ext4_fsblk_t start, tmp; 144 int flex_bg = 0; 145 146 for (bit = 0; bit < bit_max; bit++) 147 ext4_set_bit(bit, bh->b_data); 148 149 start = ext4_group_first_block_no(sb, block_group); 150 151 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 152 EXT4_FEATURE_INCOMPAT_FLEX_BG)) 153 flex_bg = 1; 154 155 /* Set bits for block and inode bitmaps, and inode table */ 156 tmp = ext4_block_bitmap(sb, gdp); 157 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 158 ext4_set_bit(tmp - start, bh->b_data); 159 160 tmp = ext4_inode_bitmap(sb, gdp); 161 if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) 162 ext4_set_bit(tmp - start, bh->b_data); 163 164 tmp = ext4_inode_table(sb, gdp); 165 for (; tmp < ext4_inode_table(sb, gdp) + 166 sbi->s_itb_per_group; tmp++) { 167 if (!flex_bg || 168 ext4_block_in_group(sb, tmp, block_group)) 169 ext4_set_bit(tmp - start, bh->b_data); 170 } 171 /* 172 * Also if the number of blocks within the group is 173 * less than the blocksize * 8 ( which is the size 174 * of bitmap ), set rest of the block bitmap to 1 175 */ 176 mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data); 177 } 178 return free_blocks - ext4_group_used_meta_blocks(sb, block_group); 179 } 180 181 182 /* 183 * The free blocks are managed by bitmaps. A file system contains several 184 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 185 * block for inodes, N blocks for the inode table and data blocks. 186 * 187 * The file system contains group descriptors which are located after the 188 * super block. Each descriptor contains the number of the bitmap block and 189 * the free blocks count in the block. The descriptors are loaded in memory 190 * when a file system is mounted (see ext4_fill_super). 
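 *
 * For illustration (the exact on-disk layout depends on which features
 * are enabled), a single block group looks roughly like this:
 *
 *   [ superblock backup | group descriptors | block bitmap |
 *     inode bitmap | inode table | data blocks ... ]
 *
 * Groups that carry no superblock backup (see ext4_bg_has_super())
 * start directly with their block bitmap.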
191 */ 192 193 194 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 195 196 /** 197 * ext4_get_group_desc() -- load group descriptor from disk 198 * @sb: super block 199 * @block_group: given block group 200 * @bh: pointer to the buffer head to store the block 201 * group descriptor 202 */ 203 struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, 204 ext4_group_t block_group, 205 struct buffer_head ** bh) 206 { 207 unsigned long group_desc; 208 unsigned long offset; 209 struct ext4_group_desc * desc; 210 struct ext4_sb_info *sbi = EXT4_SB(sb); 211 212 if (block_group >= sbi->s_groups_count) { 213 ext4_error (sb, "ext4_get_group_desc", 214 "block_group >= groups_count - " 215 "block_group = %lu, groups_count = %lu", 216 block_group, sbi->s_groups_count); 217 218 return NULL; 219 } 220 smp_rmb(); 221 222 group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); 223 offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); 224 if (!sbi->s_group_desc[group_desc]) { 225 ext4_error (sb, "ext4_get_group_desc", 226 "Group descriptor not loaded - " 227 "block_group = %lu, group_desc = %lu, desc = %lu", 228 block_group, group_desc, offset); 229 return NULL; 230 } 231 232 desc = (struct ext4_group_desc *)( 233 (__u8 *)sbi->s_group_desc[group_desc]->b_data + 234 offset * EXT4_DESC_SIZE(sb)); 235 if (bh) 236 *bh = sbi->s_group_desc[group_desc]; 237 return desc; 238 } 239 240 static int ext4_valid_block_bitmap(struct super_block *sb, 241 struct ext4_group_desc *desc, 242 unsigned int block_group, 243 struct buffer_head *bh) 244 { 245 ext4_grpblk_t offset; 246 ext4_grpblk_t next_zero_bit; 247 ext4_fsblk_t bitmap_blk; 248 ext4_fsblk_t group_first_block; 249 250 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { 251 /* with FLEX_BG, the inode/block bitmaps and itable 252 * blocks may not be in the group at all 253 * so the bitmap validation will be skipped for those groups 254 * or it has to also read the block group where the bitmaps 255 * are located to verify they are set. 256 */ 257 return 1; 258 } 259 group_first_block = ext4_group_first_block_no(sb, block_group); 260 261 /* check whether block bitmap block number is set */ 262 bitmap_blk = ext4_block_bitmap(sb, desc); 263 offset = bitmap_blk - group_first_block; 264 if (!ext4_test_bit(offset, bh->b_data)) 265 /* bad block bitmap */ 266 goto err_out; 267 268 /* check whether the inode bitmap block number is set */ 269 bitmap_blk = ext4_inode_bitmap(sb, desc); 270 offset = bitmap_blk - group_first_block; 271 if (!ext4_test_bit(offset, bh->b_data)) 272 /* bad block bitmap */ 273 goto err_out; 274 275 /* check whether the inode table block number is set */ 276 bitmap_blk = ext4_inode_table(sb, desc); 277 offset = bitmap_blk - group_first_block; 278 next_zero_bit = ext4_find_next_zero_bit(bh->b_data, 279 offset + EXT4_SB(sb)->s_itb_per_group, 280 offset); 281 if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group) 282 /* good bitmap for inode tables */ 283 return 1; 284 285 err_out: 286 ext4_error(sb, __func__, 287 "Invalid block bitmap - " 288 "block_group = %d, block = %llu", 289 block_group, bitmap_blk); 290 return 0; 291 } 292 /** 293 * ext4_read_block_bitmap() 294 * @sb: super block 295 * @block_group: given block group 296 * 297 * Read the bitmap for a given block_group,and validate the 298 * bits for block/inode/inode tables are set in the bitmaps 299 * 300 * Return buffer_head on success or NULL in case of failure. 
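 *
 * Illustrative caller pattern: the caller owns a reference on the
 * returned buffer_head and is expected to drop it when done, e.g.
 *
 *	bh = ext4_read_block_bitmap(sb, block_group);
 *	if (!bh)
 *		return;
 *	...use bh->b_data...
 *	brelse(bh);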
301 */ 302 struct buffer_head * 303 ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) 304 { 305 struct ext4_group_desc * desc; 306 struct buffer_head * bh = NULL; 307 ext4_fsblk_t bitmap_blk; 308 309 desc = ext4_get_group_desc(sb, block_group, NULL); 310 if (!desc) 311 return NULL; 312 bitmap_blk = ext4_block_bitmap(sb, desc); 313 bh = sb_getblk(sb, bitmap_blk); 314 if (unlikely(!bh)) { 315 ext4_error(sb, __func__, 316 "Cannot read block bitmap - " 317 "block_group = %lu, block_bitmap = %llu", 318 block_group, bitmap_blk); 319 return NULL; 320 } 321 if (bh_uptodate_or_lock(bh)) 322 return bh; 323 324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); 325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 326 ext4_init_block_bitmap(sb, bh, block_group, desc); 327 set_buffer_uptodate(bh); 328 unlock_buffer(bh); 329 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 330 return bh; 331 } 332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); 333 if (bh_submit_read(bh) < 0) { 334 put_bh(bh); 335 ext4_error(sb, __func__, 336 "Cannot read block bitmap - " 337 "block_group = %lu, block_bitmap = %llu", 338 block_group, bitmap_blk); 339 return NULL; 340 } 341 ext4_valid_block_bitmap(sb, desc, block_group, bh); 342 /* 343 * file system mounted not to panic on error, 344 * continue with corrupt bitmap 345 */ 346 return bh; 347 } 348 /* 349 * The reservation window structure operations 350 * -------------------------------------------- 351 * Operations include: 352 * dump, find, add, remove, is_empty, find_next_reservable_window, etc. 353 * 354 * We use a red-black tree to represent per-filesystem reservation 355 * windows. 356 * 357 */ 358 359 /** 360 * __rsv_window_dump() -- Dump the filesystem block allocation reservation map 361 * @rb_root: root of per-filesystem reservation rb tree 362 * @verbose: verbose mode 363 * @fn: function which wishes to dump the reservation map 364 * 365 * If verbose is turned on, it will print the whole block reservation 366 * windows(start, end). Otherwise, it will only print out the "bad" windows, 367 * those windows that overlap with their immediate neighbors. 
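 *
 * (A "bad" window here is an allocated window whose start is at or beyond
 * its end, or a window that overlaps the one before it; either breaks the
 * invariant that the rb tree holds disjoint windows sorted by start block.)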
368 */ 369 #if 1 370 static void __rsv_window_dump(struct rb_root *root, int verbose, 371 const char *fn) 372 { 373 struct rb_node *n; 374 struct ext4_reserve_window_node *rsv, *prev; 375 int bad; 376 377 restart: 378 n = rb_first(root); 379 bad = 0; 380 prev = NULL; 381 382 printk("Block Allocation Reservation Windows Map (%s):\n", fn); 383 while (n) { 384 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); 385 if (verbose) 386 printk("reservation window 0x%p " 387 "start: %llu, end: %llu\n", 388 rsv, rsv->rsv_start, rsv->rsv_end); 389 if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { 390 printk("Bad reservation %p (start >= end)\n", 391 rsv); 392 bad = 1; 393 } 394 if (prev && prev->rsv_end >= rsv->rsv_start) { 395 printk("Bad reservation %p (prev->end >= start)\n", 396 rsv); 397 bad = 1; 398 } 399 if (bad) { 400 if (!verbose) { 401 printk("Restarting reservation walk in verbose mode\n"); 402 verbose = 1; 403 goto restart; 404 } 405 } 406 n = rb_next(n); 407 prev = rsv; 408 } 409 printk("Window map complete.\n"); 410 BUG_ON(bad); 411 } 412 #define rsv_window_dump(root, verbose) \ 413 __rsv_window_dump((root), (verbose), __func__) 414 #else 415 #define rsv_window_dump(root, verbose) do {} while (0) 416 #endif 417 418 /** 419 * goal_in_my_reservation() 420 * @rsv: inode's reservation window 421 * @grp_goal: given goal block relative to the allocation block group 422 * @group: the current allocation block group 423 * @sb: filesystem super block 424 * 425 * Test if the given goal block (group relative) is within the file's 426 * own block reservation window range. 427 * 428 * If the reservation window is outside the goal allocation group, return 0; 429 * grp_goal (given goal block) could be -1, which means no specific 430 * goal block. In this case, always return 1. 431 * If the goal block is within the reservation window, return 1; 432 * otherwise, return 0; 433 */ 434 static int 435 goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, 436 ext4_group_t group, struct super_block *sb) 437 { 438 ext4_fsblk_t group_first_block, group_last_block; 439 440 group_first_block = ext4_group_first_block_no(sb, group); 441 group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); 442 443 if ((rsv->_rsv_start > group_last_block) || 444 (rsv->_rsv_end < group_first_block)) 445 return 0; 446 if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) 447 || (grp_goal + group_first_block > rsv->_rsv_end))) 448 return 0; 449 return 1; 450 } 451 452 /** 453 * search_reserve_window() 454 * @rb_root: root of reservation tree 455 * @goal: target allocation block 456 * 457 * Find the reserved window which includes the goal, or the previous one 458 * if the goal is not in any window. 459 * Returns NULL if there are no windows or if all windows start after the goal. 460 */ 461 static struct ext4_reserve_window_node * 462 search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) 463 { 464 struct rb_node *n = root->rb_node; 465 struct ext4_reserve_window_node *rsv; 466 467 if (!n) 468 return NULL; 469 470 do { 471 rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); 472 473 if (goal < rsv->rsv_start) 474 n = n->rb_left; 475 else if (goal > rsv->rsv_end) 476 n = n->rb_right; 477 else 478 return rsv; 479 } while (n); 480 /* 481 * We've fallen off the end of the tree: the goal wasn't inside 482 * any particular node. OK, the previous node must be to one 483 * side of the interval containing the goal. 
If it's the RHS,
	 * we need to back up one.
	 */
	if (rsv->rsv_start > goal) {
		n = rb_prev(&rsv->rsv_node);
		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
	}
	return rsv;
}

/**
 * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
 * @sb: super block
 * @rsv: reservation window to add
 *
 * Must be called with rsv_lock held.
 */
void ext4_rsv_window_add(struct super_block *sb,
		    struct ext4_reserve_window_node *rsv)
{
	struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
	struct rb_node *node = &rsv->rsv_node;
	ext4_fsblk_t start = rsv->rsv_start;

	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct ext4_reserve_window_node *this;

	while (*p) {
		parent = *p;
		this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);

		if (start < this->rsv_start)
			p = &(*p)->rb_left;
		else if (start > this->rsv_end)
			p = &(*p)->rb_right;
		else {
			rsv_window_dump(root, 1);
			BUG();
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
}

/**
 * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
 * @sb: super block
 * @rsv: reservation window to remove
 *
 * Mark the block reservation window as not allocated, and unlink it
 * from the filesystem reservation window rb tree. Must be called with
 * rsv_lock held.
 */
static void rsv_window_remove(struct super_block *sb,
			      struct ext4_reserve_window_node *rsv)
{
	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_alloc_hit = 0;
	rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
}

/*
 * rsv_is_empty() -- Check if the reservation window is allocated.
 * @rsv: given reservation window to check
 *
 * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
 */
static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
	/* a valid reservation end block could not be 0 */
	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
}

/**
 * ext4_init_block_alloc_info()
 * @inode: file inode structure
 *
 * Allocate and initialize the reservation window structure, and
 * finally link the window to the ext4 inode structure.
 *
 * The reservation window structure is only dynamically allocated
 * and linked to the ext4 inode the first time the open file
 * needs a new block. So, before every ext4_new_block(s) call, for
 * regular files, we should check whether the reservation window
 * structure exists or not. If it does not, this function is called.
 * Failing to do so will result in block reservation being turned off
 * for that open file.
 *
 * This function is called from ext4_get_blocks_handle(); it is also
 * called when setting the reservation window size through ioctl before
 * the file is opened for write (needs block allocation).
 *
 * Needs down_write(i_data_sem) protection prior to calling this function.
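 *
 * Illustrative call pattern (hedged sketch; the real call site lives in the
 * block mapping path in inode.c):
 *
 *	if (S_ISREG(inode->i_mode) && !EXT4_I(inode)->i_block_alloc_info)
 *		ext4_init_block_alloc_info(inode);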
 */
void ext4_init_block_alloc_info(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct super_block *sb = inode->i_sb;

	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
	if (block_i) {
		struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;

		rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
		rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;

		/*
		 * if filesystem is mounted with NORESERVATION, the goal
		 * reservation window size is set to zero to indicate
		 * block reservation is off
		 */
		if (!test_opt(sb, RESERVATION))
			rsv->rsv_goal_size = 0;
		else
			rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
		rsv->rsv_alloc_hit = 0;
		block_i->last_alloc_logical_block = 0;
		block_i->last_alloc_physical_block = 0;
	}
	ei->i_block_alloc_info = block_i;
}

/**
 * ext4_discard_reservation()
 * @inode: inode
 *
 * Discard (free) the block reservation window on last file close,
 * on truncate, or at last iput().
 *
 * It is called in three cases:
 *	ext4_release_file(): the last writer closes the file
 *	ext4_clear_inode(): last iput(), when nobody links to this file.
 *	ext4_truncate(): when the block indirect map is about to change.
 */
void ext4_discard_reservation(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct ext4_reserve_window_node *rsv;
	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;

	ext4_mb_discard_inode_preallocations(inode);

	if (!block_i)
		return;

	rsv = &block_i->rsv_window_node;
	if (!rsv_is_empty(&rsv->rsv_window)) {
		spin_lock(rsv_lock);
		if (!rsv_is_empty(&rsv->rsv_window))
			rsv_window_remove(inode->i_sb, rsv);
		spin_unlock(rsv_lock);
	}
}

/**
 * ext4_free_blocks_sb() -- Free given blocks and update quota
 * @handle: handle to this transaction
 * @sb: super block
 * @block: start physical block to free
 * @count: number of blocks to free
 * @pdquot_freed_blocks: pointer to quota
 */
void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count,
			 unsigned long *pdquot_freed_blocks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	ext4_group_t block_group;
	ext4_grpblk_t bit;
	unsigned long i;
	unsigned long overflow;
	struct ext4_group_desc *desc;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	int err = 0, ret;
	ext4_grpblk_t group_freed;

	*pdquot_freed_blocks = 0;
	sbi = EXT4_SB(sb);
	es = sbi->s_es;
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > ext4_blocks_count(es)) {
		ext4_error(sb, "ext4_free_blocks",
			   "Freeing blocks not in datazone - "
			   "block = %llu, count = %lu", block, count);
		goto error_return;
	}

	ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1);

do_more:
	overflow = 0;
	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
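	 * For example (illustrative numbers): with 32768 blocks per group,
	 * freeing 10 blocks that start 4 blocks before the end of a group
	 * frees those 4 now and loops back to do_more for the remaining 6,
	 * which belong to the following group.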
688 */ 689 if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { 690 overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); 691 count -= overflow; 692 } 693 brelse(bitmap_bh); 694 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 695 if (!bitmap_bh) 696 goto error_return; 697 desc = ext4_get_group_desc (sb, block_group, &gd_bh); 698 if (!desc) 699 goto error_return; 700 701 if (in_range(ext4_block_bitmap(sb, desc), block, count) || 702 in_range(ext4_inode_bitmap(sb, desc), block, count) || 703 in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || 704 in_range(block + count - 1, ext4_inode_table(sb, desc), 705 sbi->s_itb_per_group)) { 706 ext4_error (sb, "ext4_free_blocks", 707 "Freeing blocks in system zones - " 708 "Block = %llu, count = %lu", 709 block, count); 710 goto error_return; 711 } 712 713 /* 714 * We are about to start releasing blocks in the bitmap, 715 * so we need undo access. 716 */ 717 /* @@@ check errors */ 718 BUFFER_TRACE(bitmap_bh, "getting undo access"); 719 err = ext4_journal_get_undo_access(handle, bitmap_bh); 720 if (err) 721 goto error_return; 722 723 /* 724 * We are about to modify some metadata. Call the journal APIs 725 * to unshare ->b_data if a currently-committing transaction is 726 * using it 727 */ 728 BUFFER_TRACE(gd_bh, "get_write_access"); 729 err = ext4_journal_get_write_access(handle, gd_bh); 730 if (err) 731 goto error_return; 732 733 jbd_lock_bh_state(bitmap_bh); 734 735 for (i = 0, group_freed = 0; i < count; i++) { 736 /* 737 * An HJ special. This is expensive... 738 */ 739 #ifdef CONFIG_JBD2_DEBUG 740 jbd_unlock_bh_state(bitmap_bh); 741 { 742 struct buffer_head *debug_bh; 743 debug_bh = sb_find_get_block(sb, block + i); 744 if (debug_bh) { 745 BUFFER_TRACE(debug_bh, "Deleted!"); 746 if (!bh2jh(bitmap_bh)->b_committed_data) 747 BUFFER_TRACE(debug_bh, 748 "No commited data in bitmap"); 749 BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); 750 __brelse(debug_bh); 751 } 752 } 753 jbd_lock_bh_state(bitmap_bh); 754 #endif 755 if (need_resched()) { 756 jbd_unlock_bh_state(bitmap_bh); 757 cond_resched(); 758 jbd_lock_bh_state(bitmap_bh); 759 } 760 /* @@@ This prevents newly-allocated data from being 761 * freed and then reallocated within the same 762 * transaction. 763 * 764 * Ideally we would want to allow that to happen, but to 765 * do so requires making jbd2_journal_forget() capable of 766 * revoking the queued write of a data block, which 767 * implies blocking on the journal lock. *forget() 768 * cannot block due to truncate races. 769 * 770 * Eventually we can fix this by making jbd2_journal_forget() 771 * return a status indicating whether or not it was able 772 * to revoke the buffer. On successful revoke, it is 773 * safe not to set the allocation bit in the committed 774 * bitmap, because we know that there is no outstanding 775 * activity on the buffer any more and so it is safe to 776 * reallocate it. 777 */ 778 BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); 779 J_ASSERT_BH(bitmap_bh, 780 bh2jh(bitmap_bh)->b_committed_data != NULL); 781 ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, 782 bh2jh(bitmap_bh)->b_committed_data); 783 784 /* 785 * We clear the bit in the bitmap after setting the committed 786 * data bit, because this is the reverse order to that which 787 * the allocator uses. 
788 */ 789 BUFFER_TRACE(bitmap_bh, "clear bit"); 790 if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 791 bit + i, bitmap_bh->b_data)) { 792 jbd_unlock_bh_state(bitmap_bh); 793 ext4_error(sb, __func__, 794 "bit already cleared for block %llu", 795 (ext4_fsblk_t)(block + i)); 796 jbd_lock_bh_state(bitmap_bh); 797 BUFFER_TRACE(bitmap_bh, "bit already cleared"); 798 } else { 799 group_freed++; 800 } 801 } 802 jbd_unlock_bh_state(bitmap_bh); 803 804 spin_lock(sb_bgl_lock(sbi, block_group)); 805 le16_add_cpu(&desc->bg_free_blocks_count, group_freed); 806 desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); 807 spin_unlock(sb_bgl_lock(sbi, block_group)); 808 percpu_counter_add(&sbi->s_freeblocks_counter, count); 809 810 if (sbi->s_log_groups_per_flex) { 811 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 812 spin_lock(sb_bgl_lock(sbi, flex_group)); 813 sbi->s_flex_groups[flex_group].free_blocks += count; 814 spin_unlock(sb_bgl_lock(sbi, flex_group)); 815 } 816 817 /* We dirtied the bitmap block */ 818 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 819 err = ext4_journal_dirty_metadata(handle, bitmap_bh); 820 821 /* And the group descriptor block */ 822 BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); 823 ret = ext4_journal_dirty_metadata(handle, gd_bh); 824 if (!err) err = ret; 825 *pdquot_freed_blocks += group_freed; 826 827 if (overflow && !err) { 828 block += count; 829 count = overflow; 830 goto do_more; 831 } 832 sb->s_dirt = 1; 833 error_return: 834 brelse(bitmap_bh); 835 ext4_std_error(sb, err); 836 return; 837 } 838 839 /** 840 * ext4_free_blocks() -- Free given blocks and update quota 841 * @handle: handle for this transaction 842 * @inode: inode 843 * @block: start physical block to free 844 * @count: number of blocks to count 845 * @metadata: Are these metadata blocks 846 */ 847 void ext4_free_blocks(handle_t *handle, struct inode *inode, 848 ext4_fsblk_t block, unsigned long count, 849 int metadata) 850 { 851 struct super_block * sb; 852 unsigned long dquot_freed_blocks; 853 854 /* this isn't the right place to decide whether block is metadata 855 * inode.c/extents.c knows better, but for safety ... */ 856 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || 857 ext4_should_journal_data(inode)) 858 metadata = 1; 859 860 sb = inode->i_sb; 861 862 if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) 863 ext4_free_blocks_sb(handle, sb, block, count, 864 &dquot_freed_blocks); 865 else 866 ext4_mb_free_blocks(handle, inode, block, count, 867 metadata, &dquot_freed_blocks); 868 if (dquot_freed_blocks) 869 DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); 870 return; 871 } 872 873 /** 874 * ext4_test_allocatable() 875 * @nr: given allocation block group 876 * @bh: bufferhead contains the bitmap of the given block group 877 * 878 * For ext4 allocations, we must not reuse any blocks which are 879 * allocated in the bitmap buffer's "last committed data" copy. This 880 * prevents deletes from freeing up the page for reuse until we have 881 * committed the delete transaction. 882 * 883 * If we didn't do this, then deleting something and reallocating it as 884 * data would allow the old block to be overwritten before the 885 * transaction committed (because we force data to disk before commit). 886 * This would lead to corruption if we crashed between overwriting the 887 * data and committing the delete. 
888 * 889 * @@@ We may want to make this allocation behaviour conditional on 890 * data-writes at some point, and disable it for metadata allocations or 891 * sync-data inodes. 892 */ 893 static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) 894 { 895 int ret; 896 struct journal_head *jh = bh2jh(bh); 897 898 if (ext4_test_bit(nr, bh->b_data)) 899 return 0; 900 901 jbd_lock_bh_state(bh); 902 if (!jh->b_committed_data) 903 ret = 1; 904 else 905 ret = !ext4_test_bit(nr, jh->b_committed_data); 906 jbd_unlock_bh_state(bh); 907 return ret; 908 } 909 910 /** 911 * bitmap_search_next_usable_block() 912 * @start: the starting block (group relative) of the search 913 * @bh: bufferhead contains the block group bitmap 914 * @maxblocks: the ending block (group relative) of the reservation 915 * 916 * The bitmap search --- search forward alternately through the actual 917 * bitmap on disk and the last-committed copy in journal, until we find a 918 * bit free in both bitmaps. 919 */ 920 static ext4_grpblk_t 921 bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, 922 ext4_grpblk_t maxblocks) 923 { 924 ext4_grpblk_t next; 925 struct journal_head *jh = bh2jh(bh); 926 927 while (start < maxblocks) { 928 next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); 929 if (next >= maxblocks) 930 return -1; 931 if (ext4_test_allocatable(next, bh)) 932 return next; 933 jbd_lock_bh_state(bh); 934 if (jh->b_committed_data) 935 start = ext4_find_next_zero_bit(jh->b_committed_data, 936 maxblocks, next); 937 jbd_unlock_bh_state(bh); 938 } 939 return -1; 940 } 941 942 /** 943 * find_next_usable_block() 944 * @start: the starting block (group relative) to find next 945 * allocatable block in bitmap. 946 * @bh: bufferhead contains the block group bitmap 947 * @maxblocks: the ending block (group relative) for the search 948 * 949 * Find an allocatable block in a bitmap. We honor both the bitmap and 950 * its last-committed copy (if that exists), and perform the "most 951 * appropriate allocation" algorithm of looking for a free block near 952 * the initial goal; then for a free byte somewhere in the bitmap; then 953 * for any free bit in the bitmap. 954 */ 955 static ext4_grpblk_t 956 find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, 957 ext4_grpblk_t maxblocks) 958 { 959 ext4_grpblk_t here, next; 960 char *p, *r; 961 962 if (start > 0) { 963 /* 964 * The goal was occupied; search forward for a free 965 * block within the next XX blocks. 966 * 967 * end_goal is more or less random, but it has to be 968 * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the 969 * next 64-bit boundary is simple.. 
970 */ 971 ext4_grpblk_t end_goal = (start + 63) & ~63; 972 if (end_goal > maxblocks) 973 end_goal = maxblocks; 974 here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); 975 if (here < end_goal && ext4_test_allocatable(here, bh)) 976 return here; 977 ext4_debug("Bit not found near goal\n"); 978 } 979 980 here = start; 981 if (here < 0) 982 here = 0; 983 984 p = ((char *)bh->b_data) + (here >> 3); 985 r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); 986 next = (r - ((char *)bh->b_data)) << 3; 987 988 if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) 989 return next; 990 991 /* 992 * The bitmap search --- search forward alternately through the actual 993 * bitmap and the last-committed copy until we find a bit free in 994 * both 995 */ 996 here = bitmap_search_next_usable_block(here, bh, maxblocks); 997 return here; 998 } 999 1000 /** 1001 * claim_block() 1002 * @block: the free block (group relative) to allocate 1003 * @bh: the bufferhead containts the block group bitmap 1004 * 1005 * We think we can allocate this block in this bitmap. Try to set the bit. 1006 * If that succeeds then check that nobody has allocated and then freed the 1007 * block since we saw that is was not marked in b_committed_data. If it _was_ 1008 * allocated and freed then clear the bit in the bitmap again and return 1009 * zero (failure). 1010 */ 1011 static inline int 1012 claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) 1013 { 1014 struct journal_head *jh = bh2jh(bh); 1015 int ret; 1016 1017 if (ext4_set_bit_atomic(lock, block, bh->b_data)) 1018 return 0; 1019 jbd_lock_bh_state(bh); 1020 if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { 1021 ext4_clear_bit_atomic(lock, block, bh->b_data); 1022 ret = 0; 1023 } else { 1024 ret = 1; 1025 } 1026 jbd_unlock_bh_state(bh); 1027 return ret; 1028 } 1029 1030 /** 1031 * ext4_try_to_allocate() 1032 * @sb: superblock 1033 * @handle: handle to this transaction 1034 * @group: given allocation block group 1035 * @bitmap_bh: bufferhead holds the block bitmap 1036 * @grp_goal: given target block within the group 1037 * @count: target number of blocks to allocate 1038 * @my_rsv: reservation window 1039 * 1040 * Attempt to allocate blocks within a give range. Set the range of allocation 1041 * first, then find the first free bit(s) from the bitmap (within the range), 1042 * and at last, allocate the blocks by claiming the found free bit as allocated. 1043 * 1044 * To set the range of this allocation: 1045 * if there is a reservation window, only try to allocate block(s) from the 1046 * file's own reservation window; 1047 * Otherwise, the allocation range starts from the give goal block, ends at 1048 * the block group's last block. 1049 * 1050 * If we failed to allocate the desired block then we may end up crossing to a 1051 * new bitmap. In that case we must release write access to the old one via 1052 * ext4_journal_release_buffer(), else we'll run out of credits. 
1053 */ 1054 static ext4_grpblk_t 1055 ext4_try_to_allocate(struct super_block *sb, handle_t *handle, 1056 ext4_group_t group, struct buffer_head *bitmap_bh, 1057 ext4_grpblk_t grp_goal, unsigned long *count, 1058 struct ext4_reserve_window *my_rsv) 1059 { 1060 ext4_fsblk_t group_first_block; 1061 ext4_grpblk_t start, end; 1062 unsigned long num = 0; 1063 1064 /* we do allocation within the reservation window if we have a window */ 1065 if (my_rsv) { 1066 group_first_block = ext4_group_first_block_no(sb, group); 1067 if (my_rsv->_rsv_start >= group_first_block) 1068 start = my_rsv->_rsv_start - group_first_block; 1069 else 1070 /* reservation window cross group boundary */ 1071 start = 0; 1072 end = my_rsv->_rsv_end - group_first_block + 1; 1073 if (end > EXT4_BLOCKS_PER_GROUP(sb)) 1074 /* reservation window crosses group boundary */ 1075 end = EXT4_BLOCKS_PER_GROUP(sb); 1076 if ((start <= grp_goal) && (grp_goal < end)) 1077 start = grp_goal; 1078 else 1079 grp_goal = -1; 1080 } else { 1081 if (grp_goal > 0) 1082 start = grp_goal; 1083 else 1084 start = 0; 1085 end = EXT4_BLOCKS_PER_GROUP(sb); 1086 } 1087 1088 BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); 1089 1090 repeat: 1091 if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { 1092 grp_goal = find_next_usable_block(start, bitmap_bh, end); 1093 if (grp_goal < 0) 1094 goto fail_access; 1095 if (!my_rsv) { 1096 int i; 1097 1098 for (i = 0; i < 7 && grp_goal > start && 1099 ext4_test_allocatable(grp_goal - 1, 1100 bitmap_bh); 1101 i++, grp_goal--) 1102 ; 1103 } 1104 } 1105 start = grp_goal; 1106 1107 if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), 1108 grp_goal, bitmap_bh)) { 1109 /* 1110 * The block was allocated by another thread, or it was 1111 * allocated and then freed by another thread 1112 */ 1113 start++; 1114 grp_goal++; 1115 if (start >= end) 1116 goto fail_access; 1117 goto repeat; 1118 } 1119 num++; 1120 grp_goal++; 1121 while (num < *count && grp_goal < end 1122 && ext4_test_allocatable(grp_goal, bitmap_bh) 1123 && claim_block(sb_bgl_lock(EXT4_SB(sb), group), 1124 grp_goal, bitmap_bh)) { 1125 num++; 1126 grp_goal++; 1127 } 1128 *count = num; 1129 return grp_goal - num; 1130 fail_access: 1131 *count = num; 1132 return -1; 1133 } 1134 1135 /** 1136 * find_next_reservable_window(): 1137 * find a reservable space within the given range. 1138 * It does not allocate the reservation window for now: 1139 * alloc_new_reservation() will do the work later. 1140 * 1141 * @search_head: the head of the searching list; 1142 * This is not necessarily the list head of the whole filesystem 1143 * 1144 * We have both head and start_block to assist the search 1145 * for the reservable space. The list starts from head, 1146 * but we will shift to the place where start_block is, 1147 * then start from there, when looking for a reservable space. 1148 * 1149 * @size: the target new reservation window size 1150 * 1151 * @group_first_block: the first block we consider to start 1152 * the real search from 1153 * 1154 * @last_block: 1155 * the maximum block number that our goal reservable space 1156 * could start from. This is normally the last block in this 1157 * group. The search will end when we found the start of next 1158 * possible reservable space is out of this boundary. 1159 * This could handle the cross boundary reservation window 1160 * request. 
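 *
 *	For example (illustrative numbers): with existing windows
 *	[100, 119] and [200, 239] and a goal size of 32 starting at
 *	block 110, the search first skips to block 120, finds that
 *	120 + 32 <= 200, and books the window [120, 151].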
1161 * 1162 * basically we search from the given range, rather than the whole 1163 * reservation double linked list, (start_block, last_block) 1164 * to find a free region that is of my size and has not 1165 * been reserved. 1166 * 1167 */ 1168 static int find_next_reservable_window( 1169 struct ext4_reserve_window_node *search_head, 1170 struct ext4_reserve_window_node *my_rsv, 1171 struct super_block * sb, 1172 ext4_fsblk_t start_block, 1173 ext4_fsblk_t last_block) 1174 { 1175 struct rb_node *next; 1176 struct ext4_reserve_window_node *rsv, *prev; 1177 ext4_fsblk_t cur; 1178 int size = my_rsv->rsv_goal_size; 1179 1180 /* TODO: make the start of the reservation window byte-aligned */ 1181 /* cur = *start_block & ~7;*/ 1182 cur = start_block; 1183 rsv = search_head; 1184 if (!rsv) 1185 return -1; 1186 1187 while (1) { 1188 if (cur <= rsv->rsv_end) 1189 cur = rsv->rsv_end + 1; 1190 1191 /* TODO? 1192 * in the case we could not find a reservable space 1193 * that is what is expected, during the re-search, we could 1194 * remember what's the largest reservable space we could have 1195 * and return that one. 1196 * 1197 * For now it will fail if we could not find the reservable 1198 * space with expected-size (or more)... 1199 */ 1200 if (cur > last_block) 1201 return -1; /* fail */ 1202 1203 prev = rsv; 1204 next = rb_next(&rsv->rsv_node); 1205 rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); 1206 1207 /* 1208 * Reached the last reservation, we can just append to the 1209 * previous one. 1210 */ 1211 if (!next) 1212 break; 1213 1214 if (cur + size <= rsv->rsv_start) { 1215 /* 1216 * Found a reserveable space big enough. We could 1217 * have a reservation across the group boundary here 1218 */ 1219 break; 1220 } 1221 } 1222 /* 1223 * we come here either : 1224 * when we reach the end of the whole list, 1225 * and there is empty reservable space after last entry in the list. 1226 * append it to the end of the list. 1227 * 1228 * or we found one reservable space in the middle of the list, 1229 * return the reservation window that we could append to. 1230 * succeed. 1231 */ 1232 1233 if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) 1234 rsv_window_remove(sb, my_rsv); 1235 1236 /* 1237 * Let's book the whole avaliable window for now. We will check the 1238 * disk bitmap later and then, if there are free blocks then we adjust 1239 * the window size if it's larger than requested. 1240 * Otherwise, we will remove this node from the tree next time 1241 * call find_next_reservable_window. 1242 */ 1243 my_rsv->rsv_start = cur; 1244 my_rsv->rsv_end = cur + size - 1; 1245 my_rsv->rsv_alloc_hit = 0; 1246 1247 if (prev != my_rsv) 1248 ext4_rsv_window_add(sb, my_rsv); 1249 1250 return 0; 1251 } 1252 1253 /** 1254 * alloc_new_reservation()--allocate a new reservation window 1255 * 1256 * To make a new reservation, we search part of the filesystem 1257 * reservation list (the list that inside the group). We try to 1258 * allocate a new reservation window near the allocation goal, 1259 * or the beginning of the group, if there is no goal. 1260 * 1261 * We first find a reservable space after the goal, then from 1262 * there, we check the bitmap for the first free block after 1263 * it. If there is no free block until the end of group, then the 1264 * whole group is full, we failed. Otherwise, check if the free 1265 * block is inside the expected reservable space, if so, we 1266 * succeed. 
1267 * If the first free block is outside the reservable space, then 1268 * start from the first free block, we search for next available 1269 * space, and go on. 1270 * 1271 * on succeed, a new reservation will be found and inserted into the list 1272 * It contains at least one free block, and it does not overlap with other 1273 * reservation windows. 1274 * 1275 * failed: we failed to find a reservation window in this group 1276 * 1277 * @rsv: the reservation 1278 * 1279 * @grp_goal: The goal (group-relative). It is where the search for a 1280 * free reservable space should start from. 1281 * if we have a grp_goal(grp_goal >0 ), then start from there, 1282 * no grp_goal(grp_goal = -1), we start from the first block 1283 * of the group. 1284 * 1285 * @sb: the super block 1286 * @group: the group we are trying to allocate in 1287 * @bitmap_bh: the block group block bitmap 1288 * 1289 */ 1290 static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, 1291 ext4_grpblk_t grp_goal, struct super_block *sb, 1292 ext4_group_t group, struct buffer_head *bitmap_bh) 1293 { 1294 struct ext4_reserve_window_node *search_head; 1295 ext4_fsblk_t group_first_block, group_end_block, start_block; 1296 ext4_grpblk_t first_free_block; 1297 struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; 1298 unsigned long size; 1299 int ret; 1300 spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; 1301 1302 group_first_block = ext4_group_first_block_no(sb, group); 1303 group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1304 1305 if (grp_goal < 0) 1306 start_block = group_first_block; 1307 else 1308 start_block = grp_goal + group_first_block; 1309 1310 size = my_rsv->rsv_goal_size; 1311 1312 if (!rsv_is_empty(&my_rsv->rsv_window)) { 1313 /* 1314 * if the old reservation is cross group boundary 1315 * and if the goal is inside the old reservation window, 1316 * we will come here when we just failed to allocate from 1317 * the first part of the window. We still have another part 1318 * that belongs to the next group. In this case, there is no 1319 * point to discard our window and try to allocate a new one 1320 * in this group(which will fail). we should 1321 * keep the reservation window, just simply move on. 1322 * 1323 * Maybe we could shift the start block of the reservation 1324 * window to the first block of next group. 1325 */ 1326 1327 if ((my_rsv->rsv_start <= group_end_block) && 1328 (my_rsv->rsv_end > group_end_block) && 1329 (start_block >= my_rsv->rsv_start)) 1330 return -1; 1331 1332 if ((my_rsv->rsv_alloc_hit > 1333 (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { 1334 /* 1335 * if the previously allocation hit ratio is 1336 * greater than 1/2, then we double the size of 1337 * the reservation window the next time, 1338 * otherwise we keep the same size window 1339 */ 1340 size = size * 2; 1341 if (size > EXT4_MAX_RESERVE_BLOCKS) 1342 size = EXT4_MAX_RESERVE_BLOCKS; 1343 my_rsv->rsv_goal_size= size; 1344 } 1345 } 1346 1347 spin_lock(rsv_lock); 1348 /* 1349 * shift the search start to the window near the goal block 1350 */ 1351 search_head = search_reserve_window(fs_rsv_root, start_block); 1352 1353 /* 1354 * find_next_reservable_window() simply finds a reservable window 1355 * inside the given range(start_block, group_end_block). 1356 * 1357 * To make sure the reservation window has a free bit inside it, we 1358 * need to check the bitmap after we found a reservable window. 
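	 *
	 * (So the retry loop below alternates between taking rsv_lock to pick
	 * a candidate window and dropping it to probe the bitmap; if the
	 * first free bit falls outside the candidate window, we search again
	 * starting from that free block.)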
1359 */ 1360 retry: 1361 ret = find_next_reservable_window(search_head, my_rsv, sb, 1362 start_block, group_end_block); 1363 1364 if (ret == -1) { 1365 if (!rsv_is_empty(&my_rsv->rsv_window)) 1366 rsv_window_remove(sb, my_rsv); 1367 spin_unlock(rsv_lock); 1368 return -1; 1369 } 1370 1371 /* 1372 * On success, find_next_reservable_window() returns the 1373 * reservation window where there is a reservable space after it. 1374 * Before we reserve this reservable space, we need 1375 * to make sure there is at least a free block inside this region. 1376 * 1377 * searching the first free bit on the block bitmap and copy of 1378 * last committed bitmap alternatively, until we found a allocatable 1379 * block. Search start from the start block of the reservable space 1380 * we just found. 1381 */ 1382 spin_unlock(rsv_lock); 1383 first_free_block = bitmap_search_next_usable_block( 1384 my_rsv->rsv_start - group_first_block, 1385 bitmap_bh, group_end_block - group_first_block + 1); 1386 1387 if (first_free_block < 0) { 1388 /* 1389 * no free block left on the bitmap, no point 1390 * to reserve the space. return failed. 1391 */ 1392 spin_lock(rsv_lock); 1393 if (!rsv_is_empty(&my_rsv->rsv_window)) 1394 rsv_window_remove(sb, my_rsv); 1395 spin_unlock(rsv_lock); 1396 return -1; /* failed */ 1397 } 1398 1399 start_block = first_free_block + group_first_block; 1400 /* 1401 * check if the first free block is within the 1402 * free space we just reserved 1403 */ 1404 if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) 1405 return 0; /* success */ 1406 /* 1407 * if the first free bit we found is out of the reservable space 1408 * continue search for next reservable space, 1409 * start from where the free block is, 1410 * we also shift the list head to where we stopped last time 1411 */ 1412 search_head = my_rsv; 1413 spin_lock(rsv_lock); 1414 goto retry; 1415 } 1416 1417 /** 1418 * try_to_extend_reservation() 1419 * @my_rsv: given reservation window 1420 * @sb: super block 1421 * @size: the delta to extend 1422 * 1423 * Attempt to expand the reservation window large enough to have 1424 * required number of free blocks 1425 * 1426 * Since ext4_try_to_allocate() will always allocate blocks within 1427 * the reservation window range, if the window size is too small, 1428 * multiple blocks allocation has to stop at the end of the reservation 1429 * window. 
To make this more efficient, given the total number of
 * blocks needed and the current size of the window, we try to
 * expand the reservation window size, if necessary, on a best-effort
 * basis before ext4_new_blocks() tries to allocate blocks.
 */
static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
			struct super_block *sb, int size)
{
	struct ext4_reserve_window_node *next_rsv;
	struct rb_node *next;
	spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock;

	if (!spin_trylock(rsv_lock))
		return;

	next = rb_next(&my_rsv->rsv_node);

	if (!next)
		my_rsv->rsv_end += size;
	else {
		next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node);

		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
			my_rsv->rsv_end += size;
		else
			my_rsv->rsv_end = next_rsv->rsv_start - 1;
	}
	spin_unlock(rsv_lock);
}

/**
 * ext4_try_to_allocate_with_rsv()
 * @sb: superblock
 * @handle: handle to this transaction
 * @group: given allocation block group
 * @bitmap_bh: bufferhead holds the block bitmap
 * @grp_goal: given target block within the group
 * @count: target number of blocks to allocate
 * @my_rsv: reservation window
 * @errp: pointer to store the error code
 *
 * This is the main function used to allocate a new block and its reservation
 * window.
 *
 * Each time a new block allocation is needed, first try to allocate from the
 * inode's own reservation window. If the inode does not have a reservation
 * window yet, then, instead of looking for a free bit in the bitmap first and
 * then searching the reservation list to see whether that bit falls inside
 * somebody else's reservation window, we try to allocate a reservation window
 * for the inode starting from the goal, and do the block allocation within
 * that reservation window.
 *
 * This avoids repeatedly searching the reservation list when somebody is
 * looking for a free block (without a reservation) and there are lots of
 * free blocks, but they are all being reserved.
 *
 * We use a red-black tree for the per-filesystem reservation list.
 */
static ext4_grpblk_t
ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
			ext4_group_t group, struct buffer_head *bitmap_bh,
			ext4_grpblk_t grp_goal,
			struct ext4_reserve_window_node *my_rsv,
			unsigned long *count, int *errp)
{
	ext4_fsblk_t group_first_block, group_last_block;
	ext4_grpblk_t ret = 0;
	int fatal;
	unsigned long num = *count;

	*errp = 0;

	/*
	 * Make sure we use undo access for the bitmap, because it is critical
	 * that we do the frozen_data COW on bitmap buffers in all cases even
	 * if the buffer is in BJ_Forget state in the committing transaction.
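	 * (In jbd2 terms, the undo access keeps a copy of the bitmap as of
	 * the last commit in b_committed_data; that copy is what
	 * ext4_test_allocatable() and claim_block() check against.)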
	 */
	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
	fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
	if (fatal) {
		*errp = fatal;
		return -1;
	}

	/*
	 * We don't deal with reservations when the filesystem is mounted
	 * without reservations, or the file is not a regular file, or the
	 * last attempt to allocate a block with reservations turned on
	 * failed.
	 */
	if (my_rsv == NULL) {
		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, count, NULL);
		goto out;
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
	 * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
	 * group_first_block is a filesystem-wide block number; it is the
	 * block number of the first block in this group.
	 */
	group_first_block = ext4_group_first_block_no(sb, group);
	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from inode's reservation
	 * window.
	 *
	 * We need to allocate a new reservation window, if:
	 * a) inode does not have a reservation window; or
	 * b) last attempt to allocate a block from existing reservation
	 *    failed; or
	 * c) we come here with a goal that is not inside the existing
	 *    reservation window.
	 *
	 * We do not need to allocate a new reservation window if we come here
	 * at the beginning with a goal and the goal is inside the window, or
	 * we don't have a goal but already have a reservation window.
	 * In those cases we can allocate from the reservation window directly.
	 */
	while (1) {
		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
			!goal_in_my_reservation(&my_rsv->rsv_window,
						grp_goal, group, sb)) {
			if (my_rsv->rsv_goal_size < *count)
				my_rsv->rsv_goal_size = *count;
			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
							group, bitmap_bh);
			if (ret < 0)
				break;			/* failed */

			if (!goal_in_my_reservation(&my_rsv->rsv_window,
							grp_goal, group, sb))
				grp_goal = -1;
		} else if (grp_goal >= 0) {
			int curr = my_rsv->rsv_end -
					(grp_goal + group_first_block) + 1;

			if (curr < *count)
				try_to_extend_reservation(my_rsv, sb,
							*count - curr);
		}

		if ((my_rsv->rsv_start > group_last_block) ||
			(my_rsv->rsv_end < group_first_block)) {
			rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, &num, &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
			*count = num;
			break;				/* succeed */
		}
		num = *count;
	}
out:
	if (ret >= 0) {
		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
					"bitmap block");
		fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
		if (fatal) {
			*errp = fatal;
			return -1;
		}
		return ret;
	}

	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
	ext4_journal_release_buffer(handle, bitmap_bh);
	return ret;
}

/**
 * ext4_has_free_blocks()
 * @sbi: in-core super block structure.
 * @nblocks: number of needed blocks
 *
 * Check if filesystem has free blocks available for allocation.
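 *
 * Illustrative example: with 1000 free blocks, 50 of them reserved for
 * root, and an unprivileged request for 100 blocks, the function sees
 * 950 usable blocks and allows the full 100, while a request for 2000
 * blocks would be trimmed to 950 (see the return value below).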
 * Return the number of blocks available for allocation for this request.
 * On success, return nblocks.
 */
ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
				  ext4_fsblk_t nblocks)
{
	ext4_fsblk_t free_blocks;
	ext4_fsblk_t root_blocks = 0;

	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);

	if (!capable(CAP_SYS_RESOURCE) &&
	    sbi->s_resuid != current->fsuid &&
	    (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid)))
		root_blocks = ext4_r_blocks_count(sbi->s_es);
#ifdef CONFIG_SMP
	if (free_blocks - root_blocks < FBC_BATCH)
		free_blocks =
			percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
#endif
	if (free_blocks <= root_blocks)
		/* we don't have free space */
		return 0;
	if (free_blocks - root_blocks < nblocks)
		return free_blocks - root_blocks;
	return nblocks;
}


/**
 * ext4_should_retry_alloc()
 * @sb: super block
 * @retries: number of attempts that have been made
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
 * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * If the total number of retries exceeds three, return FALSE.
 */
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
	if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
		return 0;

	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

	return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}

/**
 * ext4_old_new_blocks() -- core block bitmap based block allocation function
 *
 * @handle: handle to this transaction
 * @inode: file inode
 * @goal: given target block (filesystem wide)
 * @count: target number of blocks to allocate
 * @errp: error code
 *
 * ext4_old_new_blocks() uses a goal block to assist allocation and looks up
 * the block bitmap directly to do block allocation. It tries to
 * allocate block(s) from the block group containing the goal block first. If
 * that fails, it will try to allocate block(s) from other block groups
 * without any specific goal block.
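 *
 * The fallback order is: the goal's own group first, then every other
 * group in turn (round robin starting after the goal group), and finally
 * one more pass starting again from the goal group with reservations
 * disabled, before giving up with -ENOSPC.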
 *
 * This function is called when the -o nomballoc mount option is enabled.
 */
ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	ext4_group_t group_no;
	ext4_group_t goal_group;
	ext4_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
	ext4_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block */
	ext4_fsblk_t ret_block;		/* filesystem-wide allocated block */
	ext4_group_t bgi;		/* blockgroup iteration index */
	int fatal = 0, err;
	int performed_allocation = 0;
	ext4_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi;
	struct ext4_reserve_window_node *my_rsv = NULL;
	struct ext4_block_alloc_info *block_i;
	unsigned short windowsz = 0;
	ext4_group_t ngroups;
	unsigned long num = *count;

	sb = inode->i_sb;
	if (!sb) {
		*errp = -ENODEV;
		printk("ext4_new_block: nonexistent device");
		return 0;
	}

	sbi = EXT4_SB(sb);
	if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
		/*
		 * With delalloc we already reserved the blocks
		 */
		*count = ext4_has_free_blocks(sbi, *count);
	}
	if (*count == 0) {
		*errp = -ENOSPC;
		return 0;	/* return with ENOSPC error */
	}
	num = *count;

	/*
	 * Check quota for allocation of this block.
	 */
	if (DQUOT_ALLOC_BLOCK(inode, num)) {
		*errp = -EDQUOT;
		return 0;
	}

	sbi = EXT4_SB(sb);
	es = EXT4_SB(sb)->s_es;
	ext4_debug("goal=%llu.\n", goal);
	/*
	 * Allocate a block from the reservation only when the
	 * filesystem is mounted with reservations (the default, -o reservation),
	 * it's a regular file, and
	 * the desired window size is greater than 0 (one could use the ioctl
	 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file).
	 */
	block_i = EXT4_I(inode)->i_block_alloc_info;
	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
		my_rsv = &block_i->rsv_window_node;

	/*
	 * First, test whether the goal block is free.
	 */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= ext4_blocks_count(es))
		goal = le32_to_cpu(es->s_first_data_block);
	ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk);
	goal_group = group_no;
retry_alloc:
	gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
	if (!gdp)
		goto io_error;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	/*
	 * if there are not enough free blocks to make a new reservation,
	 * turn off reservation for this allocation
	 */
	if (my_rsv && (free_blocks < windowsz)
		&& (rsv_is_empty(&my_rsv->rsv_window)))
		my_rsv = NULL;

	if (free_blocks > 0) {
		bitmap_bh = ext4_read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
					group_no, bitmap_bh, grp_target_blk,
					my_rsv, &num, &fatal);
		if (fatal)
			goto out;
		if (grp_alloc_blk >= 0)
			goto allocated;
	}

	ngroups = EXT4_SB(sb)->s_groups_count;
	smp_rmb();

	/*
	 * Now search the rest of the groups.
	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and gdp correctly point to the last group visited.
	 */
	for (bgi = 0; bgi < ngroups; bgi++) {
		group_no++;
		if (group_no >= ngroups)
			group_no = 0;
		gdp = ext4_get_group_desc(sb, group_no, &gdp_bh);
		if (!gdp)
			goto io_error;
		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
		/*
		 * Skip this group if the number of
		 * free blocks is less than half of the reservation
		 * window size.
		 */
		if (free_blocks <= (windowsz/2))
			continue;

		brelse(bitmap_bh);
		bitmap_bh = ext4_read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		/*
		 * Try to allocate block(s) from this group, without a goal (-1).
		 */
		grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle,
					group_no, bitmap_bh, -1, my_rsv,
					&num, &fatal);
		if (fatal)
			goto out;
		if (grp_alloc_blk >= 0)
			goto allocated;
	}
	/*
	 * We may end up with a bogus earlier ENOSPC error because the
	 * filesystem is "full" of reservations, while there may indeed be
	 * free blocks available on disk.  In that case, just forget about
	 * the reservations and do the block allocation as if there were
	 * no reservations.
	 */
	if (my_rsv) {
		my_rsv = NULL;
		windowsz = 0;
		group_no = goal_group;
		goto retry_alloc;
	}
	/* No space left on the device */
	*errp = -ENOSPC;
	goto out;

allocated:

	ext4_debug("using block group %lu(%d)\n",
			group_no, gdp->bg_free_blocks_count);

	BUFFER_TRACE(gdp_bh, "get_write_access");
	fatal = ext4_journal_get_write_access(handle, gdp_bh);
	if (fatal)
		goto out;

	ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no);

	if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) ||
	    in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) ||
	    in_range(ret_block, ext4_inode_table(sb, gdp),
		     EXT4_SB(sb)->s_itb_per_group) ||
	    in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
		     EXT4_SB(sb)->s_itb_per_group)) {
		ext4_error(sb, "ext4_new_block",
			   "Allocating block in system zone - "
			   "blocks from %llu, length %lu",
			   ret_block, num);
		/*
		 * claim_block marked the blocks we allocated
		 * as in use.  So we may want to selectively
		 * mark some of the blocks as free.
		 */
		goto retry_alloc;
	}

	performed_allocation = 1;

#ifdef CONFIG_JBD2_DEBUG
	{
		struct buffer_head *debug_bh;

		/* Record bitmap buffer state in the newly allocated block */
		debug_bh = sb_find_get_block(sb, ret_block);
		if (debug_bh) {
			BUFFER_TRACE(debug_bh, "state when allocated");
			BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
			brelse(debug_bh);
		}
	}
	jbd_lock_bh_state(bitmap_bh);
	spin_lock(sb_bgl_lock(sbi, group_no));
	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) {
		int i;

		for (i = 0; i < num; i++) {
			if (ext4_test_bit(grp_alloc_blk + i,
					bh2jh(bitmap_bh)->b_committed_data)) {
				printk("%s: block was unexpectedly set in "
					"b_committed_data\n", __func__);
			}
		}
	}
	ext4_debug("found bit %d\n", grp_alloc_blk);
	spin_unlock(sb_bgl_lock(sbi, group_no));
	jbd_unlock_bh_state(bitmap_bh);
#endif

	if (ret_block + num - 1 >= ext4_blocks_count(es)) {
		ext4_error(sb, "ext4_new_block",
			   "block(%llu) >= blocks count(%llu) - "
			   "block_group = %lu, es == %p ", ret_block,
			   ext4_blocks_count(es), group_no, es);
		goto out;
	}

	/*
	 * It is up to the caller to add the new buffer to a journal
	 * list of some description.  We don't know in advance whether
	 * the caller wants to use it as metadata or data.
	 */
	spin_lock(sb_bgl_lock(sbi, group_no));
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
	le16_add_cpu(&gdp->bg_free_blocks_count, -num);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
	spin_unlock(sb_bgl_lock(sbi, group_no));
	if (!EXT4_I(inode)->i_delalloc_reserved_flag)
		percpu_counter_sub(&sbi->s_freeblocks_counter, num);

	if (sbi->s_log_groups_per_flex) {
		ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
		spin_lock(sb_bgl_lock(sbi, flex_group));
		sbi->s_flex_groups[flex_group].free_blocks -= num;
		spin_unlock(sb_bgl_lock(sbi, flex_group));
	}

	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
	err = ext4_journal_dirty_metadata(handle, gdp_bh);
	if (!fatal)
		fatal = err;

	sb->s_dirt = 1;
	if (fatal)
		goto out;

	*errp = 0;
	brelse(bitmap_bh);
	DQUOT_FREE_BLOCK(inode, *count - num);
	*count = num;
	return ret_block;

io_error:
	*errp = -EIO;
out:
	if (fatal) {
		*errp = fatal;
		ext4_std_error(sb, fatal);
	}
	/*
	 * Undo the block allocation
	 */
	if (!performed_allocation)
		DQUOT_FREE_BLOCK(inode, *count);
	brelse(bitmap_bh);
	return 0;
}

#define EXT4_META_BLOCK	0x1
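
/*
 * do_blk_alloc() -- common helper for data and metadata block allocation.
 *
 * Without the multi-block allocator (-o nomballoc) this falls back to the
 * bitmap-based ext4_old_new_blocks().  Otherwise it builds an
 * ext4_allocation_request and hands it to ext4_mb_new_blocks().
 * EXT4_META_BLOCK in @flags marks a metadata allocation, which is not
 * given the EXT4_MB_HINT_DATA in-core preallocation hint.
 */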
static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
			ext4_lblk_t iblock, ext4_fsblk_t goal,
			unsigned long *count, int *errp, int flags)
{
	struct ext4_allocation_request ar;
	ext4_fsblk_t ret;

	if (!test_opt(inode->i_sb, MBALLOC)) {
		return ext4_old_new_blocks(handle, inode, goal, count, errp);
	}

	memset(&ar, 0, sizeof(ar));
	/* Fill with neighbour allocated blocks */

	ar.inode = inode;
	ar.goal = goal;
	ar.len = *count;
	ar.logical = iblock;

	if (S_ISREG(inode->i_mode) && !(flags & EXT4_META_BLOCK))
		/* enable in-core preallocation for data block allocation */
		ar.flags = EXT4_MB_HINT_DATA;
	else
		/* disable in-core preallocation for non-regular files */
		ar.flags = 0;

	ret = ext4_mb_new_blocks(handle, &ar, errp);
	*count = ar.len;
	return ret;
}

/*
 * ext4_new_meta_blocks() -- allocate blocks for metadata (indexing) blocks
 *
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block (filesystem wide)
 * @count:		total number of blocks needed
 * @errp:		error code
 *
 * Return the first allocated block number on success; *count stores the
 * total number of blocks allocated, and any error is stored in *errp.
 */
ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	ext4_fsblk_t ret;
	ret = do_blk_alloc(handle, inode, 0, goal,
				count, errp, EXT4_META_BLOCK);
	/*
	 * Account for the allocated meta blocks
	 */
	if (!(*errp)) {
		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
		EXT4_I(inode)->i_allocated_meta_blocks += *count;
		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
	}
	return ret;
}

/*
 * ext4_new_meta_block() -- allocate a single metadata (indexing) block
 *
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block (filesystem wide)
 * @errp:		error code
 *
 * Return the allocated block number on success.
 */
ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
		ext4_fsblk_t goal, int *errp)
{
	unsigned long count = 1;
	return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
}

/*
 * ext4_new_blocks() -- allocate data blocks
 *
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @iblock:		logical block of the file
 * @goal:		given target block (filesystem wide)
 * @count:		total number of blocks needed
 * @errp:		error code
 *
 * Return the first allocated block number on success; *count stores the
 * total number of blocks allocated, and any error is stored in *errp.
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
				ext4_lblk_t iblock, ext4_fsblk_t goal,
				unsigned long *count, int *errp)
{
	return do_blk_alloc(handle, inode, iblock, goal, count, errp, 0);
}
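
/*
 * Example of how the interface above is typically consumed: a metadata
 * path asks for a single indexing block near a goal block and checks
 * *errp on failure.  This is only an illustrative sketch; the function
 * below is hypothetical and not part of ext4.
 */
#if 0	/* illustrative sketch, not compiled */
static ext4_fsblk_t ext4_alloc_index_block_example(handle_t *handle,
				struct inode *inode, ext4_fsblk_t goal)
{
	int err = 0;
	ext4_fsblk_t newblock;

	newblock = ext4_new_meta_block(handle, inode, goal, &err);
	if (!newblock) {
		/* err now holds -ENOSPC, -EDQUOT, -EIO, ... */
		return 0;
	}
	return newblock;
}
#endif
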
/**
 * ext4_count_free_blocks() -- count filesystem free blocks
 * @sb:		superblock
 *
 * Adds up the number of free blocks from each block group.
 */
ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
{
	ext4_fsblk_t desc_count;
	struct ext4_group_desc *gdp;
	ext4_group_t i;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
#ifdef EXT4FS_DEBUG
	struct ext4_super_block *es;
	ext4_fsblk_t bitmap_count;
	unsigned long x;
	struct buffer_head *bitmap_bh = NULL;

	es = EXT4_SB(sb)->s_es;
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;

	smp_rmb();
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
		brelse(bitmap_bh);
		bitmap_bh = ext4_read_block_bitmap(sb, i);
		if (bitmap_bh == NULL)
			continue;

		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
		printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	printk("ext4_count_free_blocks: stored = %llu"
		", computed = %llu, %llu\n",
		ext4_free_blocks_count(es),
		desc_count, bitmap_count);
	return bitmap_count;
#else
	desc_count = 0;
	smp_rmb();
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		if (!gdp)
			continue;
		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
	}

	return desc_count;
#endif
}

static inline int test_root(ext4_group_t a, int b)
{
	int num = b;

	while (a > num)
		num *= b;
	return num == a;
}

static int ext4_group_sparse(ext4_group_t group)
{
	if (group <= 1)
		return 1;
	if (!(group & 1))
		return 0;
	return (test_root(group, 7) || test_root(group, 5) ||
		test_root(group, 3));
}
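
/*
 * With the sparse_super feature, ext4_group_sparse() accepts group 0,
 * group 1, and the groups that are powers of 3, 5 or 7 (test_root()
 * checks whether a group number is such a power).  For example, on a
 * filesystem with 100 block groups the backup superblocks (and, in the
 * non-META_BG layout, the backup group descriptor tables) live in groups
 * 0, 1, 3, 5, 7, 9, 25, 27, 49 and 81.  Without sparse_super, every
 * group carries a backup.
 */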
/**
 * ext4_bg_has_super - number of blocks used by the superblock in group
 * @sb: superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the superblock (primary or backup)
 * in this group.  Currently this will be only 0 or 1.
 */
int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
{
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
			!ext4_group_sparse(group))
		return 0;
	return 1;
}

static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
					ext4_group_t group)
{
	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
	ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
	ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;

	if (group == first || group == first + 1 || group == last)
		return 1;
	return 0;
}

static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
					ext4_group_t group)
{
	return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
}

/**
 * ext4_bg_num_gdb - number of blocks used by the group table in group
 * @sb: superblock for filesystem
 * @group: group number to check
 *
 * Return the number of blocks used by the group descriptor table
 * (primary or backup) in this group.  In the future there may be a
 * different number of descriptor blocks in each group.
 */
unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
{
	unsigned long first_meta_bg =
			le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
	unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
			metagroup < first_meta_bg)
		return ext4_bg_num_gdb_nometa(sb, group);

	return ext4_bg_num_gdb_meta(sb, group);

}
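
/*
 * For example, with 4 KiB blocks and the default 32-byte group descriptors,
 * EXT4_DESC_PER_BLOCK(sb) is 128.  In the traditional (non-META_BG) layout,
 * a group that carries a superblock backup also carries the full descriptor
 * table of s_gdb_count blocks.  With META_BG, each metablock group of 128
 * block groups keeps its single descriptor block in its first group, with
 * backups in the second and last groups of the metablock group (e.g. groups
 * 0, 1 and 127 for metagroup 0).
 */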