1 /* 2 * linux/fs/ext2/balloc.c 3 * 4 * Copyright (C) 1992, 1993, 1994, 1995 5 * Remy Card (card@masi.ibp.fr) 6 * Laboratoire MASI - Institut Blaise Pascal 7 * Universite Pierre et Marie Curie (Paris VI) 8 * 9 * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 10 * Big-endian to little-endian byte-swapping/bitmaps by 11 * David S. Miller (davem@caip.rutgers.edu), 1995 12 */ 13 14 #include "ext2.h" 15 #include <linux/quotaops.h> 16 #include <linux/sched.h> 17 #include <linux/buffer_head.h> 18 #include <linux/capability.h> 19 20 /* 21 * balloc.c contains the blocks allocation and deallocation routines 22 */ 23 24 /* 25 * The free blocks are managed by bitmaps. A file system contains several 26 * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap 27 * block for inodes, N blocks for the inode table and data blocks. 28 * 29 * The file system contains group descriptors which are located after the 30 * super block. Each descriptor contains the number of the bitmap block and 31 * the free blocks count in the block. The descriptors are loaded in memory 32 * when a file system is mounted (see ext2_fill_super). 33 */ 34 35 36 #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 37 38 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, 39 unsigned int block_group, 40 struct buffer_head ** bh) 41 { 42 unsigned long group_desc; 43 unsigned long offset; 44 struct ext2_group_desc * desc; 45 struct ext2_sb_info *sbi = EXT2_SB(sb); 46 47 if (block_group >= sbi->s_groups_count) { 48 ext2_error (sb, "ext2_get_group_desc", 49 "block_group >= groups_count - " 50 "block_group = %d, groups_count = %lu", 51 block_group, sbi->s_groups_count); 52 53 return NULL; 54 } 55 56 group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); 57 offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); 58 if (!sbi->s_group_desc[group_desc]) { 59 ext2_error (sb, "ext2_get_group_desc", 60 "Group descriptor not loaded - " 61 "block_group = %d, group_desc = %lu, desc = %lu", 62 block_group, group_desc, offset); 63 return NULL; 64 } 65 66 desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data; 67 if (bh) 68 *bh = sbi->s_group_desc[group_desc]; 69 return desc + offset; 70 } 71 72 /* 73 * Read the bitmap for a given block_group, reading into the specified 74 * slot in the superblock's bitmap cache. 75 * 76 * Return buffer_head on success or NULL in case of failure. 77 */ 78 static struct buffer_head * 79 read_block_bitmap(struct super_block *sb, unsigned int block_group) 80 { 81 struct ext2_group_desc * desc; 82 struct buffer_head * bh = NULL; 83 84 desc = ext2_get_group_desc (sb, block_group, NULL); 85 if (!desc) 86 goto error_out; 87 bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); 88 if (!bh) 89 ext2_error (sb, "read_block_bitmap", 90 "Cannot read block bitmap - " 91 "block_group = %d, block_bitmap = %u", 92 block_group, le32_to_cpu(desc->bg_block_bitmap)); 93 error_out: 94 return bh; 95 } 96 97 /* 98 * Set sb->s_dirt here because the superblock was "logically" altered. We 99 * need to recalculate its free blocks count and flush it out. 100 */ 101 static int reserve_blocks(struct super_block *sb, int count) 102 { 103 struct ext2_sb_info *sbi = EXT2_SB(sb); 104 struct ext2_super_block *es = sbi->s_es; 105 unsigned free_blocks; 106 unsigned root_blocks; 107 108 free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); 109 root_blocks = le32_to_cpu(es->s_r_blocks_count); 110 111 if (free_blocks < count) 112 count = free_blocks; 113 114 if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && 115 sbi->s_resuid != current->fsuid && 116 (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { 117 /* 118 * We are too close to reserve and we are not privileged. 119 * Can we allocate anything at all? 120 */ 121 if (free_blocks > root_blocks) 122 count = free_blocks - root_blocks; 123 else 124 return 0; 125 } 126 127 percpu_counter_mod(&sbi->s_freeblocks_counter, -count); 128 sb->s_dirt = 1; 129 return count; 130 } 131 132 static void release_blocks(struct super_block *sb, int count) 133 { 134 if (count) { 135 struct ext2_sb_info *sbi = EXT2_SB(sb); 136 137 percpu_counter_mod(&sbi->s_freeblocks_counter, count); 138 sb->s_dirt = 1; 139 } 140 } 141 142 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, 143 struct ext2_group_desc *desc, struct buffer_head *bh, int count) 144 { 145 unsigned free_blocks; 146 147 if (!desc->bg_free_blocks_count) 148 return 0; 149 150 spin_lock(sb_bgl_lock(sbi, group_no)); 151 free_blocks = le16_to_cpu(desc->bg_free_blocks_count); 152 if (free_blocks < count) 153 count = free_blocks; 154 desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); 155 spin_unlock(sb_bgl_lock(sbi, group_no)); 156 mark_buffer_dirty(bh); 157 return count; 158 } 159 160 static void group_release_blocks(struct super_block *sb, int group_no, 161 struct ext2_group_desc *desc, struct buffer_head *bh, int count) 162 { 163 if (count) { 164 struct ext2_sb_info *sbi = EXT2_SB(sb); 165 unsigned free_blocks; 166 167 spin_lock(sb_bgl_lock(sbi, group_no)); 168 free_blocks = le16_to_cpu(desc->bg_free_blocks_count); 169 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); 170 spin_unlock(sb_bgl_lock(sbi, group_no)); 171 sb->s_dirt = 1; 172 mark_buffer_dirty(bh); 173 } 174 } 175 176 /* Free given blocks, update quota and i_blocks field */ 177 void ext2_free_blocks (struct inode * inode, unsigned long block, 178 unsigned long count) 179 { 180 struct buffer_head *bitmap_bh = NULL; 181 struct buffer_head * bh2; 182 unsigned long block_group; 183 unsigned long bit; 184 unsigned long i; 185 unsigned long overflow; 186 struct super_block * sb = inode->i_sb; 187 struct ext2_sb_info * sbi = EXT2_SB(sb); 188 struct ext2_group_desc * desc; 189 struct ext2_super_block * es = sbi->s_es; 190 unsigned freed = 0, group_freed; 191 192 if (block < le32_to_cpu(es->s_first_data_block) || 193 block + count < block || 194 block + count > le32_to_cpu(es->s_blocks_count)) { 195 ext2_error (sb, "ext2_free_blocks", 196 "Freeing blocks not in datazone - " 197 "block = %lu, count = %lu", block, count); 198 goto error_return; 199 } 200 201 ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); 202 203 do_more: 204 overflow = 0; 205 block_group = (block - le32_to_cpu(es->s_first_data_block)) / 206 EXT2_BLOCKS_PER_GROUP(sb); 207 bit = (block - le32_to_cpu(es->s_first_data_block)) % 208 EXT2_BLOCKS_PER_GROUP(sb); 209 /* 210 * Check to see if we are freeing blocks across a group 211 * boundary. 212 */ 213 if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) { 214 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb); 215 count -= overflow; 216 } 217 brelse(bitmap_bh); 218 bitmap_bh = read_block_bitmap(sb, block_group); 219 if (!bitmap_bh) 220 goto error_return; 221 222 desc = ext2_get_group_desc (sb, block_group, &bh2); 223 if (!desc) 224 goto error_return; 225 226 if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || 227 in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || 228 in_range (block, le32_to_cpu(desc->bg_inode_table), 229 sbi->s_itb_per_group) || 230 in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), 231 sbi->s_itb_per_group)) 232 ext2_error (sb, "ext2_free_blocks", 233 "Freeing blocks in system zones - " 234 "Block = %lu, count = %lu", 235 block, count); 236 237 for (i = 0, group_freed = 0; i < count; i++) { 238 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), 239 bit + i, bitmap_bh->b_data)) { 240 ext2_error(sb, __FUNCTION__, 241 "bit already cleared for block %lu", block + i); 242 } else { 243 group_freed++; 244 } 245 } 246 247 mark_buffer_dirty(bitmap_bh); 248 if (sb->s_flags & MS_SYNCHRONOUS) 249 sync_dirty_buffer(bitmap_bh); 250 251 group_release_blocks(sb, block_group, desc, bh2, group_freed); 252 freed += group_freed; 253 254 if (overflow) { 255 block += count; 256 count = overflow; 257 goto do_more; 258 } 259 error_return: 260 brelse(bitmap_bh); 261 release_blocks(sb, freed); 262 DQUOT_FREE_BLOCK(inode, freed); 263 } 264 265 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) 266 { 267 int k; 268 char *p, *r; 269 270 if (!ext2_test_bit(goal, map)) 271 goto got_it; 272 273 repeat: 274 if (goal) { 275 /* 276 * The goal was occupied; search forward for a free 277 * block within the next XX blocks. 278 * 279 * end_goal is more or less random, but it has to be 280 * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the 281 * next 64-bit boundary is simple.. 282 */ 283 k = (goal + 63) & ~63; 284 goal = ext2_find_next_zero_bit(map, k, goal); 285 if (goal < k) 286 goto got_it; 287 /* 288 * Search in the remainder of the current group. 289 */ 290 } 291 292 p = map + (goal >> 3); 293 r = memscan(p, 0, (size - goal + 7) >> 3); 294 k = (r - map) << 3; 295 if (k < size) { 296 /* 297 * We have succeeded in finding a free byte in the block 298 * bitmap. Now search backwards to find the start of this 299 * group of free blocks - won't take more than 7 iterations. 300 */ 301 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--) 302 ; 303 goto got_it; 304 } 305 306 k = ext2_find_next_zero_bit ((u32 *)map, size, goal); 307 if (k < size) { 308 goal = k; 309 goto got_it; 310 } 311 return -1; 312 got_it: 313 if (ext2_set_bit_atomic(lock, goal, (void *) map)) 314 goto repeat; 315 return goal; 316 } 317 318 /* 319 * ext2_new_block uses a goal block to assist allocation. If the goal is 320 * free, or there is a free block within 32 blocks of the goal, that block 321 * is allocated. Otherwise a forward search is made for a free block; within 322 * each block group the search first looks for an entire free byte in the block 323 * bitmap, and then for any free bit if that fails. 324 * This function also updates quota and i_blocks field. 325 */ 326 int ext2_new_block(struct inode *inode, unsigned long goal, 327 u32 *prealloc_count, u32 *prealloc_block, int *err) 328 { 329 struct buffer_head *bitmap_bh = NULL; 330 struct buffer_head *gdp_bh; /* bh2 */ 331 struct ext2_group_desc *desc; 332 int group_no; /* i */ 333 int ret_block; /* j */ 334 int group_idx; /* k */ 335 int target_block; /* tmp */ 336 int block = 0; 337 struct super_block *sb = inode->i_sb; 338 struct ext2_sb_info *sbi = EXT2_SB(sb); 339 struct ext2_super_block *es = sbi->s_es; 340 unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb); 341 unsigned prealloc_goal = es->s_prealloc_blocks; 342 unsigned group_alloc = 0, es_alloc, dq_alloc; 343 int nr_scanned_groups; 344 345 if (!prealloc_goal--) 346 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1; 347 if (!prealloc_count || *prealloc_count) 348 prealloc_goal = 0; 349 350 if (DQUOT_ALLOC_BLOCK(inode, 1)) { 351 *err = -EDQUOT; 352 goto out; 353 } 354 355 while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal)) 356 prealloc_goal--; 357 358 dq_alloc = prealloc_goal + 1; 359 es_alloc = reserve_blocks(sb, dq_alloc); 360 if (!es_alloc) { 361 *err = -ENOSPC; 362 goto out_dquot; 363 } 364 365 ext2_debug ("goal=%lu.\n", goal); 366 367 if (goal < le32_to_cpu(es->s_first_data_block) || 368 goal >= le32_to_cpu(es->s_blocks_count)) 369 goal = le32_to_cpu(es->s_first_data_block); 370 group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; 371 desc = ext2_get_group_desc (sb, group_no, &gdp_bh); 372 if (!desc) { 373 /* 374 * gdp_bh may still be uninitialised. But group_release_blocks 375 * will not touch it because group_alloc is zero. 376 */ 377 goto io_error; 378 } 379 380 group_alloc = group_reserve_blocks(sbi, group_no, desc, 381 gdp_bh, es_alloc); 382 if (group_alloc) { 383 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % 384 group_size); 385 brelse(bitmap_bh); 386 bitmap_bh = read_block_bitmap(sb, group_no); 387 if (!bitmap_bh) 388 goto io_error; 389 390 ext2_debug("goal is at %d:%d.\n", group_no, ret_block); 391 392 ret_block = grab_block(sb_bgl_lock(sbi, group_no), 393 bitmap_bh->b_data, group_size, ret_block); 394 if (ret_block >= 0) 395 goto got_block; 396 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 397 group_alloc = 0; 398 } 399 400 ext2_debug ("Bit not found in block group %d.\n", group_no); 401 402 /* 403 * Now search the rest of the groups. We assume that 404 * i and desc correctly point to the last group visited. 405 */ 406 nr_scanned_groups = 0; 407 retry: 408 for (group_idx = 0; !group_alloc && 409 group_idx < sbi->s_groups_count; group_idx++) { 410 group_no++; 411 if (group_no >= sbi->s_groups_count) 412 group_no = 0; 413 desc = ext2_get_group_desc(sb, group_no, &gdp_bh); 414 if (!desc) 415 goto io_error; 416 group_alloc = group_reserve_blocks(sbi, group_no, desc, 417 gdp_bh, es_alloc); 418 } 419 if (!group_alloc) { 420 *err = -ENOSPC; 421 goto out_release; 422 } 423 brelse(bitmap_bh); 424 bitmap_bh = read_block_bitmap(sb, group_no); 425 if (!bitmap_bh) 426 goto io_error; 427 428 ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, 429 group_size, 0); 430 if (ret_block < 0) { 431 /* 432 * If a free block counter is corrupted we can loop inifintely. 433 * Detect that here. 434 */ 435 nr_scanned_groups++; 436 if (nr_scanned_groups > 2 * sbi->s_groups_count) { 437 ext2_error(sb, "ext2_new_block", 438 "corrupted free blocks counters"); 439 goto io_error; 440 } 441 /* 442 * Someone else grabbed the last free block in this blockgroup 443 * before us. Retry the scan. 444 */ 445 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 446 group_alloc = 0; 447 goto retry; 448 } 449 450 got_block: 451 ext2_debug("using block group %d(%d)\n", 452 group_no, desc->bg_free_blocks_count); 453 454 target_block = ret_block + group_no * group_size + 455 le32_to_cpu(es->s_first_data_block); 456 457 if (target_block == le32_to_cpu(desc->bg_block_bitmap) || 458 target_block == le32_to_cpu(desc->bg_inode_bitmap) || 459 in_range(target_block, le32_to_cpu(desc->bg_inode_table), 460 sbi->s_itb_per_group)) 461 ext2_error (sb, "ext2_new_block", 462 "Allocating block in system zone - " 463 "block = %u", target_block); 464 465 if (target_block >= le32_to_cpu(es->s_blocks_count)) { 466 ext2_error (sb, "ext2_new_block", 467 "block(%d) >= blocks count(%d) - " 468 "block_group = %d, es == %p ", ret_block, 469 le32_to_cpu(es->s_blocks_count), group_no, es); 470 goto io_error; 471 } 472 block = target_block; 473 474 /* OK, we _had_ allocated something */ 475 ext2_debug("found bit %d\n", ret_block); 476 477 dq_alloc--; 478 es_alloc--; 479 group_alloc--; 480 481 /* 482 * Do block preallocation now if required. 483 */ 484 write_lock(&EXT2_I(inode)->i_meta_lock); 485 if (group_alloc && !*prealloc_count) { 486 unsigned n; 487 488 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { 489 if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), 490 ret_block, 491 (void*) bitmap_bh->b_data)) 492 break; 493 } 494 *prealloc_block = block + 1; 495 *prealloc_count = n; 496 es_alloc -= n; 497 dq_alloc -= n; 498 group_alloc -= n; 499 } 500 write_unlock(&EXT2_I(inode)->i_meta_lock); 501 502 mark_buffer_dirty(bitmap_bh); 503 if (sb->s_flags & MS_SYNCHRONOUS) 504 sync_dirty_buffer(bitmap_bh); 505 506 ext2_debug ("allocating block %d. ", block); 507 508 *err = 0; 509 out_release: 510 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); 511 release_blocks(sb, es_alloc); 512 out_dquot: 513 DQUOT_FREE_BLOCK(inode, dq_alloc); 514 out: 515 brelse(bitmap_bh); 516 return block; 517 518 io_error: 519 *err = -EIO; 520 goto out_release; 521 } 522 523 #ifdef EXT2FS_DEBUG 524 525 static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; 526 527 unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) 528 { 529 unsigned int i; 530 unsigned long sum = 0; 531 532 if (!map) 533 return (0); 534 for (i = 0; i < numchars; i++) 535 sum += nibblemap[map->b_data[i] & 0xf] + 536 nibblemap[(map->b_data[i] >> 4) & 0xf]; 537 return (sum); 538 } 539 540 #endif /* EXT2FS_DEBUG */ 541 542 unsigned long ext2_count_free_blocks (struct super_block * sb) 543 { 544 struct ext2_group_desc * desc; 545 unsigned long desc_count = 0; 546 int i; 547 #ifdef EXT2FS_DEBUG 548 unsigned long bitmap_count, x; 549 struct ext2_super_block *es; 550 551 es = EXT2_SB(sb)->s_es; 552 desc_count = 0; 553 bitmap_count = 0; 554 desc = NULL; 555 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { 556 struct buffer_head *bitmap_bh; 557 desc = ext2_get_group_desc (sb, i, NULL); 558 if (!desc) 559 continue; 560 desc_count += le16_to_cpu(desc->bg_free_blocks_count); 561 bitmap_bh = read_block_bitmap(sb, i); 562 if (!bitmap_bh) 563 continue; 564 565 x = ext2_count_free(bitmap_bh, sb->s_blocksize); 566 printk ("group %d: stored = %d, counted = %lu\n", 567 i, le16_to_cpu(desc->bg_free_blocks_count), x); 568 bitmap_count += x; 569 brelse(bitmap_bh); 570 } 571 printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", 572 (long)le32_to_cpu(es->s_free_blocks_count), 573 desc_count, bitmap_count); 574 return bitmap_count; 575 #else 576 for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) { 577 desc = ext2_get_group_desc (sb, i, NULL); 578 if (!desc) 579 continue; 580 desc_count += le16_to_cpu(desc->bg_free_blocks_count); 581 } 582 return desc_count; 583 #endif 584 } 585 586 static inline int 587 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) 588 { 589 return ext2_test_bit ((block - 590 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % 591 EXT2_BLOCKS_PER_GROUP(sb), map); 592 } 593 594 static inline int test_root(int a, int b) 595 { 596 int num = b; 597 598 while (a > num) 599 num *= b; 600 return num == a; 601 } 602 603 static int ext2_group_sparse(int group) 604 { 605 if (group <= 1) 606 return 1; 607 return (test_root(group, 3) || test_root(group, 5) || 608 test_root(group, 7)); 609 } 610 611 /** 612 * ext2_bg_has_super - number of blocks used by the superblock in group 613 * @sb: superblock for filesystem 614 * @group: group number to check 615 * 616 * Return the number of blocks used by the superblock (primary or backup) 617 * in this group. Currently this will be only 0 or 1. 618 */ 619 int ext2_bg_has_super(struct super_block *sb, int group) 620 { 621 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& 622 !ext2_group_sparse(group)) 623 return 0; 624 return 1; 625 } 626 627 /** 628 * ext2_bg_num_gdb - number of blocks used by the group table in group 629 * @sb: superblock for filesystem 630 * @group: group number to check 631 * 632 * Return the number of blocks used by the group descriptor table 633 * (primary or backup) in this group. In the future there may be a 634 * different number of descriptor blocks in each group. 635 */ 636 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) 637 { 638 if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& 639 !ext2_group_sparse(group)) 640 return 0; 641 return EXT2_SB(sb)->s_gdb_count; 642 } 643 644