1 /* 2 * linux/fs/ext4/resize.c 3 * 4 * Support for resizing an ext4 filesystem while it is mounted. 5 * 6 * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> 7 * 8 * This could probably be made into a module, because it is not often in use. 9 */ 10 11 12 #define EXT4FS_DEBUG 13 14 #include <linux/errno.h> 15 #include <linux/slab.h> 16 17 #include "ext4_jbd2.h" 18 19 int ext4_resize_begin(struct super_block *sb) 20 { 21 int ret = 0; 22 23 if (!capable(CAP_SYS_RESOURCE)) 24 return -EPERM; 25 26 if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) 27 ret = -EBUSY; 28 29 return ret; 30 } 31 32 void ext4_resize_end(struct super_block *sb) 33 { 34 clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); 35 smp_mb__after_clear_bit(); 36 } 37 38 #define outside(b, first, last) ((b) < (first) || (b) >= (last)) 39 #define inside(b, first, last) ((b) >= (first) && (b) < (last)) 40 41 static int verify_group_input(struct super_block *sb, 42 struct ext4_new_group_data *input) 43 { 44 struct ext4_sb_info *sbi = EXT4_SB(sb); 45 struct ext4_super_block *es = sbi->s_es; 46 ext4_fsblk_t start = ext4_blocks_count(es); 47 ext4_fsblk_t end = start + input->blocks_count; 48 ext4_group_t group = input->group; 49 ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 50 unsigned overhead = ext4_bg_has_super(sb, group) ? 51 (1 + ext4_bg_num_gdb(sb, group) + 52 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 53 ext4_fsblk_t metaend = start + overhead; 54 struct buffer_head *bh = NULL; 55 ext4_grpblk_t free_blocks_count, offset; 56 int err = -EINVAL; 57 58 input->free_blocks_count = free_blocks_count = 59 input->blocks_count - 2 - overhead - sbi->s_itb_per_group; 60 61 if (test_opt(sb, DEBUG)) 62 printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " 63 "(%d free, %u reserved)\n", 64 ext4_bg_has_super(sb, input->group) ? "normal" : 65 "no-super", input->group, input->blocks_count, 66 free_blocks_count, input->reserved_blocks); 67 68 ext4_get_group_no_and_offset(sb, start, NULL, &offset); 69 if (group != sbi->s_groups_count) 70 ext4_warning(sb, "Cannot add at group %u (only %u groups)", 71 input->group, sbi->s_groups_count); 72 else if (offset != 0) 73 ext4_warning(sb, "Last group not full"); 74 else if (input->reserved_blocks > input->blocks_count / 5) 75 ext4_warning(sb, "Reserved blocks too high (%u)", 76 input->reserved_blocks); 77 else if (free_blocks_count < 0) 78 ext4_warning(sb, "Bad blocks count %u", 79 input->blocks_count); 80 else if (!(bh = sb_bread(sb, end - 1))) 81 ext4_warning(sb, "Cannot read last block (%llu)", 82 end - 1); 83 else if (outside(input->block_bitmap, start, end)) 84 ext4_warning(sb, "Block bitmap not in group (block %llu)", 85 (unsigned long long)input->block_bitmap); 86 else if (outside(input->inode_bitmap, start, end)) 87 ext4_warning(sb, "Inode bitmap not in group (block %llu)", 88 (unsigned long long)input->inode_bitmap); 89 else if (outside(input->inode_table, start, end) || 90 outside(itend - 1, start, end)) 91 ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", 92 (unsigned long long)input->inode_table, itend - 1); 93 else if (input->inode_bitmap == input->block_bitmap) 94 ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", 95 (unsigned long long)input->block_bitmap); 96 else if (inside(input->block_bitmap, input->inode_table, itend)) 97 ext4_warning(sb, "Block bitmap (%llu) in inode table " 98 "(%llu-%llu)", 99 (unsigned long long)input->block_bitmap, 100 (unsigned long long)input->inode_table, itend - 1); 101 else if (inside(input->inode_bitmap, input->inode_table, itend)) 102 ext4_warning(sb, "Inode bitmap (%llu) in inode table " 103 "(%llu-%llu)", 104 (unsigned long long)input->inode_bitmap, 105 (unsigned long long)input->inode_table, itend - 1); 106 else if (inside(input->block_bitmap, start, metaend)) 107 ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", 108 (unsigned long long)input->block_bitmap, 109 start, metaend - 1); 110 else if (inside(input->inode_bitmap, start, metaend)) 111 ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", 112 (unsigned long long)input->inode_bitmap, 113 start, metaend - 1); 114 else if (inside(input->inode_table, start, metaend) || 115 inside(itend - 1, start, metaend)) 116 ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " 117 "(%llu-%llu)", 118 (unsigned long long)input->inode_table, 119 itend - 1, start, metaend - 1); 120 else 121 err = 0; 122 brelse(bh); 123 124 return err; 125 } 126 127 static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 128 ext4_fsblk_t blk) 129 { 130 struct buffer_head *bh; 131 int err; 132 133 bh = sb_getblk(sb, blk); 134 if (!bh) 135 return ERR_PTR(-EIO); 136 if ((err = ext4_journal_get_write_access(handle, bh))) { 137 brelse(bh); 138 bh = ERR_PTR(err); 139 } else { 140 lock_buffer(bh); 141 memset(bh->b_data, 0, sb->s_blocksize); 142 set_buffer_uptodate(bh); 143 unlock_buffer(bh); 144 } 145 146 return bh; 147 } 148 149 /* 150 * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA. 151 * If that fails, restart the transaction & regain write access for the 152 * buffer head which is used for block_bitmap modifications. 153 */ 154 static int extend_or_restart_transaction(handle_t *handle, int thresh, 155 struct buffer_head *bh) 156 { 157 int err; 158 159 if (ext4_handle_has_enough_credits(handle, thresh)) 160 return 0; 161 162 err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); 163 if (err < 0) 164 return err; 165 if (err) { 166 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 167 return err; 168 if ((err = ext4_journal_get_write_access(handle, bh))) 169 return err; 170 } 171 172 return 0; 173 } 174 175 /* 176 * Set up the block and inode bitmaps, and the inode table for the new group. 177 * This doesn't need to be part of the main transaction, since we are only 178 * changing blocks outside the actual filesystem. We still do journaling to 179 * ensure the recovery is correct in case of a failure just after resize. 180 * If any part of this fails, we simply abort the resize. 181 */ 182 static int setup_new_group_blocks(struct super_block *sb, 183 struct ext4_new_group_data *input) 184 { 185 struct ext4_sb_info *sbi = EXT4_SB(sb); 186 ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 187 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 188 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 189 unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); 190 struct buffer_head *bh; 191 handle_t *handle; 192 ext4_fsblk_t block; 193 ext4_grpblk_t bit; 194 int i; 195 int err = 0, err2; 196 197 /* This transaction may be extended/restarted along the way */ 198 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 199 200 if (IS_ERR(handle)) 201 return PTR_ERR(handle); 202 203 BUG_ON(input->group != sbi->s_groups_count); 204 205 if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { 206 err = PTR_ERR(bh); 207 goto exit_journal; 208 } 209 210 if (ext4_bg_has_super(sb, input->group)) { 211 ext4_debug("mark backup superblock %#04llx (+0)\n", start); 212 ext4_set_bit(0, bh->b_data); 213 } 214 215 /* Copy all of the GDT blocks into the backup in this group */ 216 for (i = 0, bit = 1, block = start + 1; 217 i < gdblocks; i++, block++, bit++) { 218 struct buffer_head *gdb; 219 220 ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 221 222 if ((err = extend_or_restart_transaction(handle, 1, bh))) 223 goto exit_bh; 224 225 gdb = sb_getblk(sb, block); 226 if (!gdb) { 227 err = -EIO; 228 goto exit_bh; 229 } 230 if ((err = ext4_journal_get_write_access(handle, gdb))) { 231 brelse(gdb); 232 goto exit_bh; 233 } 234 lock_buffer(gdb); 235 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 236 set_buffer_uptodate(gdb); 237 unlock_buffer(gdb); 238 err = ext4_handle_dirty_metadata(handle, NULL, gdb); 239 if (unlikely(err)) { 240 brelse(gdb); 241 goto exit_bh; 242 } 243 ext4_set_bit(bit, bh->b_data); 244 brelse(gdb); 245 } 246 247 /* Zero out all of the reserved backup group descriptor table blocks */ 248 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 249 block, sbi->s_itb_per_group); 250 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, 251 GFP_NOFS); 252 if (err) 253 goto exit_bh; 254 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++) 255 ext4_set_bit(bit, bh->b_data); 256 257 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 258 input->block_bitmap - start); 259 ext4_set_bit(input->block_bitmap - start, bh->b_data); 260 ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 261 input->inode_bitmap - start); 262 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 263 264 /* Zero out all of the inode table blocks */ 265 block = input->inode_table; 266 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 267 block, sbi->s_itb_per_group); 268 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 269 if (err) 270 goto exit_bh; 271 for (i = 0, bit = input->inode_table - start; 272 i < sbi->s_itb_per_group; i++, bit++) 273 ext4_set_bit(bit, bh->b_data); 274 275 if ((err = extend_or_restart_transaction(handle, 2, bh))) 276 goto exit_bh; 277 278 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 279 bh->b_data); 280 err = ext4_handle_dirty_metadata(handle, NULL, bh); 281 if (unlikely(err)) { 282 ext4_std_error(sb, err); 283 goto exit_bh; 284 } 285 brelse(bh); 286 /* Mark unused entries in inode bitmap used */ 287 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 288 input->inode_bitmap, input->inode_bitmap - start); 289 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 290 err = PTR_ERR(bh); 291 goto exit_journal; 292 } 293 294 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 295 bh->b_data); 296 err = ext4_handle_dirty_metadata(handle, NULL, bh); 297 if (unlikely(err)) 298 ext4_std_error(sb, err); 299 exit_bh: 300 brelse(bh); 301 302 exit_journal: 303 if ((err2 = ext4_journal_stop(handle)) && !err) 304 err = err2; 305 306 return err; 307 } 308 309 /* 310 * Iterate through the groups which hold BACKUP superblock/GDT copies in an 311 * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before 312 * calling this for the first time. In a sparse filesystem it will be the 313 * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... 314 * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 315 */ 316 static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, 317 unsigned *five, unsigned *seven) 318 { 319 unsigned *min = three; 320 int mult = 3; 321 unsigned ret; 322 323 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 324 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 325 ret = *min; 326 *min += 1; 327 return ret; 328 } 329 330 if (*five < *min) { 331 min = five; 332 mult = 5; 333 } 334 if (*seven < *min) { 335 min = seven; 336 mult = 7; 337 } 338 339 ret = *min; 340 *min *= mult; 341 342 return ret; 343 } 344 345 /* 346 * Check that all of the backup GDT blocks are held in the primary GDT block. 347 * It is assumed that they are stored in group order. Returns the number of 348 * groups in current filesystem that have BACKUPS, or -ve error code. 349 */ 350 static int verify_reserved_gdb(struct super_block *sb, 351 struct buffer_head *primary) 352 { 353 const ext4_fsblk_t blk = primary->b_blocknr; 354 const ext4_group_t end = EXT4_SB(sb)->s_groups_count; 355 unsigned three = 1; 356 unsigned five = 5; 357 unsigned seven = 7; 358 unsigned grp; 359 __le32 *p = (__le32 *)primary->b_data; 360 int gdbackups = 0; 361 362 while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 363 if (le32_to_cpu(*p++) != 364 grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 365 ext4_warning(sb, "reserved GDT %llu" 366 " missing grp %d (%llu)", 367 blk, grp, 368 grp * 369 (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + 370 blk); 371 return -EINVAL; 372 } 373 if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) 374 return -EFBIG; 375 } 376 377 return gdbackups; 378 } 379 380 /* 381 * Called when we need to bring a reserved group descriptor table block into 382 * use from the resize inode. The primary copy of the new GDT block currently 383 * is an indirect block (under the double indirect block in the resize inode). 384 * The new backup GDT blocks will be stored as leaf blocks in this indirect 385 * block, in group order. Even though we know all the block numbers we need, 386 * we check to ensure that the resize inode has actually reserved these blocks. 387 * 388 * Don't need to update the block bitmaps because the blocks are still in use. 389 * 390 * We get all of the error cases out of the way, so that we are sure to not 391 * fail once we start modifying the data on disk, because JBD has no rollback. 392 */ 393 static int add_new_gdb(handle_t *handle, struct inode *inode, 394 struct ext4_new_group_data *input, 395 struct buffer_head **primary) 396 { 397 struct super_block *sb = inode->i_sb; 398 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 399 unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 400 ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 401 struct buffer_head **o_group_desc, **n_group_desc; 402 struct buffer_head *dind; 403 int gdbackups; 404 struct ext4_iloc iloc; 405 __le32 *data; 406 int err; 407 408 if (test_opt(sb, DEBUG)) 409 printk(KERN_DEBUG 410 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 411 gdb_num); 412 413 /* 414 * If we are not using the primary superblock/GDT copy don't resize, 415 * because the user tools have no way of handling this. Probably a 416 * bad time to do it anyways. 417 */ 418 if (EXT4_SB(sb)->s_sbh->b_blocknr != 419 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 420 ext4_warning(sb, "won't resize using backup superblock at %llu", 421 (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 422 return -EPERM; 423 } 424 425 *primary = sb_bread(sb, gdblock); 426 if (!*primary) 427 return -EIO; 428 429 if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { 430 err = gdbackups; 431 goto exit_bh; 432 } 433 434 data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; 435 dind = sb_bread(sb, le32_to_cpu(*data)); 436 if (!dind) { 437 err = -EIO; 438 goto exit_bh; 439 } 440 441 data = (__le32 *)dind->b_data; 442 if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 443 ext4_warning(sb, "new group %u GDT block %llu not reserved", 444 input->group, gdblock); 445 err = -EINVAL; 446 goto exit_dind; 447 } 448 449 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 450 if (unlikely(err)) 451 goto exit_dind; 452 453 err = ext4_journal_get_write_access(handle, *primary); 454 if (unlikely(err)) 455 goto exit_sbh; 456 457 err = ext4_journal_get_write_access(handle, dind); 458 if (unlikely(err)) 459 ext4_std_error(sb, err); 460 461 /* ext4_reserve_inode_write() gets a reference on the iloc */ 462 err = ext4_reserve_inode_write(handle, inode, &iloc); 463 if (unlikely(err)) 464 goto exit_dindj; 465 466 n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), 467 GFP_NOFS); 468 if (!n_group_desc) { 469 err = -ENOMEM; 470 ext4_warning(sb, 471 "not enough memory for %lu groups", gdb_num + 1); 472 goto exit_inode; 473 } 474 475 /* 476 * Finally, we have all of the possible failures behind us... 477 * 478 * Remove new GDT block from inode double-indirect block and clear out 479 * the new GDT block for use (which also "frees" the backup GDT blocks 480 * from the reserved inode). We don't need to change the bitmaps for 481 * these blocks, because they are marked as in-use from being in the 482 * reserved inode, and will become GDT blocks (primary and backup). 483 */ 484 data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; 485 err = ext4_handle_dirty_metadata(handle, NULL, dind); 486 if (unlikely(err)) { 487 ext4_std_error(sb, err); 488 goto exit_inode; 489 } 490 inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 491 ext4_mark_iloc_dirty(handle, inode, &iloc); 492 memset((*primary)->b_data, 0, sb->s_blocksize); 493 err = ext4_handle_dirty_metadata(handle, NULL, *primary); 494 if (unlikely(err)) { 495 ext4_std_error(sb, err); 496 goto exit_inode; 497 } 498 brelse(dind); 499 500 o_group_desc = EXT4_SB(sb)->s_group_desc; 501 memcpy(n_group_desc, o_group_desc, 502 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 503 n_group_desc[gdb_num] = *primary; 504 EXT4_SB(sb)->s_group_desc = n_group_desc; 505 EXT4_SB(sb)->s_gdb_count++; 506 kfree(o_group_desc); 507 508 le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 509 err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 510 if (err) 511 ext4_std_error(sb, err); 512 513 return err; 514 515 exit_inode: 516 /* ext4_handle_release_buffer(handle, iloc.bh); */ 517 brelse(iloc.bh); 518 exit_dindj: 519 /* ext4_handle_release_buffer(handle, dind); */ 520 exit_sbh: 521 /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ 522 exit_dind: 523 brelse(dind); 524 exit_bh: 525 brelse(*primary); 526 527 ext4_debug("leaving with error %d\n", err); 528 return err; 529 } 530 531 /* 532 * Called when we are adding a new group which has a backup copy of each of 533 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 534 * We need to add these reserved backup GDT blocks to the resize inode, so 535 * that they are kept for future resizing and not allocated to files. 536 * 537 * Each reserved backup GDT block will go into a different indirect block. 538 * The indirect blocks are actually the primary reserved GDT blocks, 539 * so we know in advance what their block numbers are. We only get the 540 * double-indirect block to verify it is pointing to the primary reserved 541 * GDT blocks so we don't overwrite a data block by accident. The reserved 542 * backup GDT blocks are stored in their reserved primary GDT block. 543 */ 544 static int reserve_backup_gdb(handle_t *handle, struct inode *inode, 545 struct ext4_new_group_data *input) 546 { 547 struct super_block *sb = inode->i_sb; 548 int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); 549 struct buffer_head **primary; 550 struct buffer_head *dind; 551 struct ext4_iloc iloc; 552 ext4_fsblk_t blk; 553 __le32 *data, *end; 554 int gdbackups = 0; 555 int res, i; 556 int err; 557 558 primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); 559 if (!primary) 560 return -ENOMEM; 561 562 data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; 563 dind = sb_bread(sb, le32_to_cpu(*data)); 564 if (!dind) { 565 err = -EIO; 566 goto exit_free; 567 } 568 569 blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; 570 data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % 571 EXT4_ADDR_PER_BLOCK(sb)); 572 end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); 573 574 /* Get each reserved primary GDT block and verify it holds backups */ 575 for (res = 0; res < reserved_gdb; res++, blk++) { 576 if (le32_to_cpu(*data) != blk) { 577 ext4_warning(sb, "reserved block %llu" 578 " not at offset %ld", 579 blk, 580 (long)(data - (__le32 *)dind->b_data)); 581 err = -EINVAL; 582 goto exit_bh; 583 } 584 primary[res] = sb_bread(sb, blk); 585 if (!primary[res]) { 586 err = -EIO; 587 goto exit_bh; 588 } 589 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 590 brelse(primary[res]); 591 err = gdbackups; 592 goto exit_bh; 593 } 594 if (++data >= end) 595 data = (__le32 *)dind->b_data; 596 } 597 598 for (i = 0; i < reserved_gdb; i++) { 599 if ((err = ext4_journal_get_write_access(handle, primary[i]))) { 600 /* 601 int j; 602 for (j = 0; j < i; j++) 603 ext4_handle_release_buffer(handle, primary[j]); 604 */ 605 goto exit_bh; 606 } 607 } 608 609 if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) 610 goto exit_bh; 611 612 /* 613 * Finally we can add each of the reserved backup GDT blocks from 614 * the new group to its reserved primary GDT block. 615 */ 616 blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); 617 for (i = 0; i < reserved_gdb; i++) { 618 int err2; 619 data = (__le32 *)primary[i]->b_data; 620 /* printk("reserving backup %lu[%u] = %lu\n", 621 primary[i]->b_blocknr, gdbackups, 622 blk + primary[i]->b_blocknr); */ 623 data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 624 err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); 625 if (!err) 626 err = err2; 627 } 628 inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; 629 ext4_mark_iloc_dirty(handle, inode, &iloc); 630 631 exit_bh: 632 while (--res >= 0) 633 brelse(primary[res]); 634 brelse(dind); 635 636 exit_free: 637 kfree(primary); 638 639 return err; 640 } 641 642 /* 643 * Update the backup copies of the ext4 metadata. These don't need to be part 644 * of the main resize transaction, because e2fsck will re-write them if there 645 * is a problem (basically only OOM will cause a problem). However, we 646 * _should_ update the backups if possible, in case the primary gets trashed 647 * for some reason and we need to run e2fsck from a backup superblock. The 648 * important part is that the new block and inode counts are in the backup 649 * superblocks, and the location of the new group metadata in the GDT backups. 650 * 651 * We do not need take the s_resize_lock for this, because these 652 * blocks are not otherwise touched by the filesystem code when it is 653 * mounted. We don't need to worry about last changing from 654 * sbi->s_groups_count, because the worst that can happen is that we 655 * do not copy the full number of backups at this time. The resize 656 * which changed s_groups_count will backup again. 657 */ 658 static void update_backups(struct super_block *sb, 659 int blk_off, char *data, int size) 660 { 661 struct ext4_sb_info *sbi = EXT4_SB(sb); 662 const ext4_group_t last = sbi->s_groups_count; 663 const int bpg = EXT4_BLOCKS_PER_GROUP(sb); 664 unsigned three = 1; 665 unsigned five = 5; 666 unsigned seven = 7; 667 ext4_group_t group; 668 int rest = sb->s_blocksize - size; 669 handle_t *handle; 670 int err = 0, err2; 671 672 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 673 if (IS_ERR(handle)) { 674 group = 1; 675 err = PTR_ERR(handle); 676 goto exit_err; 677 } 678 679 while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { 680 struct buffer_head *bh; 681 682 /* Out of journal space, and can't get more - abort - so sad */ 683 if (ext4_handle_valid(handle) && 684 handle->h_buffer_credits == 0 && 685 ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && 686 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 687 break; 688 689 bh = sb_getblk(sb, group * bpg + blk_off); 690 if (!bh) { 691 err = -EIO; 692 break; 693 } 694 ext4_debug("update metadata backup %#04lx\n", 695 (unsigned long)bh->b_blocknr); 696 if ((err = ext4_journal_get_write_access(handle, bh))) 697 break; 698 lock_buffer(bh); 699 memcpy(bh->b_data, data, size); 700 if (rest) 701 memset(bh->b_data + size, 0, rest); 702 set_buffer_uptodate(bh); 703 unlock_buffer(bh); 704 err = ext4_handle_dirty_metadata(handle, NULL, bh); 705 if (unlikely(err)) 706 ext4_std_error(sb, err); 707 brelse(bh); 708 } 709 if ((err2 = ext4_journal_stop(handle)) && !err) 710 err = err2; 711 712 /* 713 * Ugh! Need to have e2fsck write the backup copies. It is too 714 * late to revert the resize, we shouldn't fail just because of 715 * the backup copies (they are only needed in case of corruption). 716 * 717 * However, if we got here we have a journal problem too, so we 718 * can't really start a transaction to mark the superblock. 719 * Chicken out and just set the flag on the hope it will be written 720 * to disk, and if not - we will simply wait until next fsck. 721 */ 722 exit_err: 723 if (err) { 724 ext4_warning(sb, "can't update backup for group %u (err %d), " 725 "forcing fsck on next reboot", group, err); 726 sbi->s_mount_state &= ~EXT4_VALID_FS; 727 sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 728 mark_buffer_dirty(sbi->s_sbh); 729 } 730 } 731 732 /* Add group descriptor data to an existing or new group descriptor block. 733 * Ensure we handle all possible error conditions _before_ we start modifying 734 * the filesystem, because we cannot abort the transaction and not have it 735 * write the data to disk. 736 * 737 * If we are on a GDT block boundary, we need to get the reserved GDT block. 738 * Otherwise, we may need to add backup GDT blocks for a sparse group. 739 * 740 * We only need to hold the superblock lock while we are actually adding 741 * in the new group's counts to the superblock. Prior to that we have 742 * not really "added" the group at all. We re-check that we are still 743 * adding in the last group in case things have changed since verifying. 744 */ 745 int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 746 { 747 struct ext4_sb_info *sbi = EXT4_SB(sb); 748 struct ext4_super_block *es = sbi->s_es; 749 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 750 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 751 struct buffer_head *primary = NULL; 752 struct ext4_group_desc *gdp; 753 struct inode *inode = NULL; 754 handle_t *handle; 755 int gdb_off, gdb_num; 756 int err, err2; 757 758 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 759 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); 760 761 if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 762 EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 763 ext4_warning(sb, "Can't resize non-sparse filesystem further"); 764 return -EPERM; 765 } 766 767 if (ext4_blocks_count(es) + input->blocks_count < 768 ext4_blocks_count(es)) { 769 ext4_warning(sb, "blocks_count overflow"); 770 return -EINVAL; 771 } 772 773 if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 774 le32_to_cpu(es->s_inodes_count)) { 775 ext4_warning(sb, "inodes_count overflow"); 776 return -EINVAL; 777 } 778 779 if (reserved_gdb || gdb_off == 0) { 780 if (!EXT4_HAS_COMPAT_FEATURE(sb, 781 EXT4_FEATURE_COMPAT_RESIZE_INODE) 782 || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 783 ext4_warning(sb, 784 "No reserved GDT blocks, can't resize"); 785 return -EPERM; 786 } 787 inode = ext4_iget(sb, EXT4_RESIZE_INO); 788 if (IS_ERR(inode)) { 789 ext4_warning(sb, "Error opening resize inode"); 790 return PTR_ERR(inode); 791 } 792 } 793 794 795 if ((err = verify_group_input(sb, input))) 796 goto exit_put; 797 798 if ((err = setup_new_group_blocks(sb, input))) 799 goto exit_put; 800 801 /* 802 * We will always be modifying at least the superblock and a GDT 803 * block. If we are adding a group past the last current GDT block, 804 * we will also modify the inode and the dindirect block. If we 805 * are adding a group with superblock/GDT backups we will also 806 * modify each of the reserved GDT dindirect blocks. 807 */ 808 handle = ext4_journal_start_sb(sb, 809 ext4_bg_has_super(sb, input->group) ? 810 3 + reserved_gdb : 4); 811 if (IS_ERR(handle)) { 812 err = PTR_ERR(handle); 813 goto exit_put; 814 } 815 816 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 817 goto exit_journal; 818 819 /* 820 * We will only either add reserved group blocks to a backup group 821 * or remove reserved blocks for the first group in a new group block. 822 * Doing both would be mean more complex code, and sane people don't 823 * use non-sparse filesystems anymore. This is already checked above. 824 */ 825 if (gdb_off) { 826 primary = sbi->s_group_desc[gdb_num]; 827 if ((err = ext4_journal_get_write_access(handle, primary))) 828 goto exit_journal; 829 830 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && 831 (err = reserve_backup_gdb(handle, inode, input))) 832 goto exit_journal; 833 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 834 goto exit_journal; 835 836 /* 837 * OK, now we've set up the new group. Time to make it active. 838 * 839 * so we have to be safe wrt. concurrent accesses the group 840 * data. So we need to be careful to set all of the relevant 841 * group descriptor data etc. *before* we enable the group. 842 * 843 * The key field here is sbi->s_groups_count: as long as 844 * that retains its old value, nobody is going to access the new 845 * group. 846 * 847 * So first we update all the descriptor metadata for the new 848 * group; then we update the total disk blocks count; then we 849 * update the groups count to enable the group; then finally we 850 * update the free space counts so that the system can start 851 * using the new disk blocks. 852 */ 853 854 /* Update group descriptor block for new group */ 855 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 856 gdb_off * EXT4_DESC_SIZE(sb)); 857 858 memset(gdp, 0, EXT4_DESC_SIZE(sb)); 859 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 860 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 861 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 862 ext4_free_blks_set(sb, gdp, input->free_blocks_count); 863 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 864 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 865 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 866 867 /* 868 * We can allocate memory for mb_alloc based on the new group 869 * descriptor 870 */ 871 err = ext4_mb_add_groupinfo(sb, input->group, gdp); 872 if (err) 873 goto exit_journal; 874 875 /* 876 * Make the new blocks and inodes valid next. We do this before 877 * increasing the group count so that once the group is enabled, 878 * all of its blocks and inodes are already valid. 879 * 880 * We always allocate group-by-group, then block-by-block or 881 * inode-by-inode within a group, so enabling these 882 * blocks/inodes before the group is live won't actually let us 883 * allocate the new space yet. 884 */ 885 ext4_blocks_count_set(es, ext4_blocks_count(es) + 886 input->blocks_count); 887 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); 888 889 /* 890 * We need to protect s_groups_count against other CPUs seeing 891 * inconsistent state in the superblock. 892 * 893 * The precise rules we use are: 894 * 895 * * Writers must perform a smp_wmb() after updating all dependent 896 * data and before modifying the groups count 897 * 898 * * Readers must perform an smp_rmb() after reading the groups count 899 * and before reading any dependent data. 900 * 901 * NB. These rules can be relaxed when checking the group count 902 * while freeing data, as we can only allocate from a block 903 * group after serialising against the group count, and we can 904 * only then free after serialising in turn against that 905 * allocation. 906 */ 907 smp_wmb(); 908 909 /* Update the global fs size fields */ 910 sbi->s_groups_count++; 911 912 err = ext4_handle_dirty_metadata(handle, NULL, primary); 913 if (unlikely(err)) { 914 ext4_std_error(sb, err); 915 goto exit_journal; 916 } 917 918 /* Update the reserved block counts only once the new group is 919 * active. */ 920 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + 921 input->reserved_blocks); 922 923 /* Update the free space counts */ 924 percpu_counter_add(&sbi->s_freeblocks_counter, 925 input->free_blocks_count); 926 percpu_counter_add(&sbi->s_freeinodes_counter, 927 EXT4_INODES_PER_GROUP(sb)); 928 929 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 930 sbi->s_log_groups_per_flex) { 931 ext4_group_t flex_group; 932 flex_group = ext4_flex_group(sbi, input->group); 933 atomic_add(input->free_blocks_count, 934 &sbi->s_flex_groups[flex_group].free_blocks); 935 atomic_add(EXT4_INODES_PER_GROUP(sb), 936 &sbi->s_flex_groups[flex_group].free_inodes); 937 } 938 939 ext4_handle_dirty_super(handle, sb); 940 941 exit_journal: 942 if ((err2 = ext4_journal_stop(handle)) && !err) 943 err = err2; 944 if (!err) { 945 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 946 sizeof(struct ext4_super_block)); 947 update_backups(sb, primary->b_blocknr, primary->b_data, 948 primary->b_size); 949 } 950 exit_put: 951 iput(inode); 952 return err; 953 } /* ext4_group_add */ 954 955 /* 956 * Extend the filesystem to the new number of blocks specified. This entry 957 * point is only used to extend the current filesystem to the end of the last 958 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 959 * for emergencies (because it has no dependencies on reserved blocks). 960 * 961 * If we _really_ wanted, we could use default values to call ext4_group_add() 962 * allow the "remount" trick to work for arbitrary resizing, assuming enough 963 * GDT blocks are reserved to grow to the desired size. 964 */ 965 int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, 966 ext4_fsblk_t n_blocks_count) 967 { 968 ext4_fsblk_t o_blocks_count; 969 ext4_grpblk_t last; 970 ext4_grpblk_t add; 971 struct buffer_head *bh; 972 handle_t *handle; 973 int err; 974 ext4_group_t group; 975 976 o_blocks_count = ext4_blocks_count(es); 977 978 if (test_opt(sb, DEBUG)) 979 printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", 980 o_blocks_count, n_blocks_count); 981 982 if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 983 return 0; 984 985 if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 986 printk(KERN_ERR "EXT4-fs: filesystem on %s:" 987 " too large to resize to %llu blocks safely\n", 988 sb->s_id, n_blocks_count); 989 if (sizeof(sector_t) < 8) 990 ext4_warning(sb, "CONFIG_LBDAF not enabled"); 991 return -EINVAL; 992 } 993 994 if (n_blocks_count < o_blocks_count) { 995 ext4_warning(sb, "can't shrink FS - resize aborted"); 996 return -EINVAL; 997 } 998 999 /* Handle the remaining blocks in the last group only. */ 1000 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 1001 1002 if (last == 0) { 1003 ext4_warning(sb, "need to use ext2online to resize further"); 1004 return -EPERM; 1005 } 1006 1007 add = EXT4_BLOCKS_PER_GROUP(sb) - last; 1008 1009 if (o_blocks_count + add < o_blocks_count) { 1010 ext4_warning(sb, "blocks_count overflow"); 1011 return -EINVAL; 1012 } 1013 1014 if (o_blocks_count + add > n_blocks_count) 1015 add = n_blocks_count - o_blocks_count; 1016 1017 if (o_blocks_count + add < n_blocks_count) 1018 ext4_warning(sb, "will only finish group (%llu blocks, %u new)", 1019 o_blocks_count + add, add); 1020 1021 /* See if the device is actually as big as what was requested */ 1022 bh = sb_bread(sb, o_blocks_count + add - 1); 1023 if (!bh) { 1024 ext4_warning(sb, "can't read last block, resize aborted"); 1025 return -ENOSPC; 1026 } 1027 brelse(bh); 1028 1029 /* We will update the superblock, one block bitmap, and 1030 * one group descriptor via ext4_free_blocks(). 1031 */ 1032 handle = ext4_journal_start_sb(sb, 3); 1033 if (IS_ERR(handle)) { 1034 err = PTR_ERR(handle); 1035 ext4_warning(sb, "error %d on journal start", err); 1036 goto exit_put; 1037 } 1038 1039 if ((err = ext4_journal_get_write_access(handle, 1040 EXT4_SB(sb)->s_sbh))) { 1041 ext4_warning(sb, "error %d on journal write access", err); 1042 ext4_journal_stop(handle); 1043 goto exit_put; 1044 } 1045 ext4_blocks_count_set(es, o_blocks_count + add); 1046 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1047 o_blocks_count + add); 1048 /* We add the blocks to the bitmap and set the group need init bit */ 1049 ext4_add_groupblocks(handle, sb, o_blocks_count, add); 1050 ext4_handle_dirty_super(handle, sb); 1051 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1052 o_blocks_count + add); 1053 if ((err = ext4_journal_stop(handle))) 1054 goto exit_put; 1055 1056 if (test_opt(sb, DEBUG)) 1057 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1058 ext4_blocks_count(es)); 1059 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 1060 sizeof(struct ext4_super_block)); 1061 exit_put: 1062 return err; 1063 } /* ext4_group_extend */ 1064