1ac27a0ecSDave Kleikamp /* 2617ba13bSMingming Cao * linux/fs/ext4/resize.c 3ac27a0ecSDave Kleikamp * 4617ba13bSMingming Cao * Support for resizing an ext4 filesystem while it is mounted. 5ac27a0ecSDave Kleikamp * 6ac27a0ecSDave Kleikamp * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> 7ac27a0ecSDave Kleikamp * 8ac27a0ecSDave Kleikamp * This could probably be made into a module, because it is not often in use. 9ac27a0ecSDave Kleikamp */ 10ac27a0ecSDave Kleikamp 11ac27a0ecSDave Kleikamp 12617ba13bSMingming Cao #define EXT4FS_DEBUG 13ac27a0ecSDave Kleikamp 14ac27a0ecSDave Kleikamp #include <linux/errno.h> 15ac27a0ecSDave Kleikamp #include <linux/slab.h> 16ac27a0ecSDave Kleikamp 173dcf5451SChristoph Hellwig #include "ext4_jbd2.h" 18ac27a0ecSDave Kleikamp 198f82f840SYongqiang Yang int ext4_resize_begin(struct super_block *sb) 208f82f840SYongqiang Yang { 218f82f840SYongqiang Yang int ret = 0; 228f82f840SYongqiang Yang 238f82f840SYongqiang Yang if (!capable(CAP_SYS_RESOURCE)) 248f82f840SYongqiang Yang return -EPERM; 258f82f840SYongqiang Yang 26ce723c31SYongqiang Yang /* 27ce723c31SYongqiang Yang * We are not allowed to do online-resizing on a filesystem mounted 28ce723c31SYongqiang Yang * with error, because it can destroy the filesystem easily. 29ce723c31SYongqiang Yang */ 30ce723c31SYongqiang Yang if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 31ce723c31SYongqiang Yang ext4_warning(sb, "There are errors in the filesystem, " 32ce723c31SYongqiang Yang "so online resizing is not allowed\n"); 33ce723c31SYongqiang Yang return -EPERM; 34ce723c31SYongqiang Yang } 35ce723c31SYongqiang Yang 368f82f840SYongqiang Yang if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags)) 378f82f840SYongqiang Yang ret = -EBUSY; 388f82f840SYongqiang Yang 398f82f840SYongqiang Yang return ret; 408f82f840SYongqiang Yang } 418f82f840SYongqiang Yang 428f82f840SYongqiang Yang void ext4_resize_end(struct super_block *sb) 438f82f840SYongqiang Yang { 448f82f840SYongqiang Yang clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags); 458f82f840SYongqiang Yang smp_mb__after_clear_bit(); 468f82f840SYongqiang Yang } 478f82f840SYongqiang Yang 48ac27a0ecSDave Kleikamp #define outside(b, first, last) ((b) < (first) || (b) >= (last)) 49ac27a0ecSDave Kleikamp #define inside(b, first, last) ((b) >= (first) && (b) < (last)) 50ac27a0ecSDave Kleikamp 51ac27a0ecSDave Kleikamp static int verify_group_input(struct super_block *sb, 52617ba13bSMingming Cao struct ext4_new_group_data *input) 53ac27a0ecSDave Kleikamp { 54617ba13bSMingming Cao struct ext4_sb_info *sbi = EXT4_SB(sb); 55617ba13bSMingming Cao struct ext4_super_block *es = sbi->s_es; 56bd81d8eeSLaurent Vivier ext4_fsblk_t start = ext4_blocks_count(es); 57617ba13bSMingming Cao ext4_fsblk_t end = start + input->blocks_count; 58fd2d4291SAvantika Mathur ext4_group_t group = input->group; 59617ba13bSMingming Cao ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 60617ba13bSMingming Cao unsigned overhead = ext4_bg_has_super(sb, group) ? 61617ba13bSMingming Cao (1 + ext4_bg_num_gdb(sb, group) + 62ac27a0ecSDave Kleikamp le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; 63617ba13bSMingming Cao ext4_fsblk_t metaend = start + overhead; 64ac27a0ecSDave Kleikamp struct buffer_head *bh = NULL; 653a5b2ecdSMingming Cao ext4_grpblk_t free_blocks_count, offset; 66ac27a0ecSDave Kleikamp int err = -EINVAL; 67ac27a0ecSDave Kleikamp 68ac27a0ecSDave Kleikamp input->free_blocks_count = free_blocks_count = 69ac27a0ecSDave Kleikamp input->blocks_count - 2 - overhead - sbi->s_itb_per_group; 70ac27a0ecSDave Kleikamp 71ac27a0ecSDave Kleikamp if (test_opt(sb, DEBUG)) 72617ba13bSMingming Cao printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " 73ac27a0ecSDave Kleikamp "(%d free, %u reserved)\n", 74617ba13bSMingming Cao ext4_bg_has_super(sb, input->group) ? "normal" : 75ac27a0ecSDave Kleikamp "no-super", input->group, input->blocks_count, 76ac27a0ecSDave Kleikamp free_blocks_count, input->reserved_blocks); 77ac27a0ecSDave Kleikamp 783a5b2ecdSMingming Cao ext4_get_group_no_and_offset(sb, start, NULL, &offset); 79ac27a0ecSDave Kleikamp if (group != sbi->s_groups_count) 8012062dddSEric Sandeen ext4_warning(sb, "Cannot add at group %u (only %u groups)", 81ac27a0ecSDave Kleikamp input->group, sbi->s_groups_count); 823a5b2ecdSMingming Cao else if (offset != 0) 8312062dddSEric Sandeen ext4_warning(sb, "Last group not full"); 84ac27a0ecSDave Kleikamp else if (input->reserved_blocks > input->blocks_count / 5) 8512062dddSEric Sandeen ext4_warning(sb, "Reserved blocks too high (%u)", 86ac27a0ecSDave Kleikamp input->reserved_blocks); 87ac27a0ecSDave Kleikamp else if (free_blocks_count < 0) 8812062dddSEric Sandeen ext4_warning(sb, "Bad blocks count %u", 89ac27a0ecSDave Kleikamp input->blocks_count); 90ac27a0ecSDave Kleikamp else if (!(bh = sb_bread(sb, end - 1))) 9112062dddSEric Sandeen ext4_warning(sb, "Cannot read last block (%llu)", 92ac27a0ecSDave Kleikamp end - 1); 93ac27a0ecSDave Kleikamp else if (outside(input->block_bitmap, start, end)) 9412062dddSEric Sandeen ext4_warning(sb, "Block bitmap not in group (block %llu)", 951939e49aSRandy Dunlap (unsigned long long)input->block_bitmap); 96ac27a0ecSDave Kleikamp else if (outside(input->inode_bitmap, start, end)) 9712062dddSEric Sandeen ext4_warning(sb, "Inode bitmap not in group (block %llu)", 981939e49aSRandy Dunlap (unsigned long long)input->inode_bitmap); 99ac27a0ecSDave Kleikamp else if (outside(input->inode_table, start, end) || 100ac27a0ecSDave Kleikamp outside(itend - 1, start, end)) 10112062dddSEric Sandeen ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)", 1021939e49aSRandy Dunlap (unsigned long long)input->inode_table, itend - 1); 103ac27a0ecSDave Kleikamp else if (input->inode_bitmap == input->block_bitmap) 10412062dddSEric Sandeen ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)", 1051939e49aSRandy Dunlap (unsigned long long)input->block_bitmap); 106ac27a0ecSDave Kleikamp else if (inside(input->block_bitmap, input->inode_table, itend)) 10712062dddSEric Sandeen ext4_warning(sb, "Block bitmap (%llu) in inode table " 10812062dddSEric Sandeen "(%llu-%llu)", 1091939e49aSRandy Dunlap (unsigned long long)input->block_bitmap, 1101939e49aSRandy Dunlap (unsigned long long)input->inode_table, itend - 1); 111ac27a0ecSDave Kleikamp else if (inside(input->inode_bitmap, input->inode_table, itend)) 11212062dddSEric Sandeen ext4_warning(sb, "Inode bitmap (%llu) in inode table " 11312062dddSEric Sandeen "(%llu-%llu)", 1141939e49aSRandy Dunlap (unsigned long long)input->inode_bitmap, 1151939e49aSRandy Dunlap (unsigned long long)input->inode_table, itend - 1); 116ac27a0ecSDave Kleikamp else if (inside(input->block_bitmap, start, metaend)) 11712062dddSEric Sandeen ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)", 1181939e49aSRandy Dunlap (unsigned long long)input->block_bitmap, 1191939e49aSRandy Dunlap start, metaend - 1); 120ac27a0ecSDave Kleikamp else if (inside(input->inode_bitmap, start, metaend)) 12112062dddSEric Sandeen ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)", 1221939e49aSRandy Dunlap (unsigned long long)input->inode_bitmap, 1231939e49aSRandy Dunlap start, metaend - 1); 124ac27a0ecSDave Kleikamp else if (inside(input->inode_table, start, metaend) || 125ac27a0ecSDave Kleikamp inside(itend - 1, start, metaend)) 12612062dddSEric Sandeen ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table " 12712062dddSEric Sandeen "(%llu-%llu)", 1281939e49aSRandy Dunlap (unsigned long long)input->inode_table, 1291939e49aSRandy Dunlap itend - 1, start, metaend - 1); 130ac27a0ecSDave Kleikamp else 131ac27a0ecSDave Kleikamp err = 0; 132ac27a0ecSDave Kleikamp brelse(bh); 133ac27a0ecSDave Kleikamp 134ac27a0ecSDave Kleikamp return err; 135ac27a0ecSDave Kleikamp } 136ac27a0ecSDave Kleikamp 13728c7bac0SYongqiang Yang /* 13828c7bac0SYongqiang Yang * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex 13928c7bac0SYongqiang Yang * group each time. 14028c7bac0SYongqiang Yang */ 14128c7bac0SYongqiang Yang struct ext4_new_flex_group_data { 14228c7bac0SYongqiang Yang struct ext4_new_group_data *groups; /* new_group_data for groups 14328c7bac0SYongqiang Yang in the flex group */ 14428c7bac0SYongqiang Yang __u16 *bg_flags; /* block group flags of groups 14528c7bac0SYongqiang Yang in @groups */ 14628c7bac0SYongqiang Yang ext4_group_t count; /* number of groups in @groups 14728c7bac0SYongqiang Yang */ 14828c7bac0SYongqiang Yang }; 14928c7bac0SYongqiang Yang 15028c7bac0SYongqiang Yang /* 15128c7bac0SYongqiang Yang * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of 15228c7bac0SYongqiang Yang * @flexbg_size. 15328c7bac0SYongqiang Yang * 15428c7bac0SYongqiang Yang * Returns NULL on failure otherwise address of the allocated structure. 15528c7bac0SYongqiang Yang */ 15628c7bac0SYongqiang Yang static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) 15728c7bac0SYongqiang Yang { 15828c7bac0SYongqiang Yang struct ext4_new_flex_group_data *flex_gd; 15928c7bac0SYongqiang Yang 16028c7bac0SYongqiang Yang flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); 16128c7bac0SYongqiang Yang if (flex_gd == NULL) 16228c7bac0SYongqiang Yang goto out3; 16328c7bac0SYongqiang Yang 16428c7bac0SYongqiang Yang flex_gd->count = flexbg_size; 16528c7bac0SYongqiang Yang 16628c7bac0SYongqiang Yang flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * 16728c7bac0SYongqiang Yang flexbg_size, GFP_NOFS); 16828c7bac0SYongqiang Yang if (flex_gd->groups == NULL) 16928c7bac0SYongqiang Yang goto out2; 17028c7bac0SYongqiang Yang 17128c7bac0SYongqiang Yang flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); 17228c7bac0SYongqiang Yang if (flex_gd->bg_flags == NULL) 17328c7bac0SYongqiang Yang goto out1; 17428c7bac0SYongqiang Yang 17528c7bac0SYongqiang Yang return flex_gd; 17628c7bac0SYongqiang Yang 17728c7bac0SYongqiang Yang out1: 17828c7bac0SYongqiang Yang kfree(flex_gd->groups); 17928c7bac0SYongqiang Yang out2: 18028c7bac0SYongqiang Yang kfree(flex_gd); 18128c7bac0SYongqiang Yang out3: 18228c7bac0SYongqiang Yang return NULL; 18328c7bac0SYongqiang Yang } 18428c7bac0SYongqiang Yang 18528c7bac0SYongqiang Yang static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) 18628c7bac0SYongqiang Yang { 18728c7bac0SYongqiang Yang kfree(flex_gd->bg_flags); 18828c7bac0SYongqiang Yang kfree(flex_gd->groups); 18928c7bac0SYongqiang Yang kfree(flex_gd); 19028c7bac0SYongqiang Yang } 19128c7bac0SYongqiang Yang 192ac27a0ecSDave Kleikamp static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 193617ba13bSMingming Cao ext4_fsblk_t blk) 194ac27a0ecSDave Kleikamp { 195ac27a0ecSDave Kleikamp struct buffer_head *bh; 196ac27a0ecSDave Kleikamp int err; 197ac27a0ecSDave Kleikamp 198ac27a0ecSDave Kleikamp bh = sb_getblk(sb, blk); 199ac27a0ecSDave Kleikamp if (!bh) 200ac27a0ecSDave Kleikamp return ERR_PTR(-EIO); 201617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, bh))) { 202ac27a0ecSDave Kleikamp brelse(bh); 203ac27a0ecSDave Kleikamp bh = ERR_PTR(err); 204ac27a0ecSDave Kleikamp } else { 205ac27a0ecSDave Kleikamp memset(bh->b_data, 0, sb->s_blocksize); 206ac27a0ecSDave Kleikamp set_buffer_uptodate(bh); 207ac27a0ecSDave Kleikamp } 208ac27a0ecSDave Kleikamp 209ac27a0ecSDave Kleikamp return bh; 210ac27a0ecSDave Kleikamp } 211ac27a0ecSDave Kleikamp 212ac27a0ecSDave Kleikamp /* 21314904107SEric Sandeen * If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA. 21414904107SEric Sandeen * If that fails, restart the transaction & regain write access for the 21514904107SEric Sandeen * buffer head which is used for block_bitmap modifications. 21614904107SEric Sandeen */ 2176d40bc5aSYongqiang Yang static int extend_or_restart_transaction(handle_t *handle, int thresh) 21814904107SEric Sandeen { 21914904107SEric Sandeen int err; 22014904107SEric Sandeen 2210390131bSFrank Mayhar if (ext4_handle_has_enough_credits(handle, thresh)) 22214904107SEric Sandeen return 0; 22314904107SEric Sandeen 22414904107SEric Sandeen err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA); 22514904107SEric Sandeen if (err < 0) 22614904107SEric Sandeen return err; 22714904107SEric Sandeen if (err) { 2286d40bc5aSYongqiang Yang err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA); 2296d40bc5aSYongqiang Yang if (err) 23014904107SEric Sandeen return err; 23114904107SEric Sandeen } 23214904107SEric Sandeen 23314904107SEric Sandeen return 0; 23414904107SEric Sandeen } 23514904107SEric Sandeen 23614904107SEric Sandeen /* 23733afdcc5SYongqiang Yang * set_flexbg_block_bitmap() mark @count blocks starting from @block used. 23833afdcc5SYongqiang Yang * 23933afdcc5SYongqiang Yang * Helper function for ext4_setup_new_group_blocks() which set . 24033afdcc5SYongqiang Yang * 24133afdcc5SYongqiang Yang * @sb: super block 24233afdcc5SYongqiang Yang * @handle: journal handle 24333afdcc5SYongqiang Yang * @flex_gd: flex group data 24433afdcc5SYongqiang Yang */ 24533afdcc5SYongqiang Yang static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, 24633afdcc5SYongqiang Yang struct ext4_new_flex_group_data *flex_gd, 24733afdcc5SYongqiang Yang ext4_fsblk_t block, ext4_group_t count) 24833afdcc5SYongqiang Yang { 24933afdcc5SYongqiang Yang ext4_group_t count2; 25033afdcc5SYongqiang Yang 25133afdcc5SYongqiang Yang ext4_debug("mark blocks [%llu/%u] used\n", block, count); 25233afdcc5SYongqiang Yang for (count2 = count; count > 0; count -= count2, block += count2) { 25333afdcc5SYongqiang Yang ext4_fsblk_t start; 25433afdcc5SYongqiang Yang struct buffer_head *bh; 25533afdcc5SYongqiang Yang ext4_group_t group; 25633afdcc5SYongqiang Yang int err; 25733afdcc5SYongqiang Yang 25833afdcc5SYongqiang Yang ext4_get_group_no_and_offset(sb, block, &group, NULL); 25933afdcc5SYongqiang Yang start = ext4_group_first_block_no(sb, group); 26033afdcc5SYongqiang Yang group -= flex_gd->groups[0].group; 26133afdcc5SYongqiang Yang 26233afdcc5SYongqiang Yang count2 = sb->s_blocksize * 8 - (block - start); 26333afdcc5SYongqiang Yang if (count2 > count) 26433afdcc5SYongqiang Yang count2 = count; 26533afdcc5SYongqiang Yang 26633afdcc5SYongqiang Yang if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { 26733afdcc5SYongqiang Yang BUG_ON(flex_gd->count > 1); 26833afdcc5SYongqiang Yang continue; 26933afdcc5SYongqiang Yang } 27033afdcc5SYongqiang Yang 27133afdcc5SYongqiang Yang err = extend_or_restart_transaction(handle, 1); 27233afdcc5SYongqiang Yang if (err) 27333afdcc5SYongqiang Yang return err; 27433afdcc5SYongqiang Yang 27533afdcc5SYongqiang Yang bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); 27633afdcc5SYongqiang Yang if (!bh) 27733afdcc5SYongqiang Yang return -EIO; 27833afdcc5SYongqiang Yang 27933afdcc5SYongqiang Yang err = ext4_journal_get_write_access(handle, bh); 28033afdcc5SYongqiang Yang if (err) 28133afdcc5SYongqiang Yang return err; 28233afdcc5SYongqiang Yang ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, 28333afdcc5SYongqiang Yang block - start, count2); 28433afdcc5SYongqiang Yang ext4_set_bits(bh->b_data, block - start, count2); 28533afdcc5SYongqiang Yang 28633afdcc5SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, bh); 28733afdcc5SYongqiang Yang if (unlikely(err)) 28833afdcc5SYongqiang Yang return err; 28933afdcc5SYongqiang Yang brelse(bh); 29033afdcc5SYongqiang Yang } 29133afdcc5SYongqiang Yang 29233afdcc5SYongqiang Yang return 0; 29333afdcc5SYongqiang Yang } 29433afdcc5SYongqiang Yang 29533afdcc5SYongqiang Yang /* 29633afdcc5SYongqiang Yang * Set up the block and inode bitmaps, and the inode table for the new groups. 29733afdcc5SYongqiang Yang * This doesn't need to be part of the main transaction, since we are only 29833afdcc5SYongqiang Yang * changing blocks outside the actual filesystem. We still do journaling to 29933afdcc5SYongqiang Yang * ensure the recovery is correct in case of a failure just after resize. 30033afdcc5SYongqiang Yang * If any part of this fails, we simply abort the resize. 30133afdcc5SYongqiang Yang * 30233afdcc5SYongqiang Yang * setup_new_flex_group_blocks handles a flex group as follow: 30333afdcc5SYongqiang Yang * 1. copy super block and GDT, and initialize group tables if necessary. 30433afdcc5SYongqiang Yang * In this step, we only set bits in blocks bitmaps for blocks taken by 30533afdcc5SYongqiang Yang * super block and GDT. 30633afdcc5SYongqiang Yang * 2. allocate group tables in block bitmaps, that is, set bits in block 30733afdcc5SYongqiang Yang * bitmap for blocks taken by group tables. 30833afdcc5SYongqiang Yang */ 30933afdcc5SYongqiang Yang static int setup_new_flex_group_blocks(struct super_block *sb, 31033afdcc5SYongqiang Yang struct ext4_new_flex_group_data *flex_gd) 31133afdcc5SYongqiang Yang { 31233afdcc5SYongqiang Yang int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; 31333afdcc5SYongqiang Yang ext4_fsblk_t start; 31433afdcc5SYongqiang Yang ext4_fsblk_t block; 31533afdcc5SYongqiang Yang struct ext4_sb_info *sbi = EXT4_SB(sb); 31633afdcc5SYongqiang Yang struct ext4_super_block *es = sbi->s_es; 31733afdcc5SYongqiang Yang struct ext4_new_group_data *group_data = flex_gd->groups; 31833afdcc5SYongqiang Yang __u16 *bg_flags = flex_gd->bg_flags; 31933afdcc5SYongqiang Yang handle_t *handle; 32033afdcc5SYongqiang Yang ext4_group_t group, count; 32133afdcc5SYongqiang Yang struct buffer_head *bh = NULL; 32233afdcc5SYongqiang Yang int reserved_gdb, i, j, err = 0, err2; 32333afdcc5SYongqiang Yang 32433afdcc5SYongqiang Yang BUG_ON(!flex_gd->count || !group_data || 32533afdcc5SYongqiang Yang group_data[0].group != sbi->s_groups_count); 32633afdcc5SYongqiang Yang 32733afdcc5SYongqiang Yang reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); 32833afdcc5SYongqiang Yang 32933afdcc5SYongqiang Yang /* This transaction may be extended/restarted along the way */ 33033afdcc5SYongqiang Yang handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 33133afdcc5SYongqiang Yang if (IS_ERR(handle)) 33233afdcc5SYongqiang Yang return PTR_ERR(handle); 33333afdcc5SYongqiang Yang 33433afdcc5SYongqiang Yang group = group_data[0].group; 33533afdcc5SYongqiang Yang for (i = 0; i < flex_gd->count; i++, group++) { 33633afdcc5SYongqiang Yang unsigned long gdblocks; 33733afdcc5SYongqiang Yang 33833afdcc5SYongqiang Yang gdblocks = ext4_bg_num_gdb(sb, group); 33933afdcc5SYongqiang Yang start = ext4_group_first_block_no(sb, group); 34033afdcc5SYongqiang Yang 34133afdcc5SYongqiang Yang /* Copy all of the GDT blocks into the backup in this group */ 34233afdcc5SYongqiang Yang for (j = 0, block = start + 1; j < gdblocks; j++, block++) { 34333afdcc5SYongqiang Yang struct buffer_head *gdb; 34433afdcc5SYongqiang Yang 34533afdcc5SYongqiang Yang ext4_debug("update backup group %#04llx\n", block); 34633afdcc5SYongqiang Yang err = extend_or_restart_transaction(handle, 1); 34733afdcc5SYongqiang Yang if (err) 34833afdcc5SYongqiang Yang goto out; 34933afdcc5SYongqiang Yang 35033afdcc5SYongqiang Yang gdb = sb_getblk(sb, block); 35133afdcc5SYongqiang Yang if (!gdb) { 35233afdcc5SYongqiang Yang err = -EIO; 35333afdcc5SYongqiang Yang goto out; 35433afdcc5SYongqiang Yang } 35533afdcc5SYongqiang Yang 35633afdcc5SYongqiang Yang err = ext4_journal_get_write_access(handle, gdb); 35733afdcc5SYongqiang Yang if (err) { 35833afdcc5SYongqiang Yang brelse(gdb); 35933afdcc5SYongqiang Yang goto out; 36033afdcc5SYongqiang Yang } 36133afdcc5SYongqiang Yang memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, 36233afdcc5SYongqiang Yang gdb->b_size); 36333afdcc5SYongqiang Yang set_buffer_uptodate(gdb); 36433afdcc5SYongqiang Yang 36533afdcc5SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, gdb); 36633afdcc5SYongqiang Yang if (unlikely(err)) { 36733afdcc5SYongqiang Yang brelse(gdb); 36833afdcc5SYongqiang Yang goto out; 36933afdcc5SYongqiang Yang } 37033afdcc5SYongqiang Yang brelse(gdb); 37133afdcc5SYongqiang Yang } 37233afdcc5SYongqiang Yang 37333afdcc5SYongqiang Yang /* Zero out all of the reserved backup group descriptor 37433afdcc5SYongqiang Yang * table blocks 37533afdcc5SYongqiang Yang */ 37633afdcc5SYongqiang Yang if (ext4_bg_has_super(sb, group)) { 37733afdcc5SYongqiang Yang err = sb_issue_zeroout(sb, gdblocks + start + 1, 37833afdcc5SYongqiang Yang reserved_gdb, GFP_NOFS); 37933afdcc5SYongqiang Yang if (err) 38033afdcc5SYongqiang Yang goto out; 38133afdcc5SYongqiang Yang } 38233afdcc5SYongqiang Yang 38333afdcc5SYongqiang Yang /* Initialize group tables of the grop @group */ 38433afdcc5SYongqiang Yang if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) 38533afdcc5SYongqiang Yang goto handle_bb; 38633afdcc5SYongqiang Yang 38733afdcc5SYongqiang Yang /* Zero out all of the inode table blocks */ 38833afdcc5SYongqiang Yang block = group_data[i].inode_table; 38933afdcc5SYongqiang Yang ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 39033afdcc5SYongqiang Yang block, sbi->s_itb_per_group); 39133afdcc5SYongqiang Yang err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, 39233afdcc5SYongqiang Yang GFP_NOFS); 39333afdcc5SYongqiang Yang if (err) 39433afdcc5SYongqiang Yang goto out; 39533afdcc5SYongqiang Yang 39633afdcc5SYongqiang Yang handle_bb: 39733afdcc5SYongqiang Yang if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) 39833afdcc5SYongqiang Yang goto handle_ib; 39933afdcc5SYongqiang Yang 40033afdcc5SYongqiang Yang /* Initialize block bitmap of the @group */ 40133afdcc5SYongqiang Yang block = group_data[i].block_bitmap; 40233afdcc5SYongqiang Yang err = extend_or_restart_transaction(handle, 1); 40333afdcc5SYongqiang Yang if (err) 40433afdcc5SYongqiang Yang goto out; 40533afdcc5SYongqiang Yang 40633afdcc5SYongqiang Yang bh = bclean(handle, sb, block); 40733afdcc5SYongqiang Yang if (IS_ERR(bh)) { 40833afdcc5SYongqiang Yang err = PTR_ERR(bh); 40933afdcc5SYongqiang Yang goto out; 41033afdcc5SYongqiang Yang } 41133afdcc5SYongqiang Yang if (ext4_bg_has_super(sb, group)) { 41233afdcc5SYongqiang Yang ext4_debug("mark backup superblock %#04llx (+0)\n", 41333afdcc5SYongqiang Yang start); 41433afdcc5SYongqiang Yang ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 41533afdcc5SYongqiang Yang 1); 41633afdcc5SYongqiang Yang } 41733afdcc5SYongqiang Yang ext4_mark_bitmap_end(group_data[i].blocks_count, 41833afdcc5SYongqiang Yang sb->s_blocksize * 8, bh->b_data); 41933afdcc5SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, bh); 42033afdcc5SYongqiang Yang if (err) 42133afdcc5SYongqiang Yang goto out; 42233afdcc5SYongqiang Yang brelse(bh); 42333afdcc5SYongqiang Yang 42433afdcc5SYongqiang Yang handle_ib: 42533afdcc5SYongqiang Yang if (bg_flags[i] & EXT4_BG_INODE_UNINIT) 42633afdcc5SYongqiang Yang continue; 42733afdcc5SYongqiang Yang 42833afdcc5SYongqiang Yang /* Initialize inode bitmap of the @group */ 42933afdcc5SYongqiang Yang block = group_data[i].inode_bitmap; 43033afdcc5SYongqiang Yang err = extend_or_restart_transaction(handle, 1); 43133afdcc5SYongqiang Yang if (err) 43233afdcc5SYongqiang Yang goto out; 43333afdcc5SYongqiang Yang /* Mark unused entries in inode bitmap used */ 43433afdcc5SYongqiang Yang bh = bclean(handle, sb, block); 43533afdcc5SYongqiang Yang if (IS_ERR(bh)) { 43633afdcc5SYongqiang Yang err = PTR_ERR(bh); 43733afdcc5SYongqiang Yang goto out; 43833afdcc5SYongqiang Yang } 43933afdcc5SYongqiang Yang 44033afdcc5SYongqiang Yang ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), 44133afdcc5SYongqiang Yang sb->s_blocksize * 8, bh->b_data); 44233afdcc5SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, bh); 44333afdcc5SYongqiang Yang if (err) 44433afdcc5SYongqiang Yang goto out; 44533afdcc5SYongqiang Yang brelse(bh); 44633afdcc5SYongqiang Yang } 44733afdcc5SYongqiang Yang bh = NULL; 44833afdcc5SYongqiang Yang 44933afdcc5SYongqiang Yang /* Mark group tables in block bitmap */ 45033afdcc5SYongqiang Yang for (j = 0; j < GROUP_TABLE_COUNT; j++) { 45133afdcc5SYongqiang Yang count = group_table_count[j]; 45233afdcc5SYongqiang Yang start = (&group_data[0].block_bitmap)[j]; 45333afdcc5SYongqiang Yang block = start; 45433afdcc5SYongqiang Yang for (i = 1; i < flex_gd->count; i++) { 45533afdcc5SYongqiang Yang block += group_table_count[j]; 45633afdcc5SYongqiang Yang if (block == (&group_data[i].block_bitmap)[j]) { 45733afdcc5SYongqiang Yang count += group_table_count[j]; 45833afdcc5SYongqiang Yang continue; 45933afdcc5SYongqiang Yang } 46033afdcc5SYongqiang Yang err = set_flexbg_block_bitmap(sb, handle, 46133afdcc5SYongqiang Yang flex_gd, start, count); 46233afdcc5SYongqiang Yang if (err) 46333afdcc5SYongqiang Yang goto out; 46433afdcc5SYongqiang Yang count = group_table_count[j]; 46533afdcc5SYongqiang Yang start = group_data[i].block_bitmap; 46633afdcc5SYongqiang Yang block = start; 46733afdcc5SYongqiang Yang } 46833afdcc5SYongqiang Yang 46933afdcc5SYongqiang Yang if (count) { 47033afdcc5SYongqiang Yang err = set_flexbg_block_bitmap(sb, handle, 47133afdcc5SYongqiang Yang flex_gd, start, count); 47233afdcc5SYongqiang Yang if (err) 47333afdcc5SYongqiang Yang goto out; 47433afdcc5SYongqiang Yang } 47533afdcc5SYongqiang Yang } 47633afdcc5SYongqiang Yang 47733afdcc5SYongqiang Yang out: 47833afdcc5SYongqiang Yang brelse(bh); 47933afdcc5SYongqiang Yang err2 = ext4_journal_stop(handle); 48033afdcc5SYongqiang Yang if (err2 && !err) 48133afdcc5SYongqiang Yang err = err2; 48233afdcc5SYongqiang Yang 48333afdcc5SYongqiang Yang return err; 48433afdcc5SYongqiang Yang } 48533afdcc5SYongqiang Yang 48633afdcc5SYongqiang Yang /* 487ac27a0ecSDave Kleikamp * Set up the block and inode bitmaps, and the inode table for the new group. 488ac27a0ecSDave Kleikamp * This doesn't need to be part of the main transaction, since we are only 489ac27a0ecSDave Kleikamp * changing blocks outside the actual filesystem. We still do journaling to 490ac27a0ecSDave Kleikamp * ensure the recovery is correct in case of a failure just after resize. 491ac27a0ecSDave Kleikamp * If any part of this fails, we simply abort the resize. 492ac27a0ecSDave Kleikamp */ 493ac27a0ecSDave Kleikamp static int setup_new_group_blocks(struct super_block *sb, 494617ba13bSMingming Cao struct ext4_new_group_data *input) 495ac27a0ecSDave Kleikamp { 496617ba13bSMingming Cao struct ext4_sb_info *sbi = EXT4_SB(sb); 497617ba13bSMingming Cao ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 498617ba13bSMingming Cao int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 499ac27a0ecSDave Kleikamp le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 500617ba13bSMingming Cao unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); 501ac27a0ecSDave Kleikamp struct buffer_head *bh; 502ac27a0ecSDave Kleikamp handle_t *handle; 503617ba13bSMingming Cao ext4_fsblk_t block; 504617ba13bSMingming Cao ext4_grpblk_t bit; 505ac27a0ecSDave Kleikamp int i; 506ac27a0ecSDave Kleikamp int err = 0, err2; 507ac27a0ecSDave Kleikamp 50814904107SEric Sandeen /* This transaction may be extended/restarted along the way */ 50914904107SEric Sandeen handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 51014904107SEric Sandeen 511ac27a0ecSDave Kleikamp if (IS_ERR(handle)) 512ac27a0ecSDave Kleikamp return PTR_ERR(handle); 513ac27a0ecSDave Kleikamp 5148f82f840SYongqiang Yang BUG_ON(input->group != sbi->s_groups_count); 515ac27a0ecSDave Kleikamp 516ac27a0ecSDave Kleikamp /* Copy all of the GDT blocks into the backup in this group */ 517ac27a0ecSDave Kleikamp for (i = 0, bit = 1, block = start + 1; 518ac27a0ecSDave Kleikamp i < gdblocks; i++, block++, bit++) { 519ac27a0ecSDave Kleikamp struct buffer_head *gdb; 520ac27a0ecSDave Kleikamp 521c549a95dSEric Sandeen ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 5226d40bc5aSYongqiang Yang err = extend_or_restart_transaction(handle, 1); 5236d40bc5aSYongqiang Yang if (err) 5246d40bc5aSYongqiang Yang goto exit_journal; 52514904107SEric Sandeen 526ac27a0ecSDave Kleikamp gdb = sb_getblk(sb, block); 527ac27a0ecSDave Kleikamp if (!gdb) { 528ac27a0ecSDave Kleikamp err = -EIO; 5296d40bc5aSYongqiang Yang goto exit_journal; 530ac27a0ecSDave Kleikamp } 531617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, gdb))) { 532ac27a0ecSDave Kleikamp brelse(gdb); 5336d40bc5aSYongqiang Yang goto exit_journal; 534ac27a0ecSDave Kleikamp } 5355b615287SEric Sandeen memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 536ac27a0ecSDave Kleikamp set_buffer_uptodate(gdb); 537b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, gdb); 538b4097142STheodore Ts'o if (unlikely(err)) { 539b4097142STheodore Ts'o brelse(gdb); 5406d40bc5aSYongqiang Yang goto exit_journal; 541b4097142STheodore Ts'o } 542ac27a0ecSDave Kleikamp brelse(gdb); 543ac27a0ecSDave Kleikamp } 544ac27a0ecSDave Kleikamp 545ac27a0ecSDave Kleikamp /* Zero out all of the reserved backup group descriptor table blocks */ 546da488945STheodore Ts'o ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 547a31437b8SLukas Czerner block, sbi->s_itb_per_group); 548a31437b8SLukas Czerner err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, 549a107e5a3STheodore Ts'o GFP_NOFS); 550a31437b8SLukas Czerner if (err) 5516d40bc5aSYongqiang Yang goto exit_journal; 5526d40bc5aSYongqiang Yang 5536d40bc5aSYongqiang Yang err = extend_or_restart_transaction(handle, 2); 5546d40bc5aSYongqiang Yang if (err) 5556d40bc5aSYongqiang Yang goto exit_journal; 5566d40bc5aSYongqiang Yang 5576d40bc5aSYongqiang Yang bh = bclean(handle, sb, input->block_bitmap); 5586d40bc5aSYongqiang Yang if (IS_ERR(bh)) { 5596d40bc5aSYongqiang Yang err = PTR_ERR(bh); 5606d40bc5aSYongqiang Yang goto exit_journal; 5616d40bc5aSYongqiang Yang } 562c3e94d1dSYongqiang Yang 563c3e94d1dSYongqiang Yang if (ext4_bg_has_super(sb, input->group)) { 564c3e94d1dSYongqiang Yang ext4_debug("mark backup group tables %#04llx (+0)\n", start); 565c3e94d1dSYongqiang Yang ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); 566c3e94d1dSYongqiang Yang } 56714904107SEric Sandeen 568c549a95dSEric Sandeen ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 569ac27a0ecSDave Kleikamp input->block_bitmap - start); 570617ba13bSMingming Cao ext4_set_bit(input->block_bitmap - start, bh->b_data); 571c549a95dSEric Sandeen ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 572ac27a0ecSDave Kleikamp input->inode_bitmap - start); 573617ba13bSMingming Cao ext4_set_bit(input->inode_bitmap - start, bh->b_data); 574ac27a0ecSDave Kleikamp 575ac27a0ecSDave Kleikamp /* Zero out all of the inode table blocks */ 576a31437b8SLukas Czerner block = input->inode_table; 577da488945STheodore Ts'o ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 578a31437b8SLukas Czerner block, sbi->s_itb_per_group); 579a107e5a3STheodore Ts'o err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 580a31437b8SLukas Czerner if (err) 58114904107SEric Sandeen goto exit_bh; 582c3e94d1dSYongqiang Yang ext4_set_bits(bh->b_data, input->inode_table - start, 583c3e94d1dSYongqiang Yang sbi->s_itb_per_group); 58414904107SEric Sandeen 58514904107SEric Sandeen 58661d08673STheodore Ts'o ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 58761d08673STheodore Ts'o bh->b_data); 588b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, bh); 589b4097142STheodore Ts'o if (unlikely(err)) { 590b4097142STheodore Ts'o ext4_std_error(sb, err); 591b4097142STheodore Ts'o goto exit_bh; 592b4097142STheodore Ts'o } 593ac27a0ecSDave Kleikamp brelse(bh); 594ac27a0ecSDave Kleikamp /* Mark unused entries in inode bitmap used */ 595c549a95dSEric Sandeen ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 596ac27a0ecSDave Kleikamp input->inode_bitmap, input->inode_bitmap - start); 597ac27a0ecSDave Kleikamp if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 598ac27a0ecSDave Kleikamp err = PTR_ERR(bh); 599ac27a0ecSDave Kleikamp goto exit_journal; 600ac27a0ecSDave Kleikamp } 601ac27a0ecSDave Kleikamp 60261d08673STheodore Ts'o ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 603ac27a0ecSDave Kleikamp bh->b_data); 604b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, bh); 605b4097142STheodore Ts'o if (unlikely(err)) 606b4097142STheodore Ts'o ext4_std_error(sb, err); 607ac27a0ecSDave Kleikamp exit_bh: 608ac27a0ecSDave Kleikamp brelse(bh); 609ac27a0ecSDave Kleikamp 610ac27a0ecSDave Kleikamp exit_journal: 611617ba13bSMingming Cao if ((err2 = ext4_journal_stop(handle)) && !err) 612ac27a0ecSDave Kleikamp err = err2; 613ac27a0ecSDave Kleikamp 614ac27a0ecSDave Kleikamp return err; 615ac27a0ecSDave Kleikamp } 616ac27a0ecSDave Kleikamp 617ac27a0ecSDave Kleikamp /* 618ac27a0ecSDave Kleikamp * Iterate through the groups which hold BACKUP superblock/GDT copies in an 619617ba13bSMingming Cao * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before 620ac27a0ecSDave Kleikamp * calling this for the first time. In a sparse filesystem it will be the 621ac27a0ecSDave Kleikamp * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... 622ac27a0ecSDave Kleikamp * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 623ac27a0ecSDave Kleikamp */ 624617ba13bSMingming Cao static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, 625ac27a0ecSDave Kleikamp unsigned *five, unsigned *seven) 626ac27a0ecSDave Kleikamp { 627ac27a0ecSDave Kleikamp unsigned *min = three; 628ac27a0ecSDave Kleikamp int mult = 3; 629ac27a0ecSDave Kleikamp unsigned ret; 630ac27a0ecSDave Kleikamp 631617ba13bSMingming Cao if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 632617ba13bSMingming Cao EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 633ac27a0ecSDave Kleikamp ret = *min; 634ac27a0ecSDave Kleikamp *min += 1; 635ac27a0ecSDave Kleikamp return ret; 636ac27a0ecSDave Kleikamp } 637ac27a0ecSDave Kleikamp 638ac27a0ecSDave Kleikamp if (*five < *min) { 639ac27a0ecSDave Kleikamp min = five; 640ac27a0ecSDave Kleikamp mult = 5; 641ac27a0ecSDave Kleikamp } 642ac27a0ecSDave Kleikamp if (*seven < *min) { 643ac27a0ecSDave Kleikamp min = seven; 644ac27a0ecSDave Kleikamp mult = 7; 645ac27a0ecSDave Kleikamp } 646ac27a0ecSDave Kleikamp 647ac27a0ecSDave Kleikamp ret = *min; 648ac27a0ecSDave Kleikamp *min *= mult; 649ac27a0ecSDave Kleikamp 650ac27a0ecSDave Kleikamp return ret; 651ac27a0ecSDave Kleikamp } 652ac27a0ecSDave Kleikamp 653ac27a0ecSDave Kleikamp /* 654ac27a0ecSDave Kleikamp * Check that all of the backup GDT blocks are held in the primary GDT block. 655ac27a0ecSDave Kleikamp * It is assumed that they are stored in group order. Returns the number of 656ac27a0ecSDave Kleikamp * groups in current filesystem that have BACKUPS, or -ve error code. 657ac27a0ecSDave Kleikamp */ 658ac27a0ecSDave Kleikamp static int verify_reserved_gdb(struct super_block *sb, 659*c72df9f9SYongqiang Yang ext4_group_t end, 660ac27a0ecSDave Kleikamp struct buffer_head *primary) 661ac27a0ecSDave Kleikamp { 662617ba13bSMingming Cao const ext4_fsblk_t blk = primary->b_blocknr; 663ac27a0ecSDave Kleikamp unsigned three = 1; 664ac27a0ecSDave Kleikamp unsigned five = 5; 665ac27a0ecSDave Kleikamp unsigned seven = 7; 666ac27a0ecSDave Kleikamp unsigned grp; 667ac27a0ecSDave Kleikamp __le32 *p = (__le32 *)primary->b_data; 668ac27a0ecSDave Kleikamp int gdbackups = 0; 669ac27a0ecSDave Kleikamp 670617ba13bSMingming Cao while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { 671bd81d8eeSLaurent Vivier if (le32_to_cpu(*p++) != 672bd81d8eeSLaurent Vivier grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ 67312062dddSEric Sandeen ext4_warning(sb, "reserved GDT %llu" 6742ae02107SMingming Cao " missing grp %d (%llu)", 675ac27a0ecSDave Kleikamp blk, grp, 676bd81d8eeSLaurent Vivier grp * 677bd81d8eeSLaurent Vivier (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + 678bd81d8eeSLaurent Vivier blk); 679ac27a0ecSDave Kleikamp return -EINVAL; 680ac27a0ecSDave Kleikamp } 681617ba13bSMingming Cao if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) 682ac27a0ecSDave Kleikamp return -EFBIG; 683ac27a0ecSDave Kleikamp } 684ac27a0ecSDave Kleikamp 685ac27a0ecSDave Kleikamp return gdbackups; 686ac27a0ecSDave Kleikamp } 687ac27a0ecSDave Kleikamp 688ac27a0ecSDave Kleikamp /* 689ac27a0ecSDave Kleikamp * Called when we need to bring a reserved group descriptor table block into 690ac27a0ecSDave Kleikamp * use from the resize inode. The primary copy of the new GDT block currently 691ac27a0ecSDave Kleikamp * is an indirect block (under the double indirect block in the resize inode). 692ac27a0ecSDave Kleikamp * The new backup GDT blocks will be stored as leaf blocks in this indirect 693ac27a0ecSDave Kleikamp * block, in group order. Even though we know all the block numbers we need, 694ac27a0ecSDave Kleikamp * we check to ensure that the resize inode has actually reserved these blocks. 695ac27a0ecSDave Kleikamp * 696ac27a0ecSDave Kleikamp * Don't need to update the block bitmaps because the blocks are still in use. 697ac27a0ecSDave Kleikamp * 698ac27a0ecSDave Kleikamp * We get all of the error cases out of the way, so that we are sure to not 699ac27a0ecSDave Kleikamp * fail once we start modifying the data on disk, because JBD has no rollback. 700ac27a0ecSDave Kleikamp */ 701ac27a0ecSDave Kleikamp static int add_new_gdb(handle_t *handle, struct inode *inode, 7022f919710SYongqiang Yang ext4_group_t group) 703ac27a0ecSDave Kleikamp { 704ac27a0ecSDave Kleikamp struct super_block *sb = inode->i_sb; 705617ba13bSMingming Cao struct ext4_super_block *es = EXT4_SB(sb)->s_es; 7062f919710SYongqiang Yang unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 707617ba13bSMingming Cao ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; 708ac27a0ecSDave Kleikamp struct buffer_head **o_group_desc, **n_group_desc; 709ac27a0ecSDave Kleikamp struct buffer_head *dind; 7102f919710SYongqiang Yang struct buffer_head *gdb_bh; 711ac27a0ecSDave Kleikamp int gdbackups; 712617ba13bSMingming Cao struct ext4_iloc iloc; 713ac27a0ecSDave Kleikamp __le32 *data; 714ac27a0ecSDave Kleikamp int err; 715ac27a0ecSDave Kleikamp 716ac27a0ecSDave Kleikamp if (test_opt(sb, DEBUG)) 717ac27a0ecSDave Kleikamp printk(KERN_DEBUG 718617ba13bSMingming Cao "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 719ac27a0ecSDave Kleikamp gdb_num); 720ac27a0ecSDave Kleikamp 721ac27a0ecSDave Kleikamp /* 722ac27a0ecSDave Kleikamp * If we are not using the primary superblock/GDT copy don't resize, 723ac27a0ecSDave Kleikamp * because the user tools have no way of handling this. Probably a 724ac27a0ecSDave Kleikamp * bad time to do it anyways. 725ac27a0ecSDave Kleikamp */ 726617ba13bSMingming Cao if (EXT4_SB(sb)->s_sbh->b_blocknr != 727617ba13bSMingming Cao le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 72812062dddSEric Sandeen ext4_warning(sb, "won't resize using backup superblock at %llu", 729617ba13bSMingming Cao (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); 730ac27a0ecSDave Kleikamp return -EPERM; 731ac27a0ecSDave Kleikamp } 732ac27a0ecSDave Kleikamp 7332f919710SYongqiang Yang gdb_bh = sb_bread(sb, gdblock); 7342f919710SYongqiang Yang if (!gdb_bh) 735ac27a0ecSDave Kleikamp return -EIO; 736ac27a0ecSDave Kleikamp 737*c72df9f9SYongqiang Yang gdbackups = verify_reserved_gdb(sb, group, gdb_bh); 7382f919710SYongqiang Yang if (gdbackups < 0) { 739ac27a0ecSDave Kleikamp err = gdbackups; 740ac27a0ecSDave Kleikamp goto exit_bh; 741ac27a0ecSDave Kleikamp } 742ac27a0ecSDave Kleikamp 743617ba13bSMingming Cao data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; 744ac27a0ecSDave Kleikamp dind = sb_bread(sb, le32_to_cpu(*data)); 745ac27a0ecSDave Kleikamp if (!dind) { 746ac27a0ecSDave Kleikamp err = -EIO; 747ac27a0ecSDave Kleikamp goto exit_bh; 748ac27a0ecSDave Kleikamp } 749ac27a0ecSDave Kleikamp 750ac27a0ecSDave Kleikamp data = (__le32 *)dind->b_data; 751617ba13bSMingming Cao if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { 75212062dddSEric Sandeen ext4_warning(sb, "new group %u GDT block %llu not reserved", 7532f919710SYongqiang Yang group, gdblock); 754ac27a0ecSDave Kleikamp err = -EINVAL; 755ac27a0ecSDave Kleikamp goto exit_dind; 756ac27a0ecSDave Kleikamp } 757ac27a0ecSDave Kleikamp 758b4097142STheodore Ts'o err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 759b4097142STheodore Ts'o if (unlikely(err)) 760ac27a0ecSDave Kleikamp goto exit_dind; 761ac27a0ecSDave Kleikamp 7622f919710SYongqiang Yang err = ext4_journal_get_write_access(handle, gdb_bh); 763b4097142STheodore Ts'o if (unlikely(err)) 764ac27a0ecSDave Kleikamp goto exit_sbh; 765ac27a0ecSDave Kleikamp 766b4097142STheodore Ts'o err = ext4_journal_get_write_access(handle, dind); 767b4097142STheodore Ts'o if (unlikely(err)) 768b4097142STheodore Ts'o ext4_std_error(sb, err); 769ac27a0ecSDave Kleikamp 770617ba13bSMingming Cao /* ext4_reserve_inode_write() gets a reference on the iloc */ 771b4097142STheodore Ts'o err = ext4_reserve_inode_write(handle, inode, &iloc); 772b4097142STheodore Ts'o if (unlikely(err)) 773ac27a0ecSDave Kleikamp goto exit_dindj; 774ac27a0ecSDave Kleikamp 775f18a5f21STheodore Ts'o n_group_desc = ext4_kvmalloc((gdb_num + 1) * 776f18a5f21STheodore Ts'o sizeof(struct buffer_head *), 777216553c4SJosef Bacik GFP_NOFS); 778ac27a0ecSDave Kleikamp if (!n_group_desc) { 779ac27a0ecSDave Kleikamp err = -ENOMEM; 780f18a5f21STheodore Ts'o ext4_warning(sb, "not enough memory for %lu groups", 781f18a5f21STheodore Ts'o gdb_num + 1); 782ac27a0ecSDave Kleikamp goto exit_inode; 783ac27a0ecSDave Kleikamp } 784ac27a0ecSDave Kleikamp 785ac27a0ecSDave Kleikamp /* 786ac27a0ecSDave Kleikamp * Finally, we have all of the possible failures behind us... 787ac27a0ecSDave Kleikamp * 788ac27a0ecSDave Kleikamp * Remove new GDT block from inode double-indirect block and clear out 789ac27a0ecSDave Kleikamp * the new GDT block for use (which also "frees" the backup GDT blocks 790ac27a0ecSDave Kleikamp * from the reserved inode). We don't need to change the bitmaps for 791ac27a0ecSDave Kleikamp * these blocks, because they are marked as in-use from being in the 792ac27a0ecSDave Kleikamp * reserved inode, and will become GDT blocks (primary and backup). 793ac27a0ecSDave Kleikamp */ 794617ba13bSMingming Cao data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; 795b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, dind); 796b4097142STheodore Ts'o if (unlikely(err)) { 797b4097142STheodore Ts'o ext4_std_error(sb, err); 798b4097142STheodore Ts'o goto exit_inode; 799b4097142STheodore Ts'o } 800ac27a0ecSDave Kleikamp inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; 801617ba13bSMingming Cao ext4_mark_iloc_dirty(handle, inode, &iloc); 8022f919710SYongqiang Yang memset(gdb_bh->b_data, 0, sb->s_blocksize); 8032f919710SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); 804b4097142STheodore Ts'o if (unlikely(err)) { 805b4097142STheodore Ts'o ext4_std_error(sb, err); 806b4097142STheodore Ts'o goto exit_inode; 807b4097142STheodore Ts'o } 808b4097142STheodore Ts'o brelse(dind); 809ac27a0ecSDave Kleikamp 810617ba13bSMingming Cao o_group_desc = EXT4_SB(sb)->s_group_desc; 811ac27a0ecSDave Kleikamp memcpy(n_group_desc, o_group_desc, 812617ba13bSMingming Cao EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); 8132f919710SYongqiang Yang n_group_desc[gdb_num] = gdb_bh; 814617ba13bSMingming Cao EXT4_SB(sb)->s_group_desc = n_group_desc; 815617ba13bSMingming Cao EXT4_SB(sb)->s_gdb_count++; 816f18a5f21STheodore Ts'o ext4_kvfree(o_group_desc); 817ac27a0ecSDave Kleikamp 818e8546d06SMarcin Slusarz le16_add_cpu(&es->s_reserved_gdt_blocks, -1); 819b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); 820b4097142STheodore Ts'o if (err) 821b4097142STheodore Ts'o ext4_std_error(sb, err); 822ac27a0ecSDave Kleikamp 823b4097142STheodore Ts'o return err; 824ac27a0ecSDave Kleikamp 825ac27a0ecSDave Kleikamp exit_inode: 826f18a5f21STheodore Ts'o ext4_kvfree(n_group_desc); 827537a0310SAmir Goldstein /* ext4_handle_release_buffer(handle, iloc.bh); */ 828ac27a0ecSDave Kleikamp brelse(iloc.bh); 829ac27a0ecSDave Kleikamp exit_dindj: 830537a0310SAmir Goldstein /* ext4_handle_release_buffer(handle, dind); */ 831ac27a0ecSDave Kleikamp exit_sbh: 832537a0310SAmir Goldstein /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ 833ac27a0ecSDave Kleikamp exit_dind: 834ac27a0ecSDave Kleikamp brelse(dind); 835ac27a0ecSDave Kleikamp exit_bh: 8362f919710SYongqiang Yang brelse(gdb_bh); 837ac27a0ecSDave Kleikamp 838617ba13bSMingming Cao ext4_debug("leaving with error %d\n", err); 839ac27a0ecSDave Kleikamp return err; 840ac27a0ecSDave Kleikamp } 841ac27a0ecSDave Kleikamp 842ac27a0ecSDave Kleikamp /* 843ac27a0ecSDave Kleikamp * Called when we are adding a new group which has a backup copy of each of 844ac27a0ecSDave Kleikamp * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 845ac27a0ecSDave Kleikamp * We need to add these reserved backup GDT blocks to the resize inode, so 846ac27a0ecSDave Kleikamp * that they are kept for future resizing and not allocated to files. 847ac27a0ecSDave Kleikamp * 848ac27a0ecSDave Kleikamp * Each reserved backup GDT block will go into a different indirect block. 849ac27a0ecSDave Kleikamp * The indirect blocks are actually the primary reserved GDT blocks, 850ac27a0ecSDave Kleikamp * so we know in advance what their block numbers are. We only get the 851ac27a0ecSDave Kleikamp * double-indirect block to verify it is pointing to the primary reserved 852ac27a0ecSDave Kleikamp * GDT blocks so we don't overwrite a data block by accident. The reserved 853ac27a0ecSDave Kleikamp * backup GDT blocks are stored in their reserved primary GDT block. 854ac27a0ecSDave Kleikamp */ 855ac27a0ecSDave Kleikamp static int reserve_backup_gdb(handle_t *handle, struct inode *inode, 856668f4dc5SYongqiang Yang ext4_group_t group) 857ac27a0ecSDave Kleikamp { 858ac27a0ecSDave Kleikamp struct super_block *sb = inode->i_sb; 859617ba13bSMingming Cao int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); 860ac27a0ecSDave Kleikamp struct buffer_head **primary; 861ac27a0ecSDave Kleikamp struct buffer_head *dind; 862617ba13bSMingming Cao struct ext4_iloc iloc; 863617ba13bSMingming Cao ext4_fsblk_t blk; 864ac27a0ecSDave Kleikamp __le32 *data, *end; 865ac27a0ecSDave Kleikamp int gdbackups = 0; 866ac27a0ecSDave Kleikamp int res, i; 867ac27a0ecSDave Kleikamp int err; 868ac27a0ecSDave Kleikamp 869216553c4SJosef Bacik primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); 870ac27a0ecSDave Kleikamp if (!primary) 871ac27a0ecSDave Kleikamp return -ENOMEM; 872ac27a0ecSDave Kleikamp 873617ba13bSMingming Cao data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; 874ac27a0ecSDave Kleikamp dind = sb_bread(sb, le32_to_cpu(*data)); 875ac27a0ecSDave Kleikamp if (!dind) { 876ac27a0ecSDave Kleikamp err = -EIO; 877ac27a0ecSDave Kleikamp goto exit_free; 878ac27a0ecSDave Kleikamp } 879ac27a0ecSDave Kleikamp 880617ba13bSMingming Cao blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; 88194460093SJosef Bacik data = (__le32 *)dind->b_data + (EXT4_SB(sb)->s_gdb_count % 88294460093SJosef Bacik EXT4_ADDR_PER_BLOCK(sb)); 883617ba13bSMingming Cao end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); 884ac27a0ecSDave Kleikamp 885ac27a0ecSDave Kleikamp /* Get each reserved primary GDT block and verify it holds backups */ 886ac27a0ecSDave Kleikamp for (res = 0; res < reserved_gdb; res++, blk++) { 887ac27a0ecSDave Kleikamp if (le32_to_cpu(*data) != blk) { 88812062dddSEric Sandeen ext4_warning(sb, "reserved block %llu" 889ac27a0ecSDave Kleikamp " not at offset %ld", 890ac27a0ecSDave Kleikamp blk, 891ac27a0ecSDave Kleikamp (long)(data - (__le32 *)dind->b_data)); 892ac27a0ecSDave Kleikamp err = -EINVAL; 893ac27a0ecSDave Kleikamp goto exit_bh; 894ac27a0ecSDave Kleikamp } 895ac27a0ecSDave Kleikamp primary[res] = sb_bread(sb, blk); 896ac27a0ecSDave Kleikamp if (!primary[res]) { 897ac27a0ecSDave Kleikamp err = -EIO; 898ac27a0ecSDave Kleikamp goto exit_bh; 899ac27a0ecSDave Kleikamp } 900*c72df9f9SYongqiang Yang gdbackups = verify_reserved_gdb(sb, group, primary[res]); 901*c72df9f9SYongqiang Yang if (gdbackups < 0) { 902ac27a0ecSDave Kleikamp brelse(primary[res]); 903ac27a0ecSDave Kleikamp err = gdbackups; 904ac27a0ecSDave Kleikamp goto exit_bh; 905ac27a0ecSDave Kleikamp } 906ac27a0ecSDave Kleikamp if (++data >= end) 907ac27a0ecSDave Kleikamp data = (__le32 *)dind->b_data; 908ac27a0ecSDave Kleikamp } 909ac27a0ecSDave Kleikamp 910ac27a0ecSDave Kleikamp for (i = 0; i < reserved_gdb; i++) { 911617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, primary[i]))) { 912ac27a0ecSDave Kleikamp /* 913ac27a0ecSDave Kleikamp int j; 914ac27a0ecSDave Kleikamp for (j = 0; j < i; j++) 915537a0310SAmir Goldstein ext4_handle_release_buffer(handle, primary[j]); 916ac27a0ecSDave Kleikamp */ 917ac27a0ecSDave Kleikamp goto exit_bh; 918ac27a0ecSDave Kleikamp } 919ac27a0ecSDave Kleikamp } 920ac27a0ecSDave Kleikamp 921617ba13bSMingming Cao if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) 922ac27a0ecSDave Kleikamp goto exit_bh; 923ac27a0ecSDave Kleikamp 924ac27a0ecSDave Kleikamp /* 925ac27a0ecSDave Kleikamp * Finally we can add each of the reserved backup GDT blocks from 926ac27a0ecSDave Kleikamp * the new group to its reserved primary GDT block. 927ac27a0ecSDave Kleikamp */ 928668f4dc5SYongqiang Yang blk = group * EXT4_BLOCKS_PER_GROUP(sb); 929ac27a0ecSDave Kleikamp for (i = 0; i < reserved_gdb; i++) { 930ac27a0ecSDave Kleikamp int err2; 931ac27a0ecSDave Kleikamp data = (__le32 *)primary[i]->b_data; 932ac27a0ecSDave Kleikamp /* printk("reserving backup %lu[%u] = %lu\n", 933ac27a0ecSDave Kleikamp primary[i]->b_blocknr, gdbackups, 934ac27a0ecSDave Kleikamp blk + primary[i]->b_blocknr); */ 935ac27a0ecSDave Kleikamp data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); 9360390131bSFrank Mayhar err2 = ext4_handle_dirty_metadata(handle, NULL, primary[i]); 937ac27a0ecSDave Kleikamp if (!err) 938ac27a0ecSDave Kleikamp err = err2; 939ac27a0ecSDave Kleikamp } 940ac27a0ecSDave Kleikamp inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; 941617ba13bSMingming Cao ext4_mark_iloc_dirty(handle, inode, &iloc); 942ac27a0ecSDave Kleikamp 943ac27a0ecSDave Kleikamp exit_bh: 944ac27a0ecSDave Kleikamp while (--res >= 0) 945ac27a0ecSDave Kleikamp brelse(primary[res]); 946ac27a0ecSDave Kleikamp brelse(dind); 947ac27a0ecSDave Kleikamp 948ac27a0ecSDave Kleikamp exit_free: 949ac27a0ecSDave Kleikamp kfree(primary); 950ac27a0ecSDave Kleikamp 951ac27a0ecSDave Kleikamp return err; 952ac27a0ecSDave Kleikamp } 953ac27a0ecSDave Kleikamp 954ac27a0ecSDave Kleikamp /* 955617ba13bSMingming Cao * Update the backup copies of the ext4 metadata. These don't need to be part 956ac27a0ecSDave Kleikamp * of the main resize transaction, because e2fsck will re-write them if there 957ac27a0ecSDave Kleikamp * is a problem (basically only OOM will cause a problem). However, we 958ac27a0ecSDave Kleikamp * _should_ update the backups if possible, in case the primary gets trashed 959ac27a0ecSDave Kleikamp * for some reason and we need to run e2fsck from a backup superblock. The 960ac27a0ecSDave Kleikamp * important part is that the new block and inode counts are in the backup 961ac27a0ecSDave Kleikamp * superblocks, and the location of the new group metadata in the GDT backups. 962ac27a0ecSDave Kleikamp * 96332ed5058STheodore Ts'o * We do not need take the s_resize_lock for this, because these 96432ed5058STheodore Ts'o * blocks are not otherwise touched by the filesystem code when it is 96532ed5058STheodore Ts'o * mounted. We don't need to worry about last changing from 96632ed5058STheodore Ts'o * sbi->s_groups_count, because the worst that can happen is that we 96732ed5058STheodore Ts'o * do not copy the full number of backups at this time. The resize 96832ed5058STheodore Ts'o * which changed s_groups_count will backup again. 969ac27a0ecSDave Kleikamp */ 970ac27a0ecSDave Kleikamp static void update_backups(struct super_block *sb, 971ac27a0ecSDave Kleikamp int blk_off, char *data, int size) 972ac27a0ecSDave Kleikamp { 973617ba13bSMingming Cao struct ext4_sb_info *sbi = EXT4_SB(sb); 974fd2d4291SAvantika Mathur const ext4_group_t last = sbi->s_groups_count; 975617ba13bSMingming Cao const int bpg = EXT4_BLOCKS_PER_GROUP(sb); 976ac27a0ecSDave Kleikamp unsigned three = 1; 977ac27a0ecSDave Kleikamp unsigned five = 5; 978ac27a0ecSDave Kleikamp unsigned seven = 7; 979fd2d4291SAvantika Mathur ext4_group_t group; 980ac27a0ecSDave Kleikamp int rest = sb->s_blocksize - size; 981ac27a0ecSDave Kleikamp handle_t *handle; 982ac27a0ecSDave Kleikamp int err = 0, err2; 983ac27a0ecSDave Kleikamp 984617ba13bSMingming Cao handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 985ac27a0ecSDave Kleikamp if (IS_ERR(handle)) { 986ac27a0ecSDave Kleikamp group = 1; 987ac27a0ecSDave Kleikamp err = PTR_ERR(handle); 988ac27a0ecSDave Kleikamp goto exit_err; 989ac27a0ecSDave Kleikamp } 990ac27a0ecSDave Kleikamp 991617ba13bSMingming Cao while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { 992ac27a0ecSDave Kleikamp struct buffer_head *bh; 993ac27a0ecSDave Kleikamp 994ac27a0ecSDave Kleikamp /* Out of journal space, and can't get more - abort - so sad */ 9950390131bSFrank Mayhar if (ext4_handle_valid(handle) && 9960390131bSFrank Mayhar handle->h_buffer_credits == 0 && 997617ba13bSMingming Cao ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && 998617ba13bSMingming Cao (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 999ac27a0ecSDave Kleikamp break; 1000ac27a0ecSDave Kleikamp 1001ac27a0ecSDave Kleikamp bh = sb_getblk(sb, group * bpg + blk_off); 1002ac27a0ecSDave Kleikamp if (!bh) { 1003ac27a0ecSDave Kleikamp err = -EIO; 1004ac27a0ecSDave Kleikamp break; 1005ac27a0ecSDave Kleikamp } 1006617ba13bSMingming Cao ext4_debug("update metadata backup %#04lx\n", 1007ac27a0ecSDave Kleikamp (unsigned long)bh->b_blocknr); 1008617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, bh))) 1009ac27a0ecSDave Kleikamp break; 1010ac27a0ecSDave Kleikamp lock_buffer(bh); 1011ac27a0ecSDave Kleikamp memcpy(bh->b_data, data, size); 1012ac27a0ecSDave Kleikamp if (rest) 1013ac27a0ecSDave Kleikamp memset(bh->b_data + size, 0, rest); 1014ac27a0ecSDave Kleikamp set_buffer_uptodate(bh); 1015ac27a0ecSDave Kleikamp unlock_buffer(bh); 1016b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, bh); 1017b4097142STheodore Ts'o if (unlikely(err)) 1018b4097142STheodore Ts'o ext4_std_error(sb, err); 1019ac27a0ecSDave Kleikamp brelse(bh); 1020ac27a0ecSDave Kleikamp } 1021617ba13bSMingming Cao if ((err2 = ext4_journal_stop(handle)) && !err) 1022ac27a0ecSDave Kleikamp err = err2; 1023ac27a0ecSDave Kleikamp 1024ac27a0ecSDave Kleikamp /* 1025ac27a0ecSDave Kleikamp * Ugh! Need to have e2fsck write the backup copies. It is too 1026ac27a0ecSDave Kleikamp * late to revert the resize, we shouldn't fail just because of 1027ac27a0ecSDave Kleikamp * the backup copies (they are only needed in case of corruption). 1028ac27a0ecSDave Kleikamp * 1029ac27a0ecSDave Kleikamp * However, if we got here we have a journal problem too, so we 1030ac27a0ecSDave Kleikamp * can't really start a transaction to mark the superblock. 1031ac27a0ecSDave Kleikamp * Chicken out and just set the flag on the hope it will be written 1032ac27a0ecSDave Kleikamp * to disk, and if not - we will simply wait until next fsck. 1033ac27a0ecSDave Kleikamp */ 1034ac27a0ecSDave Kleikamp exit_err: 1035ac27a0ecSDave Kleikamp if (err) { 103612062dddSEric Sandeen ext4_warning(sb, "can't update backup for group %u (err %d), " 1037ac27a0ecSDave Kleikamp "forcing fsck on next reboot", group, err); 1038617ba13bSMingming Cao sbi->s_mount_state &= ~EXT4_VALID_FS; 1039617ba13bSMingming Cao sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 1040ac27a0ecSDave Kleikamp mark_buffer_dirty(sbi->s_sbh); 1041ac27a0ecSDave Kleikamp } 1042ac27a0ecSDave Kleikamp } 1043ac27a0ecSDave Kleikamp 1044bb08c1e7SYongqiang Yang /* 1045bb08c1e7SYongqiang Yang * ext4_add_new_descs() adds @count group descriptor of groups 1046bb08c1e7SYongqiang Yang * starting at @group 1047bb08c1e7SYongqiang Yang * 1048bb08c1e7SYongqiang Yang * @handle: journal handle 1049bb08c1e7SYongqiang Yang * @sb: super block 1050bb08c1e7SYongqiang Yang * @group: the group no. of the first group desc to be added 1051bb08c1e7SYongqiang Yang * @resize_inode: the resize inode 1052bb08c1e7SYongqiang Yang * @count: number of group descriptors to be added 1053bb08c1e7SYongqiang Yang */ 1054bb08c1e7SYongqiang Yang static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, 1055bb08c1e7SYongqiang Yang ext4_group_t group, struct inode *resize_inode, 1056bb08c1e7SYongqiang Yang ext4_group_t count) 1057bb08c1e7SYongqiang Yang { 1058bb08c1e7SYongqiang Yang struct ext4_sb_info *sbi = EXT4_SB(sb); 1059bb08c1e7SYongqiang Yang struct ext4_super_block *es = sbi->s_es; 1060bb08c1e7SYongqiang Yang struct buffer_head *gdb_bh; 1061bb08c1e7SYongqiang Yang int i, gdb_off, gdb_num, err = 0; 1062bb08c1e7SYongqiang Yang 1063bb08c1e7SYongqiang Yang for (i = 0; i < count; i++, group++) { 1064bb08c1e7SYongqiang Yang int reserved_gdb = ext4_bg_has_super(sb, group) ? 1065bb08c1e7SYongqiang Yang le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1066bb08c1e7SYongqiang Yang 1067bb08c1e7SYongqiang Yang gdb_off = group % EXT4_DESC_PER_BLOCK(sb); 1068bb08c1e7SYongqiang Yang gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 1069bb08c1e7SYongqiang Yang 1070bb08c1e7SYongqiang Yang /* 1071bb08c1e7SYongqiang Yang * We will only either add reserved group blocks to a backup group 1072bb08c1e7SYongqiang Yang * or remove reserved blocks for the first group in a new group block. 1073bb08c1e7SYongqiang Yang * Doing both would be mean more complex code, and sane people don't 1074bb08c1e7SYongqiang Yang * use non-sparse filesystems anymore. This is already checked above. 1075bb08c1e7SYongqiang Yang */ 1076bb08c1e7SYongqiang Yang if (gdb_off) { 1077bb08c1e7SYongqiang Yang gdb_bh = sbi->s_group_desc[gdb_num]; 1078bb08c1e7SYongqiang Yang err = ext4_journal_get_write_access(handle, gdb_bh); 1079bb08c1e7SYongqiang Yang 1080bb08c1e7SYongqiang Yang if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) 1081bb08c1e7SYongqiang Yang err = reserve_backup_gdb(handle, resize_inode, group); 1082bb08c1e7SYongqiang Yang } else 1083bb08c1e7SYongqiang Yang err = add_new_gdb(handle, resize_inode, group); 1084bb08c1e7SYongqiang Yang if (err) 1085bb08c1e7SYongqiang Yang break; 1086bb08c1e7SYongqiang Yang } 1087bb08c1e7SYongqiang Yang return err; 1088bb08c1e7SYongqiang Yang } 1089bb08c1e7SYongqiang Yang 1090083f5b24SYongqiang Yang /* 1091083f5b24SYongqiang Yang * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg 1092083f5b24SYongqiang Yang */ 1093083f5b24SYongqiang Yang static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, 1094083f5b24SYongqiang Yang struct ext4_new_flex_group_data *flex_gd) 1095083f5b24SYongqiang Yang { 1096083f5b24SYongqiang Yang struct ext4_new_group_data *group_data = flex_gd->groups; 1097083f5b24SYongqiang Yang struct ext4_group_desc *gdp; 1098083f5b24SYongqiang Yang struct ext4_sb_info *sbi = EXT4_SB(sb); 1099083f5b24SYongqiang Yang struct buffer_head *gdb_bh; 1100083f5b24SYongqiang Yang ext4_group_t group; 1101083f5b24SYongqiang Yang __u16 *bg_flags = flex_gd->bg_flags; 1102083f5b24SYongqiang Yang int i, gdb_off, gdb_num, err = 0; 1103083f5b24SYongqiang Yang 1104083f5b24SYongqiang Yang 1105083f5b24SYongqiang Yang for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { 1106083f5b24SYongqiang Yang group = group_data->group; 1107083f5b24SYongqiang Yang 1108083f5b24SYongqiang Yang gdb_off = group % EXT4_DESC_PER_BLOCK(sb); 1109083f5b24SYongqiang Yang gdb_num = group / EXT4_DESC_PER_BLOCK(sb); 1110083f5b24SYongqiang Yang 1111083f5b24SYongqiang Yang /* 1112083f5b24SYongqiang Yang * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). 1113083f5b24SYongqiang Yang */ 1114083f5b24SYongqiang Yang gdb_bh = sbi->s_group_desc[gdb_num]; 1115083f5b24SYongqiang Yang /* Update group descriptor block for new group */ 1116083f5b24SYongqiang Yang gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + 1117083f5b24SYongqiang Yang gdb_off * EXT4_DESC_SIZE(sb)); 1118083f5b24SYongqiang Yang 1119083f5b24SYongqiang Yang memset(gdp, 0, EXT4_DESC_SIZE(sb)); 1120083f5b24SYongqiang Yang ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); 1121083f5b24SYongqiang Yang ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); 1122083f5b24SYongqiang Yang ext4_inode_table_set(sb, gdp, group_data->inode_table); 1123083f5b24SYongqiang Yang ext4_free_group_clusters_set(sb, gdp, 1124083f5b24SYongqiang Yang EXT4_B2C(sbi, group_data->free_blocks_count)); 1125083f5b24SYongqiang Yang ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1126083f5b24SYongqiang Yang gdp->bg_flags = cpu_to_le16(*bg_flags); 1127083f5b24SYongqiang Yang gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); 1128083f5b24SYongqiang Yang 1129083f5b24SYongqiang Yang err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); 1130083f5b24SYongqiang Yang if (unlikely(err)) { 1131083f5b24SYongqiang Yang ext4_std_error(sb, err); 1132083f5b24SYongqiang Yang break; 1133083f5b24SYongqiang Yang } 1134083f5b24SYongqiang Yang 1135083f5b24SYongqiang Yang /* 1136083f5b24SYongqiang Yang * We can allocate memory for mb_alloc based on the new group 1137083f5b24SYongqiang Yang * descriptor 1138083f5b24SYongqiang Yang */ 1139083f5b24SYongqiang Yang err = ext4_mb_add_groupinfo(sb, group, gdp); 1140083f5b24SYongqiang Yang if (err) 1141083f5b24SYongqiang Yang break; 1142083f5b24SYongqiang Yang } 1143083f5b24SYongqiang Yang return err; 1144083f5b24SYongqiang Yang } 1145083f5b24SYongqiang Yang 11462e10e2f2SYongqiang Yang /* 11472e10e2f2SYongqiang Yang * ext4_update_super() updates the super block so that the newly added 11482e10e2f2SYongqiang Yang * groups can be seen by the filesystem. 11492e10e2f2SYongqiang Yang * 11502e10e2f2SYongqiang Yang * @sb: super block 11512e10e2f2SYongqiang Yang * @flex_gd: new added groups 11522e10e2f2SYongqiang Yang */ 11532e10e2f2SYongqiang Yang static void ext4_update_super(struct super_block *sb, 11542e10e2f2SYongqiang Yang struct ext4_new_flex_group_data *flex_gd) 11552e10e2f2SYongqiang Yang { 11562e10e2f2SYongqiang Yang ext4_fsblk_t blocks_count = 0; 11572e10e2f2SYongqiang Yang ext4_fsblk_t free_blocks = 0; 11582e10e2f2SYongqiang Yang ext4_fsblk_t reserved_blocks = 0; 11592e10e2f2SYongqiang Yang struct ext4_new_group_data *group_data = flex_gd->groups; 11602e10e2f2SYongqiang Yang struct ext4_sb_info *sbi = EXT4_SB(sb); 11612e10e2f2SYongqiang Yang struct ext4_super_block *es = sbi->s_es; 11622e10e2f2SYongqiang Yang int i; 11632e10e2f2SYongqiang Yang 11642e10e2f2SYongqiang Yang BUG_ON(flex_gd->count == 0 || group_data == NULL); 11652e10e2f2SYongqiang Yang /* 11662e10e2f2SYongqiang Yang * Make the new blocks and inodes valid next. We do this before 11672e10e2f2SYongqiang Yang * increasing the group count so that once the group is enabled, 11682e10e2f2SYongqiang Yang * all of its blocks and inodes are already valid. 11692e10e2f2SYongqiang Yang * 11702e10e2f2SYongqiang Yang * We always allocate group-by-group, then block-by-block or 11712e10e2f2SYongqiang Yang * inode-by-inode within a group, so enabling these 11722e10e2f2SYongqiang Yang * blocks/inodes before the group is live won't actually let us 11732e10e2f2SYongqiang Yang * allocate the new space yet. 11742e10e2f2SYongqiang Yang */ 11752e10e2f2SYongqiang Yang for (i = 0; i < flex_gd->count; i++) { 11762e10e2f2SYongqiang Yang blocks_count += group_data[i].blocks_count; 11772e10e2f2SYongqiang Yang free_blocks += group_data[i].free_blocks_count; 11782e10e2f2SYongqiang Yang } 11792e10e2f2SYongqiang Yang 11802e10e2f2SYongqiang Yang reserved_blocks = ext4_r_blocks_count(es) * 100; 11812e10e2f2SYongqiang Yang do_div(reserved_blocks, ext4_blocks_count(es)); 11822e10e2f2SYongqiang Yang reserved_blocks *= blocks_count; 11832e10e2f2SYongqiang Yang do_div(reserved_blocks, 100); 11842e10e2f2SYongqiang Yang 11852e10e2f2SYongqiang Yang ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); 11862e10e2f2SYongqiang Yang le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * 11872e10e2f2SYongqiang Yang flex_gd->count); 11882e10e2f2SYongqiang Yang 11892e10e2f2SYongqiang Yang /* 11902e10e2f2SYongqiang Yang * We need to protect s_groups_count against other CPUs seeing 11912e10e2f2SYongqiang Yang * inconsistent state in the superblock. 11922e10e2f2SYongqiang Yang * 11932e10e2f2SYongqiang Yang * The precise rules we use are: 11942e10e2f2SYongqiang Yang * 11952e10e2f2SYongqiang Yang * * Writers must perform a smp_wmb() after updating all 11962e10e2f2SYongqiang Yang * dependent data and before modifying the groups count 11972e10e2f2SYongqiang Yang * 11982e10e2f2SYongqiang Yang * * Readers must perform an smp_rmb() after reading the groups 11992e10e2f2SYongqiang Yang * count and before reading any dependent data. 12002e10e2f2SYongqiang Yang * 12012e10e2f2SYongqiang Yang * NB. These rules can be relaxed when checking the group count 12022e10e2f2SYongqiang Yang * while freeing data, as we can only allocate from a block 12032e10e2f2SYongqiang Yang * group after serialising against the group count, and we can 12042e10e2f2SYongqiang Yang * only then free after serialising in turn against that 12052e10e2f2SYongqiang Yang * allocation. 12062e10e2f2SYongqiang Yang */ 12072e10e2f2SYongqiang Yang smp_wmb(); 12082e10e2f2SYongqiang Yang 12092e10e2f2SYongqiang Yang /* Update the global fs size fields */ 12102e10e2f2SYongqiang Yang sbi->s_groups_count += flex_gd->count; 12112e10e2f2SYongqiang Yang 12122e10e2f2SYongqiang Yang /* Update the reserved block counts only once the new group is 12132e10e2f2SYongqiang Yang * active. */ 12142e10e2f2SYongqiang Yang ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + 12152e10e2f2SYongqiang Yang reserved_blocks); 12162e10e2f2SYongqiang Yang 12172e10e2f2SYongqiang Yang /* Update the free space counts */ 12182e10e2f2SYongqiang Yang percpu_counter_add(&sbi->s_freeclusters_counter, 12192e10e2f2SYongqiang Yang EXT4_B2C(sbi, free_blocks)); 12202e10e2f2SYongqiang Yang percpu_counter_add(&sbi->s_freeinodes_counter, 12212e10e2f2SYongqiang Yang EXT4_INODES_PER_GROUP(sb) * flex_gd->count); 12222e10e2f2SYongqiang Yang 12232e10e2f2SYongqiang Yang if (EXT4_HAS_INCOMPAT_FEATURE(sb, 12242e10e2f2SYongqiang Yang EXT4_FEATURE_INCOMPAT_FLEX_BG) && 12252e10e2f2SYongqiang Yang sbi->s_log_groups_per_flex) { 12262e10e2f2SYongqiang Yang ext4_group_t flex_group; 12272e10e2f2SYongqiang Yang flex_group = ext4_flex_group(sbi, group_data[0].group); 12282e10e2f2SYongqiang Yang atomic_add(EXT4_B2C(sbi, free_blocks), 12292e10e2f2SYongqiang Yang &sbi->s_flex_groups[flex_group].free_clusters); 12302e10e2f2SYongqiang Yang atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 12312e10e2f2SYongqiang Yang &sbi->s_flex_groups[flex_group].free_inodes); 12322e10e2f2SYongqiang Yang } 12332e10e2f2SYongqiang Yang 12342e10e2f2SYongqiang Yang if (test_opt(sb, DEBUG)) 12352e10e2f2SYongqiang Yang printk(KERN_DEBUG "EXT4-fs: added group %u:" 12362e10e2f2SYongqiang Yang "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, 12372e10e2f2SYongqiang Yang blocks_count, free_blocks, reserved_blocks); 12382e10e2f2SYongqiang Yang } 12392e10e2f2SYongqiang Yang 1240ac27a0ecSDave Kleikamp /* Add group descriptor data to an existing or new group descriptor block. 1241ac27a0ecSDave Kleikamp * Ensure we handle all possible error conditions _before_ we start modifying 1242ac27a0ecSDave Kleikamp * the filesystem, because we cannot abort the transaction and not have it 1243ac27a0ecSDave Kleikamp * write the data to disk. 1244ac27a0ecSDave Kleikamp * 1245ac27a0ecSDave Kleikamp * If we are on a GDT block boundary, we need to get the reserved GDT block. 1246ac27a0ecSDave Kleikamp * Otherwise, we may need to add backup GDT blocks for a sparse group. 1247ac27a0ecSDave Kleikamp * 1248ac27a0ecSDave Kleikamp * We only need to hold the superblock lock while we are actually adding 1249ac27a0ecSDave Kleikamp * in the new group's counts to the superblock. Prior to that we have 1250ac27a0ecSDave Kleikamp * not really "added" the group at all. We re-check that we are still 1251ac27a0ecSDave Kleikamp * adding in the last group in case things have changed since verifying. 1252ac27a0ecSDave Kleikamp */ 1253617ba13bSMingming Cao int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 1254ac27a0ecSDave Kleikamp { 1255617ba13bSMingming Cao struct ext4_sb_info *sbi = EXT4_SB(sb); 1256617ba13bSMingming Cao struct ext4_super_block *es = sbi->s_es; 1257617ba13bSMingming Cao int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 1258ac27a0ecSDave Kleikamp le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1259ac27a0ecSDave Kleikamp struct buffer_head *primary = NULL; 1260617ba13bSMingming Cao struct ext4_group_desc *gdp; 1261ac27a0ecSDave Kleikamp struct inode *inode = NULL; 1262ac27a0ecSDave Kleikamp handle_t *handle; 1263ac27a0ecSDave Kleikamp int gdb_off, gdb_num; 1264ac27a0ecSDave Kleikamp int err, err2; 1265ac27a0ecSDave Kleikamp 1266617ba13bSMingming Cao gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 1267617ba13bSMingming Cao gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); 1268ac27a0ecSDave Kleikamp 1269617ba13bSMingming Cao if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, 1270617ba13bSMingming Cao EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { 127112062dddSEric Sandeen ext4_warning(sb, "Can't resize non-sparse filesystem further"); 1272ac27a0ecSDave Kleikamp return -EPERM; 1273ac27a0ecSDave Kleikamp } 1274ac27a0ecSDave Kleikamp 1275bd81d8eeSLaurent Vivier if (ext4_blocks_count(es) + input->blocks_count < 1276bd81d8eeSLaurent Vivier ext4_blocks_count(es)) { 127712062dddSEric Sandeen ext4_warning(sb, "blocks_count overflow"); 1278ac27a0ecSDave Kleikamp return -EINVAL; 1279ac27a0ecSDave Kleikamp } 1280ac27a0ecSDave Kleikamp 1281617ba13bSMingming Cao if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < 1282ac27a0ecSDave Kleikamp le32_to_cpu(es->s_inodes_count)) { 128312062dddSEric Sandeen ext4_warning(sb, "inodes_count overflow"); 1284ac27a0ecSDave Kleikamp return -EINVAL; 1285ac27a0ecSDave Kleikamp } 1286ac27a0ecSDave Kleikamp 1287ac27a0ecSDave Kleikamp if (reserved_gdb || gdb_off == 0) { 1288617ba13bSMingming Cao if (!EXT4_HAS_COMPAT_FEATURE(sb, 128937609fd5SJosef Bacik EXT4_FEATURE_COMPAT_RESIZE_INODE) 129037609fd5SJosef Bacik || !le16_to_cpu(es->s_reserved_gdt_blocks)) { 129112062dddSEric Sandeen ext4_warning(sb, 1292ac27a0ecSDave Kleikamp "No reserved GDT blocks, can't resize"); 1293ac27a0ecSDave Kleikamp return -EPERM; 1294ac27a0ecSDave Kleikamp } 12951d1fe1eeSDavid Howells inode = ext4_iget(sb, EXT4_RESIZE_INO); 12961d1fe1eeSDavid Howells if (IS_ERR(inode)) { 129712062dddSEric Sandeen ext4_warning(sb, "Error opening resize inode"); 12981d1fe1eeSDavid Howells return PTR_ERR(inode); 1299ac27a0ecSDave Kleikamp } 1300ac27a0ecSDave Kleikamp } 1301ac27a0ecSDave Kleikamp 1302920313a7SAneesh Kumar K.V 1303ac27a0ecSDave Kleikamp if ((err = verify_group_input(sb, input))) 1304ac27a0ecSDave Kleikamp goto exit_put; 1305ac27a0ecSDave Kleikamp 1306ac27a0ecSDave Kleikamp if ((err = setup_new_group_blocks(sb, input))) 1307ac27a0ecSDave Kleikamp goto exit_put; 1308ac27a0ecSDave Kleikamp 1309ac27a0ecSDave Kleikamp /* 1310ac27a0ecSDave Kleikamp * We will always be modifying at least the superblock and a GDT 1311ac27a0ecSDave Kleikamp * block. If we are adding a group past the last current GDT block, 1312ac27a0ecSDave Kleikamp * we will also modify the inode and the dindirect block. If we 1313ac27a0ecSDave Kleikamp * are adding a group with superblock/GDT backups we will also 1314ac27a0ecSDave Kleikamp * modify each of the reserved GDT dindirect blocks. 1315ac27a0ecSDave Kleikamp */ 1316617ba13bSMingming Cao handle = ext4_journal_start_sb(sb, 1317617ba13bSMingming Cao ext4_bg_has_super(sb, input->group) ? 1318ac27a0ecSDave Kleikamp 3 + reserved_gdb : 4); 1319ac27a0ecSDave Kleikamp if (IS_ERR(handle)) { 1320ac27a0ecSDave Kleikamp err = PTR_ERR(handle); 1321ac27a0ecSDave Kleikamp goto exit_put; 1322ac27a0ecSDave Kleikamp } 1323ac27a0ecSDave Kleikamp 1324617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 1325ac27a0ecSDave Kleikamp goto exit_journal; 1326ac27a0ecSDave Kleikamp 1327ac27a0ecSDave Kleikamp /* 1328ac27a0ecSDave Kleikamp * We will only either add reserved group blocks to a backup group 1329ac27a0ecSDave Kleikamp * or remove reserved blocks for the first group in a new group block. 1330ac27a0ecSDave Kleikamp * Doing both would be mean more complex code, and sane people don't 1331ac27a0ecSDave Kleikamp * use non-sparse filesystems anymore. This is already checked above. 1332ac27a0ecSDave Kleikamp */ 1333ac27a0ecSDave Kleikamp if (gdb_off) { 1334ac27a0ecSDave Kleikamp primary = sbi->s_group_desc[gdb_num]; 1335617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, primary))) 1336ac27a0ecSDave Kleikamp goto exit_journal; 1337ac27a0ecSDave Kleikamp 1338668f4dc5SYongqiang Yang if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { 1339668f4dc5SYongqiang Yang err = reserve_backup_gdb(handle, inode, input->group); 1340668f4dc5SYongqiang Yang if (err) 1341ac27a0ecSDave Kleikamp goto exit_journal; 1342668f4dc5SYongqiang Yang } 13432f919710SYongqiang Yang } else { 13442f919710SYongqiang Yang /* 13452f919710SYongqiang Yang * Note that we can access new group descriptor block safely 13462f919710SYongqiang Yang * only if add_new_gdb() succeeds. 13472f919710SYongqiang Yang */ 13482f919710SYongqiang Yang err = add_new_gdb(handle, inode, input->group); 13492f919710SYongqiang Yang if (err) 1350ac27a0ecSDave Kleikamp goto exit_journal; 13512f919710SYongqiang Yang primary = sbi->s_group_desc[gdb_num]; 13522f919710SYongqiang Yang } 1353ac27a0ecSDave Kleikamp 1354ac27a0ecSDave Kleikamp /* 1355ac27a0ecSDave Kleikamp * OK, now we've set up the new group. Time to make it active. 1356ac27a0ecSDave Kleikamp * 1357ac27a0ecSDave Kleikamp * so we have to be safe wrt. concurrent accesses the group 1358ac27a0ecSDave Kleikamp * data. So we need to be careful to set all of the relevant 1359ac27a0ecSDave Kleikamp * group descriptor data etc. *before* we enable the group. 1360ac27a0ecSDave Kleikamp * 1361ac27a0ecSDave Kleikamp * The key field here is sbi->s_groups_count: as long as 1362ac27a0ecSDave Kleikamp * that retains its old value, nobody is going to access the new 1363ac27a0ecSDave Kleikamp * group. 1364ac27a0ecSDave Kleikamp * 1365ac27a0ecSDave Kleikamp * So first we update all the descriptor metadata for the new 1366ac27a0ecSDave Kleikamp * group; then we update the total disk blocks count; then we 1367ac27a0ecSDave Kleikamp * update the groups count to enable the group; then finally we 1368ac27a0ecSDave Kleikamp * update the free space counts so that the system can start 1369ac27a0ecSDave Kleikamp * using the new disk blocks. 1370ac27a0ecSDave Kleikamp */ 1371ac27a0ecSDave Kleikamp 1372ac27a0ecSDave Kleikamp /* Update group descriptor block for new group */ 13732856922cSFrederic Bohe gdp = (struct ext4_group_desc *)((char *)primary->b_data + 13742856922cSFrederic Bohe gdb_off * EXT4_DESC_SIZE(sb)); 1375ac27a0ecSDave Kleikamp 1376fdff73f0STheodore Ts'o memset(gdp, 0, EXT4_DESC_SIZE(sb)); 13778fadc143SAlexandre Ratchov ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 13788fadc143SAlexandre Ratchov ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 13798fadc143SAlexandre Ratchov ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ 1380021b65bbSTheodore Ts'o ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); 1381560671a0SAneesh Kumar K.V ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1382fdff73f0STheodore Ts'o gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); 1383717d50e4SAndreas Dilger gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); 1384ac27a0ecSDave Kleikamp 1385ac27a0ecSDave Kleikamp /* 13865f21b0e6SFrederic Bohe * We can allocate memory for mb_alloc based on the new group 13875f21b0e6SFrederic Bohe * descriptor 13885f21b0e6SFrederic Bohe */ 1389920313a7SAneesh Kumar K.V err = ext4_mb_add_groupinfo(sb, input->group, gdp); 139008c3a813SAneesh Kumar K.V if (err) 13915f21b0e6SFrederic Bohe goto exit_journal; 1392c2ea3fdeSTheodore Ts'o 13935f21b0e6SFrederic Bohe /* 1394ac27a0ecSDave Kleikamp * Make the new blocks and inodes valid next. We do this before 1395ac27a0ecSDave Kleikamp * increasing the group count so that once the group is enabled, 1396ac27a0ecSDave Kleikamp * all of its blocks and inodes are already valid. 1397ac27a0ecSDave Kleikamp * 1398ac27a0ecSDave Kleikamp * We always allocate group-by-group, then block-by-block or 1399ac27a0ecSDave Kleikamp * inode-by-inode within a group, so enabling these 1400ac27a0ecSDave Kleikamp * blocks/inodes before the group is live won't actually let us 1401ac27a0ecSDave Kleikamp * allocate the new space yet. 1402ac27a0ecSDave Kleikamp */ 1403bd81d8eeSLaurent Vivier ext4_blocks_count_set(es, ext4_blocks_count(es) + 1404ac27a0ecSDave Kleikamp input->blocks_count); 1405e8546d06SMarcin Slusarz le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); 1406ac27a0ecSDave Kleikamp 1407ac27a0ecSDave Kleikamp /* 1408ac27a0ecSDave Kleikamp * We need to protect s_groups_count against other CPUs seeing 1409ac27a0ecSDave Kleikamp * inconsistent state in the superblock. 1410ac27a0ecSDave Kleikamp * 1411ac27a0ecSDave Kleikamp * The precise rules we use are: 1412ac27a0ecSDave Kleikamp * 1413ac27a0ecSDave Kleikamp * * Writers must perform a smp_wmb() after updating all dependent 1414ac27a0ecSDave Kleikamp * data and before modifying the groups count 1415ac27a0ecSDave Kleikamp * 1416ac27a0ecSDave Kleikamp * * Readers must perform an smp_rmb() after reading the groups count 1417ac27a0ecSDave Kleikamp * and before reading any dependent data. 1418ac27a0ecSDave Kleikamp * 1419ac27a0ecSDave Kleikamp * NB. These rules can be relaxed when checking the group count 1420ac27a0ecSDave Kleikamp * while freeing data, as we can only allocate from a block 1421ac27a0ecSDave Kleikamp * group after serialising against the group count, and we can 1422ac27a0ecSDave Kleikamp * only then free after serialising in turn against that 1423ac27a0ecSDave Kleikamp * allocation. 1424ac27a0ecSDave Kleikamp */ 1425ac27a0ecSDave Kleikamp smp_wmb(); 1426ac27a0ecSDave Kleikamp 1427ac27a0ecSDave Kleikamp /* Update the global fs size fields */ 1428ac27a0ecSDave Kleikamp sbi->s_groups_count++; 1429ac27a0ecSDave Kleikamp 1430b4097142STheodore Ts'o err = ext4_handle_dirty_metadata(handle, NULL, primary); 1431b4097142STheodore Ts'o if (unlikely(err)) { 1432b4097142STheodore Ts'o ext4_std_error(sb, err); 1433b4097142STheodore Ts'o goto exit_journal; 1434b4097142STheodore Ts'o } 1435ac27a0ecSDave Kleikamp 1436ac27a0ecSDave Kleikamp /* Update the reserved block counts only once the new group is 1437ac27a0ecSDave Kleikamp * active. */ 1438bd81d8eeSLaurent Vivier ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + 1439ac27a0ecSDave Kleikamp input->reserved_blocks); 1440ac27a0ecSDave Kleikamp 1441ac27a0ecSDave Kleikamp /* Update the free space counts */ 144257042651STheodore Ts'o percpu_counter_add(&sbi->s_freeclusters_counter, 144357042651STheodore Ts'o EXT4_B2C(sbi, input->free_blocks_count)); 1444aa0dff2dSPeter Zijlstra percpu_counter_add(&sbi->s_freeinodes_counter, 1445617ba13bSMingming Cao EXT4_INODES_PER_GROUP(sb)); 1446ac27a0ecSDave Kleikamp 144742007efdSEric Sandeen if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 144842007efdSEric Sandeen sbi->s_log_groups_per_flex) { 1449c62a11fdSFrederic Bohe ext4_group_t flex_group; 1450c62a11fdSFrederic Bohe flex_group = ext4_flex_group(sbi, input->group); 145124aaa8efSTheodore Ts'o atomic_add(EXT4_B2C(sbi, input->free_blocks_count), 145224aaa8efSTheodore Ts'o &sbi->s_flex_groups[flex_group].free_clusters); 14539f24e420STheodore Ts'o atomic_add(EXT4_INODES_PER_GROUP(sb), 14549f24e420STheodore Ts'o &sbi->s_flex_groups[flex_group].free_inodes); 1455c62a11fdSFrederic Bohe } 1456c62a11fdSFrederic Bohe 1457a0375156STheodore Ts'o ext4_handle_dirty_super(handle, sb); 1458ac27a0ecSDave Kleikamp 1459ac27a0ecSDave Kleikamp exit_journal: 1460617ba13bSMingming Cao if ((err2 = ext4_journal_stop(handle)) && !err) 1461ac27a0ecSDave Kleikamp err = err2; 14622f919710SYongqiang Yang if (!err && primary) { 1463ac27a0ecSDave Kleikamp update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1464617ba13bSMingming Cao sizeof(struct ext4_super_block)); 1465ac27a0ecSDave Kleikamp update_backups(sb, primary->b_blocknr, primary->b_data, 1466ac27a0ecSDave Kleikamp primary->b_size); 1467ac27a0ecSDave Kleikamp } 1468ac27a0ecSDave Kleikamp exit_put: 1469ac27a0ecSDave Kleikamp iput(inode); 1470ac27a0ecSDave Kleikamp return err; 1471617ba13bSMingming Cao } /* ext4_group_add */ 1472ac27a0ecSDave Kleikamp 14732b2d6d01STheodore Ts'o /* 147418e31438SYongqiang Yang * extend a group without checking assuming that checking has been done. 147518e31438SYongqiang Yang */ 147618e31438SYongqiang Yang static int ext4_group_extend_no_check(struct super_block *sb, 147718e31438SYongqiang Yang ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) 147818e31438SYongqiang Yang { 147918e31438SYongqiang Yang struct ext4_super_block *es = EXT4_SB(sb)->s_es; 148018e31438SYongqiang Yang handle_t *handle; 148118e31438SYongqiang Yang int err = 0, err2; 148218e31438SYongqiang Yang 148318e31438SYongqiang Yang /* We will update the superblock, one block bitmap, and 148418e31438SYongqiang Yang * one group descriptor via ext4_group_add_blocks(). 148518e31438SYongqiang Yang */ 148618e31438SYongqiang Yang handle = ext4_journal_start_sb(sb, 3); 148718e31438SYongqiang Yang if (IS_ERR(handle)) { 148818e31438SYongqiang Yang err = PTR_ERR(handle); 148918e31438SYongqiang Yang ext4_warning(sb, "error %d on journal start", err); 149018e31438SYongqiang Yang return err; 149118e31438SYongqiang Yang } 149218e31438SYongqiang Yang 149318e31438SYongqiang Yang err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 149418e31438SYongqiang Yang if (err) { 149518e31438SYongqiang Yang ext4_warning(sb, "error %d on journal write access", err); 149618e31438SYongqiang Yang goto errout; 149718e31438SYongqiang Yang } 149818e31438SYongqiang Yang 149918e31438SYongqiang Yang ext4_blocks_count_set(es, o_blocks_count + add); 150018e31438SYongqiang Yang ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 150118e31438SYongqiang Yang o_blocks_count + add); 150218e31438SYongqiang Yang /* We add the blocks to the bitmap and set the group need init bit */ 150318e31438SYongqiang Yang err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); 150418e31438SYongqiang Yang if (err) 150518e31438SYongqiang Yang goto errout; 150618e31438SYongqiang Yang ext4_handle_dirty_super(handle, sb); 150718e31438SYongqiang Yang ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 150818e31438SYongqiang Yang o_blocks_count + add); 150918e31438SYongqiang Yang errout: 151018e31438SYongqiang Yang err2 = ext4_journal_stop(handle); 151118e31438SYongqiang Yang if (err2 && !err) 151218e31438SYongqiang Yang err = err2; 151318e31438SYongqiang Yang 151418e31438SYongqiang Yang if (!err) { 151518e31438SYongqiang Yang if (test_opt(sb, DEBUG)) 151618e31438SYongqiang Yang printk(KERN_DEBUG "EXT4-fs: extended group to %llu " 151718e31438SYongqiang Yang "blocks\n", ext4_blocks_count(es)); 151818e31438SYongqiang Yang update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 151918e31438SYongqiang Yang sizeof(struct ext4_super_block)); 152018e31438SYongqiang Yang } 152118e31438SYongqiang Yang return err; 152218e31438SYongqiang Yang } 152318e31438SYongqiang Yang 152418e31438SYongqiang Yang /* 15252b2d6d01STheodore Ts'o * Extend the filesystem to the new number of blocks specified. This entry 1526ac27a0ecSDave Kleikamp * point is only used to extend the current filesystem to the end of the last 1527ac27a0ecSDave Kleikamp * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 1528ac27a0ecSDave Kleikamp * for emergencies (because it has no dependencies on reserved blocks). 1529ac27a0ecSDave Kleikamp * 1530617ba13bSMingming Cao * If we _really_ wanted, we could use default values to call ext4_group_add() 1531ac27a0ecSDave Kleikamp * allow the "remount" trick to work for arbitrary resizing, assuming enough 1532ac27a0ecSDave Kleikamp * GDT blocks are reserved to grow to the desired size. 1533ac27a0ecSDave Kleikamp */ 1534617ba13bSMingming Cao int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, 1535617ba13bSMingming Cao ext4_fsblk_t n_blocks_count) 1536ac27a0ecSDave Kleikamp { 1537617ba13bSMingming Cao ext4_fsblk_t o_blocks_count; 1538617ba13bSMingming Cao ext4_grpblk_t last; 1539617ba13bSMingming Cao ext4_grpblk_t add; 1540ac27a0ecSDave Kleikamp struct buffer_head *bh; 1541ac27a0ecSDave Kleikamp handle_t *handle; 1542cc7365dfSYongqiang Yang int err, err2; 15435f21b0e6SFrederic Bohe ext4_group_t group; 1544ac27a0ecSDave Kleikamp 1545bd81d8eeSLaurent Vivier o_blocks_count = ext4_blocks_count(es); 1546ac27a0ecSDave Kleikamp 1547ac27a0ecSDave Kleikamp if (test_opt(sb, DEBUG)) 15482b79b09dSYongqiang Yang printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n", 1549ac27a0ecSDave Kleikamp o_blocks_count, n_blocks_count); 1550ac27a0ecSDave Kleikamp 1551ac27a0ecSDave Kleikamp if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) 1552ac27a0ecSDave Kleikamp return 0; 1553ac27a0ecSDave Kleikamp 1554ac27a0ecSDave Kleikamp if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { 1555617ba13bSMingming Cao printk(KERN_ERR "EXT4-fs: filesystem on %s:" 15562ae02107SMingming Cao " too large to resize to %llu blocks safely\n", 1557ac27a0ecSDave Kleikamp sb->s_id, n_blocks_count); 1558ac27a0ecSDave Kleikamp if (sizeof(sector_t) < 8) 155912062dddSEric Sandeen ext4_warning(sb, "CONFIG_LBDAF not enabled"); 1560ac27a0ecSDave Kleikamp return -EINVAL; 1561ac27a0ecSDave Kleikamp } 1562ac27a0ecSDave Kleikamp 1563ac27a0ecSDave Kleikamp if (n_blocks_count < o_blocks_count) { 156412062dddSEric Sandeen ext4_warning(sb, "can't shrink FS - resize aborted"); 15658f82f840SYongqiang Yang return -EINVAL; 1566ac27a0ecSDave Kleikamp } 1567ac27a0ecSDave Kleikamp 1568ac27a0ecSDave Kleikamp /* Handle the remaining blocks in the last group only. */ 15695f21b0e6SFrederic Bohe ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); 1570ac27a0ecSDave Kleikamp 1571ac27a0ecSDave Kleikamp if (last == 0) { 157212062dddSEric Sandeen ext4_warning(sb, "need to use ext2online to resize further"); 1573ac27a0ecSDave Kleikamp return -EPERM; 1574ac27a0ecSDave Kleikamp } 1575ac27a0ecSDave Kleikamp 1576617ba13bSMingming Cao add = EXT4_BLOCKS_PER_GROUP(sb) - last; 1577ac27a0ecSDave Kleikamp 1578ac27a0ecSDave Kleikamp if (o_blocks_count + add < o_blocks_count) { 157912062dddSEric Sandeen ext4_warning(sb, "blocks_count overflow"); 1580ac27a0ecSDave Kleikamp return -EINVAL; 1581ac27a0ecSDave Kleikamp } 1582ac27a0ecSDave Kleikamp 1583ac27a0ecSDave Kleikamp if (o_blocks_count + add > n_blocks_count) 1584ac27a0ecSDave Kleikamp add = n_blocks_count - o_blocks_count; 1585ac27a0ecSDave Kleikamp 1586ac27a0ecSDave Kleikamp if (o_blocks_count + add < n_blocks_count) 158712062dddSEric Sandeen ext4_warning(sb, "will only finish group (%llu blocks, %u new)", 1588ac27a0ecSDave Kleikamp o_blocks_count + add, add); 1589ac27a0ecSDave Kleikamp 1590ac27a0ecSDave Kleikamp /* See if the device is actually as big as what was requested */ 1591ac27a0ecSDave Kleikamp bh = sb_bread(sb, o_blocks_count + add - 1); 1592ac27a0ecSDave Kleikamp if (!bh) { 159312062dddSEric Sandeen ext4_warning(sb, "can't read last block, resize aborted"); 1594ac27a0ecSDave Kleikamp return -ENOSPC; 1595ac27a0ecSDave Kleikamp } 1596ac27a0ecSDave Kleikamp brelse(bh); 1597ac27a0ecSDave Kleikamp 1598ac27a0ecSDave Kleikamp /* We will update the superblock, one block bitmap, and 1599617ba13bSMingming Cao * one group descriptor via ext4_free_blocks(). 1600ac27a0ecSDave Kleikamp */ 1601617ba13bSMingming Cao handle = ext4_journal_start_sb(sb, 3); 1602ac27a0ecSDave Kleikamp if (IS_ERR(handle)) { 1603ac27a0ecSDave Kleikamp err = PTR_ERR(handle); 160412062dddSEric Sandeen ext4_warning(sb, "error %d on journal start", err); 1605ac27a0ecSDave Kleikamp goto exit_put; 1606ac27a0ecSDave Kleikamp } 1607ac27a0ecSDave Kleikamp 1608617ba13bSMingming Cao if ((err = ext4_journal_get_write_access(handle, 1609617ba13bSMingming Cao EXT4_SB(sb)->s_sbh))) { 161012062dddSEric Sandeen ext4_warning(sb, "error %d on journal write access", err); 1611617ba13bSMingming Cao ext4_journal_stop(handle); 1612ac27a0ecSDave Kleikamp goto exit_put; 1613ac27a0ecSDave Kleikamp } 1614bd81d8eeSLaurent Vivier ext4_blocks_count_set(es, o_blocks_count + add); 1615c549a95dSEric Sandeen ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, 1616ac27a0ecSDave Kleikamp o_blocks_count + add); 1617e21675d4SAneesh Kumar K.V /* We add the blocks to the bitmap and set the group need init bit */ 1618cc7365dfSYongqiang Yang err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); 1619a0375156STheodore Ts'o ext4_handle_dirty_super(handle, sb); 16202ae02107SMingming Cao ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, 1621ac27a0ecSDave Kleikamp o_blocks_count + add); 1622cc7365dfSYongqiang Yang err2 = ext4_journal_stop(handle); 1623cc7365dfSYongqiang Yang if (!err && err2) 1624cc7365dfSYongqiang Yang err = err2; 1625cc7365dfSYongqiang Yang 1626cc7365dfSYongqiang Yang if (err) 1627ac27a0ecSDave Kleikamp goto exit_put; 16285f21b0e6SFrederic Bohe 1629ac27a0ecSDave Kleikamp if (test_opt(sb, DEBUG)) 1630bd81d8eeSLaurent Vivier printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1631bd81d8eeSLaurent Vivier ext4_blocks_count(es)); 1632617ba13bSMingming Cao update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 1633617ba13bSMingming Cao sizeof(struct ext4_super_block)); 1634ac27a0ecSDave Kleikamp exit_put: 1635ac27a0ecSDave Kleikamp return err; 1636617ba13bSMingming Cao } /* ext4_group_extend */ 1637