1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0 26fd058f7STheodore Ts'o /* 36fd058f7STheodore Ts'o * linux/fs/ext4/block_validity.c 46fd058f7STheodore Ts'o * 56fd058f7STheodore Ts'o * Copyright (C) 2009 66fd058f7STheodore Ts'o * Theodore Ts'o (tytso@mit.edu) 76fd058f7STheodore Ts'o * 86fd058f7STheodore Ts'o * Track which blocks in the filesystem are metadata blocks that 96fd058f7STheodore Ts'o * should never be used as data blocks by files or directories. 106fd058f7STheodore Ts'o */ 116fd058f7STheodore Ts'o 126fd058f7STheodore Ts'o #include <linux/time.h> 136fd058f7STheodore Ts'o #include <linux/fs.h> 146fd058f7STheodore Ts'o #include <linux/namei.h> 156fd058f7STheodore Ts'o #include <linux/quotaops.h> 166fd058f7STheodore Ts'o #include <linux/buffer_head.h> 176fd058f7STheodore Ts'o #include <linux/swap.h> 186fd058f7STheodore Ts'o #include <linux/pagemap.h> 196fd058f7STheodore Ts'o #include <linux/blkdev.h> 205a0e3ad6STejun Heo #include <linux/slab.h> 216fd058f7STheodore Ts'o #include "ext4.h" 226fd058f7STheodore Ts'o 236fd058f7STheodore Ts'o struct ext4_system_zone { 246fd058f7STheodore Ts'o struct rb_node node; 256fd058f7STheodore Ts'o ext4_fsblk_t start_blk; 266fd058f7STheodore Ts'o unsigned int count; 27ce9f24ccSJan Kara u32 ino; 286fd058f7STheodore Ts'o }; 296fd058f7STheodore Ts'o 306fd058f7STheodore Ts'o static struct kmem_cache *ext4_system_zone_cachep; 316fd058f7STheodore Ts'o 325dabfc78STheodore Ts'o int __init ext4_init_system_zone(void) 336fd058f7STheodore Ts'o { 3416828088STheodore Ts'o ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0); 356fd058f7STheodore Ts'o if (ext4_system_zone_cachep == NULL) 366fd058f7STheodore Ts'o return -ENOMEM; 376fd058f7STheodore Ts'o return 0; 386fd058f7STheodore Ts'o } 396fd058f7STheodore Ts'o 405dabfc78STheodore Ts'o void ext4_exit_system_zone(void) 416fd058f7STheodore Ts'o { 427727ae52Szhangyi (F) rcu_barrier(); 436fd058f7STheodore Ts'o kmem_cache_destroy(ext4_system_zone_cachep); 446fd058f7STheodore Ts'o } 456fd058f7STheodore Ts'o 466fd058f7STheodore Ts'o static inline int can_merge(struct ext4_system_zone *entry1, 476fd058f7STheodore Ts'o struct ext4_system_zone *entry2) 486fd058f7STheodore Ts'o { 49ce9f24ccSJan Kara if ((entry1->start_blk + entry1->count) == entry2->start_blk && 50ce9f24ccSJan Kara entry1->ino == entry2->ino) 516fd058f7STheodore Ts'o return 1; 526fd058f7STheodore Ts'o return 0; 536fd058f7STheodore Ts'o } 546fd058f7STheodore Ts'o 557727ae52Szhangyi (F) static void release_system_zone(struct ext4_system_blocks *system_blks) 567727ae52Szhangyi (F) { 577727ae52Szhangyi (F) struct ext4_system_zone *entry, *n; 587727ae52Szhangyi (F) 597727ae52Szhangyi (F) rbtree_postorder_for_each_entry_safe(entry, n, 607727ae52Szhangyi (F) &system_blks->root, node) 617727ae52Szhangyi (F) kmem_cache_free(ext4_system_zone_cachep, entry); 627727ae52Szhangyi (F) } 637727ae52Szhangyi (F) 646fd058f7STheodore Ts'o /* 656fd058f7STheodore Ts'o * Mark a range of blocks as belonging to the "system zone" --- that 666fd058f7STheodore Ts'o * is, filesystem metadata blocks which should never be used by 676fd058f7STheodore Ts'o * inodes. 686fd058f7STheodore Ts'o */ 697727ae52Szhangyi (F) static int add_system_zone(struct ext4_system_blocks *system_blks, 706fd058f7STheodore Ts'o ext4_fsblk_t start_blk, 71ce9f24ccSJan Kara unsigned int count, u32 ino) 726fd058f7STheodore Ts'o { 73bf9a379dSJan Kara struct ext4_system_zone *new_entry, *entry; 747727ae52Szhangyi (F) struct rb_node **n = &system_blks->root.rb_node, *node; 756fd058f7STheodore Ts'o struct rb_node *parent = NULL, *new_node = NULL; 766fd058f7STheodore Ts'o 776fd058f7STheodore Ts'o while (*n) { 786fd058f7STheodore Ts'o parent = *n; 796fd058f7STheodore Ts'o entry = rb_entry(parent, struct ext4_system_zone, node); 806fd058f7STheodore Ts'o if (start_blk < entry->start_blk) 816fd058f7STheodore Ts'o n = &(*n)->rb_left; 826fd058f7STheodore Ts'o else if (start_blk >= (entry->start_blk + entry->count)) 836fd058f7STheodore Ts'o n = &(*n)->rb_right; 84bf9a379dSJan Kara else /* Unexpected overlap of system zones. */ 85bf9a379dSJan Kara return -EFSCORRUPTED; 866fd058f7STheodore Ts'o } 876fd058f7STheodore Ts'o 886fd058f7STheodore Ts'o new_entry = kmem_cache_alloc(ext4_system_zone_cachep, 896fd058f7STheodore Ts'o GFP_KERNEL); 906fd058f7STheodore Ts'o if (!new_entry) 916fd058f7STheodore Ts'o return -ENOMEM; 926fd058f7STheodore Ts'o new_entry->start_blk = start_blk; 936fd058f7STheodore Ts'o new_entry->count = count; 94ce9f24ccSJan Kara new_entry->ino = ino; 956fd058f7STheodore Ts'o new_node = &new_entry->node; 966fd058f7STheodore Ts'o 976fd058f7STheodore Ts'o rb_link_node(new_node, parent, n); 987727ae52Szhangyi (F) rb_insert_color(new_node, &system_blks->root); 996fd058f7STheodore Ts'o 1006fd058f7STheodore Ts'o /* Can we merge to the left? */ 1016fd058f7STheodore Ts'o node = rb_prev(new_node); 1026fd058f7STheodore Ts'o if (node) { 1036fd058f7STheodore Ts'o entry = rb_entry(node, struct ext4_system_zone, node); 1046fd058f7STheodore Ts'o if (can_merge(entry, new_entry)) { 1056fd058f7STheodore Ts'o new_entry->start_blk = entry->start_blk; 1066fd058f7STheodore Ts'o new_entry->count += entry->count; 1077727ae52Szhangyi (F) rb_erase(node, &system_blks->root); 1086fd058f7STheodore Ts'o kmem_cache_free(ext4_system_zone_cachep, entry); 1096fd058f7STheodore Ts'o } 1106fd058f7STheodore Ts'o } 1116fd058f7STheodore Ts'o 1126fd058f7STheodore Ts'o /* Can we merge to the right? */ 1136fd058f7STheodore Ts'o node = rb_next(new_node); 1146fd058f7STheodore Ts'o if (node) { 1156fd058f7STheodore Ts'o entry = rb_entry(node, struct ext4_system_zone, node); 1166fd058f7STheodore Ts'o if (can_merge(new_entry, entry)) { 1176fd058f7STheodore Ts'o new_entry->count += entry->count; 1187727ae52Szhangyi (F) rb_erase(node, &system_blks->root); 1196fd058f7STheodore Ts'o kmem_cache_free(ext4_system_zone_cachep, entry); 1206fd058f7STheodore Ts'o } 1216fd058f7STheodore Ts'o } 1226fd058f7STheodore Ts'o return 0; 1236fd058f7STheodore Ts'o } 1246fd058f7STheodore Ts'o 1256fd058f7STheodore Ts'o static void debug_print_tree(struct ext4_sb_info *sbi) 1266fd058f7STheodore Ts'o { 1276fd058f7STheodore Ts'o struct rb_node *node; 1286fd058f7STheodore Ts'o struct ext4_system_zone *entry; 12969000d82SPhong Tran struct ext4_system_blocks *system_blks; 1306fd058f7STheodore Ts'o int first = 1; 1316fd058f7STheodore Ts'o 1326fd058f7STheodore Ts'o printk(KERN_INFO "System zones: "); 13369000d82SPhong Tran rcu_read_lock(); 134dd0db94fSChunguang Xu system_blks = rcu_dereference(sbi->s_system_blks); 13569000d82SPhong Tran node = rb_first(&system_blks->root); 1366fd058f7STheodore Ts'o while (node) { 1376fd058f7STheodore Ts'o entry = rb_entry(node, struct ext4_system_zone, node); 138d74f3d25SJoe Perches printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ", 1396fd058f7STheodore Ts'o entry->start_blk, entry->start_blk + entry->count - 1); 1406fd058f7STheodore Ts'o first = 0; 1416fd058f7STheodore Ts'o node = rb_next(node); 1426fd058f7STheodore Ts'o } 14369000d82SPhong Tran rcu_read_unlock(); 144d74f3d25SJoe Perches printk(KERN_CONT "\n"); 1456fd058f7STheodore Ts'o } 1466fd058f7STheodore Ts'o 1477727ae52Szhangyi (F) static int ext4_protect_reserved_inode(struct super_block *sb, 1487727ae52Szhangyi (F) struct ext4_system_blocks *system_blks, 1497727ae52Szhangyi (F) u32 ino) 150345c0dbfSTheodore Ts'o { 151345c0dbfSTheodore Ts'o struct inode *inode; 152345c0dbfSTheodore Ts'o struct ext4_sb_info *sbi = EXT4_SB(sb); 153345c0dbfSTheodore Ts'o struct ext4_map_blocks map; 154fbbbbd2fSColin Ian King u32 i = 0, num; 155fbbbbd2fSColin Ian King int err = 0, n; 156345c0dbfSTheodore Ts'o 157345c0dbfSTheodore Ts'o if ((ino < EXT4_ROOT_INO) || 158345c0dbfSTheodore Ts'o (ino > le32_to_cpu(sbi->s_es->s_inodes_count))) 159345c0dbfSTheodore Ts'o return -EINVAL; 160345c0dbfSTheodore Ts'o inode = ext4_iget(sb, ino, EXT4_IGET_SPECIAL); 161345c0dbfSTheodore Ts'o if (IS_ERR(inode)) 162345c0dbfSTheodore Ts'o return PTR_ERR(inode); 163345c0dbfSTheodore Ts'o num = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; 164345c0dbfSTheodore Ts'o while (i < num) { 165af133adeSShijie Luo cond_resched(); 166345c0dbfSTheodore Ts'o map.m_lblk = i; 167345c0dbfSTheodore Ts'o map.m_len = num - i; 168345c0dbfSTheodore Ts'o n = ext4_map_blocks(NULL, inode, &map, 0); 169345c0dbfSTheodore Ts'o if (n < 0) { 170345c0dbfSTheodore Ts'o err = n; 171345c0dbfSTheodore Ts'o break; 172345c0dbfSTheodore Ts'o } 173345c0dbfSTheodore Ts'o if (n == 0) { 174345c0dbfSTheodore Ts'o i++; 175345c0dbfSTheodore Ts'o } else { 176ce9f24ccSJan Kara err = add_system_zone(system_blks, map.m_pblk, n, ino); 177ce9f24ccSJan Kara if (err < 0) { 178ce9f24ccSJan Kara if (err == -EFSCORRUPTED) { 179ce9f24ccSJan Kara __ext4_error(sb, __func__, __LINE__, 180ce9f24ccSJan Kara -err, map.m_pblk, 181ce9f24ccSJan Kara "blocks %llu-%llu from inode %u overlap system zone", 18254d3adbcSTheodore Ts'o map.m_pblk, 183ce9f24ccSJan Kara map.m_pblk + map.m_len - 1, 184ce9f24ccSJan Kara ino); 185ce9f24ccSJan Kara } 186345c0dbfSTheodore Ts'o break; 187345c0dbfSTheodore Ts'o } 188345c0dbfSTheodore Ts'o i += n; 189345c0dbfSTheodore Ts'o } 190345c0dbfSTheodore Ts'o } 191345c0dbfSTheodore Ts'o iput(inode); 192345c0dbfSTheodore Ts'o return err; 193345c0dbfSTheodore Ts'o } 194345c0dbfSTheodore Ts'o 1957727ae52Szhangyi (F) static void ext4_destroy_system_zone(struct rcu_head *rcu) 1967727ae52Szhangyi (F) { 1977727ae52Szhangyi (F) struct ext4_system_blocks *system_blks; 1987727ae52Szhangyi (F) 1997727ae52Szhangyi (F) system_blks = container_of(rcu, struct ext4_system_blocks, rcu); 2007727ae52Szhangyi (F) release_system_zone(system_blks); 2017727ae52Szhangyi (F) kfree(system_blks); 2027727ae52Szhangyi (F) } 2037727ae52Szhangyi (F) 2047727ae52Szhangyi (F) /* 2057727ae52Szhangyi (F) * Build system zone rbtree which is used for block validity checking. 2067727ae52Szhangyi (F) * 2077727ae52Szhangyi (F) * The update of system_blks pointer in this function is protected by 2087727ae52Szhangyi (F) * sb->s_umount semaphore. However we have to be careful as we can be 209*8041ac64SChunguang Xu * racing with ext4_inode_block_valid() calls reading system_blks rbtree 2107727ae52Szhangyi (F) * protected only by RCU. That's why we first build the rbtree and then 2117727ae52Szhangyi (F) * swap it in place. 2127727ae52Szhangyi (F) */ 2136fd058f7STheodore Ts'o int ext4_setup_system_zone(struct super_block *sb) 2146fd058f7STheodore Ts'o { 2156fd058f7STheodore Ts'o ext4_group_t ngroups = ext4_get_groups_count(sb); 2166fd058f7STheodore Ts'o struct ext4_sb_info *sbi = EXT4_SB(sb); 2177727ae52Szhangyi (F) struct ext4_system_blocks *system_blks; 2186fd058f7STheodore Ts'o struct ext4_group_desc *gdp; 2196fd058f7STheodore Ts'o ext4_group_t i; 2206fd058f7STheodore Ts'o int flex_size = ext4_flex_bg_size(sbi); 2216fd058f7STheodore Ts'o int ret; 2226fd058f7STheodore Ts'o 2237727ae52Szhangyi (F) system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL); 2247727ae52Szhangyi (F) if (!system_blks) 2257727ae52Szhangyi (F) return -ENOMEM; 2267727ae52Szhangyi (F) 2276fd058f7STheodore Ts'o for (i=0; i < ngroups; i++) { 2284b99faa2SKhazhismel Kumykov cond_resched(); 2296fd058f7STheodore Ts'o if (ext4_bg_has_super(sb, i) && 230e7bfb5c9SJan Kara ((i < 5) || ((i % flex_size) == 0))) { 231e7bfb5c9SJan Kara ret = add_system_zone(system_blks, 2327727ae52Szhangyi (F) ext4_group_first_block_no(sb, i), 233ce9f24ccSJan Kara ext4_bg_num_gdb(sb, i) + 1, 0); 234e7bfb5c9SJan Kara if (ret) 235e7bfb5c9SJan Kara goto err; 236e7bfb5c9SJan Kara } 2376fd058f7STheodore Ts'o gdp = ext4_get_group_desc(sb, i, NULL); 2387727ae52Szhangyi (F) ret = add_system_zone(system_blks, 239ce9f24ccSJan Kara ext4_block_bitmap(sb, gdp), 1, 0); 2406fd058f7STheodore Ts'o if (ret) 2417727ae52Szhangyi (F) goto err; 2427727ae52Szhangyi (F) ret = add_system_zone(system_blks, 243ce9f24ccSJan Kara ext4_inode_bitmap(sb, gdp), 1, 0); 2446fd058f7STheodore Ts'o if (ret) 2457727ae52Szhangyi (F) goto err; 2467727ae52Szhangyi (F) ret = add_system_zone(system_blks, 2477727ae52Szhangyi (F) ext4_inode_table(sb, gdp), 248ce9f24ccSJan Kara sbi->s_itb_per_group, 0); 2496fd058f7STheodore Ts'o if (ret) 2507727ae52Szhangyi (F) goto err; 2516fd058f7STheodore Ts'o } 252345c0dbfSTheodore Ts'o if (ext4_has_feature_journal(sb) && sbi->s_es->s_journal_inum) { 2537727ae52Szhangyi (F) ret = ext4_protect_reserved_inode(sb, system_blks, 254345c0dbfSTheodore Ts'o le32_to_cpu(sbi->s_es->s_journal_inum)); 255345c0dbfSTheodore Ts'o if (ret) 2567727ae52Szhangyi (F) goto err; 257345c0dbfSTheodore Ts'o } 2586fd058f7STheodore Ts'o 2597727ae52Szhangyi (F) /* 2607727ae52Szhangyi (F) * System blks rbtree complete, announce it once to prevent racing 261*8041ac64SChunguang Xu * with ext4_inode_block_valid() accessing the rbtree at the same 2627727ae52Szhangyi (F) * time. 2637727ae52Szhangyi (F) */ 264dd0db94fSChunguang Xu rcu_assign_pointer(sbi->s_system_blks, system_blks); 2657727ae52Szhangyi (F) 2666fd058f7STheodore Ts'o if (test_opt(sb, DEBUG)) 26749598e04SJun Piao debug_print_tree(sbi); 2686fd058f7STheodore Ts'o return 0; 2697727ae52Szhangyi (F) err: 2707727ae52Szhangyi (F) release_system_zone(system_blks); 2717727ae52Szhangyi (F) kfree(system_blks); 2727727ae52Szhangyi (F) return ret; 2736fd058f7STheodore Ts'o } 2746fd058f7STheodore Ts'o 2756fd058f7STheodore Ts'o /* 2767727ae52Szhangyi (F) * Called when the filesystem is unmounted or when remounting it with 2777727ae52Szhangyi (F) * noblock_validity specified. 2787727ae52Szhangyi (F) * 2797727ae52Szhangyi (F) * The update of system_blks pointer in this function is protected by 2807727ae52Szhangyi (F) * sb->s_umount semaphore. However we have to be careful as we can be 281*8041ac64SChunguang Xu * racing with ext4_inode_block_valid() calls reading system_blks rbtree 2827727ae52Szhangyi (F) * protected only by RCU. So we first clear the system_blks pointer and 2837727ae52Szhangyi (F) * then free the rbtree only after RCU grace period expires. 2846fd058f7STheodore Ts'o */ 2857727ae52Szhangyi (F) void ext4_release_system_zone(struct super_block *sb) 2867727ae52Szhangyi (F) { 2877727ae52Szhangyi (F) struct ext4_system_blocks *system_blks; 2887727ae52Szhangyi (F) 289dd0db94fSChunguang Xu system_blks = rcu_dereference_protected(EXT4_SB(sb)->s_system_blks, 2907727ae52Szhangyi (F) lockdep_is_held(&sb->s_umount)); 291dd0db94fSChunguang Xu rcu_assign_pointer(EXT4_SB(sb)->s_system_blks, NULL); 2927727ae52Szhangyi (F) 2937727ae52Szhangyi (F) if (system_blks) 2947727ae52Szhangyi (F) call_rcu(&system_blks->rcu, ext4_destroy_system_zone); 2957727ae52Szhangyi (F) } 2967727ae52Szhangyi (F) 2973f67e7cfSJan Kara /* 2983f67e7cfSJan Kara * Returns 1 if the passed-in block region (start_blk, 2993f67e7cfSJan Kara * start_blk+count) is valid; 0 if some part of the block region 3003f67e7cfSJan Kara * overlaps with some other filesystem metadata blocks. 3013f67e7cfSJan Kara */ 302ce9f24ccSJan Kara int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, 3036fd058f7STheodore Ts'o unsigned int count) 3046fd058f7STheodore Ts'o { 3053f67e7cfSJan Kara struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3067727ae52Szhangyi (F) struct ext4_system_blocks *system_blks; 3073f67e7cfSJan Kara struct ext4_system_zone *entry; 3083f67e7cfSJan Kara struct rb_node *n; 3093f67e7cfSJan Kara int ret = 1; 3103f67e7cfSJan Kara 3113f67e7cfSJan Kara if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 3123f67e7cfSJan Kara (start_blk + count < start_blk) || 3133f67e7cfSJan Kara (start_blk + count > ext4_blocks_count(sbi->s_es))) 3143f67e7cfSJan Kara return 0; 3156fd058f7STheodore Ts'o 3167727ae52Szhangyi (F) /* 3177727ae52Szhangyi (F) * Lock the system zone to prevent it being released concurrently 3187727ae52Szhangyi (F) * when doing a remount which inverse current "[no]block_validity" 3197727ae52Szhangyi (F) * mount option. 3207727ae52Szhangyi (F) */ 3217727ae52Szhangyi (F) rcu_read_lock(); 322dd0db94fSChunguang Xu system_blks = rcu_dereference(sbi->s_system_blks); 3233f67e7cfSJan Kara if (system_blks == NULL) 3243f67e7cfSJan Kara goto out_rcu; 3253f67e7cfSJan Kara 3263f67e7cfSJan Kara n = system_blks->root.rb_node; 3273f67e7cfSJan Kara while (n) { 3283f67e7cfSJan Kara entry = rb_entry(n, struct ext4_system_zone, node); 3293f67e7cfSJan Kara if (start_blk + count - 1 < entry->start_blk) 3303f67e7cfSJan Kara n = n->rb_left; 3313f67e7cfSJan Kara else if (start_blk >= (entry->start_blk + entry->count)) 3323f67e7cfSJan Kara n = n->rb_right; 3333f67e7cfSJan Kara else { 3343f67e7cfSJan Kara ret = (entry->ino == inode->i_ino); 3353f67e7cfSJan Kara break; 3363f67e7cfSJan Kara } 3373f67e7cfSJan Kara } 3383f67e7cfSJan Kara out_rcu: 3397727ae52Szhangyi (F) rcu_read_unlock(); 3407727ae52Szhangyi (F) return ret; 3416fd058f7STheodore Ts'o } 3426fd058f7STheodore Ts'o 3431f7d1e77STheodore Ts'o int ext4_check_blockref(const char *function, unsigned int line, 3441f7d1e77STheodore Ts'o struct inode *inode, __le32 *p, unsigned int max) 3451f7d1e77STheodore Ts'o { 3461f7d1e77STheodore Ts'o __le32 *bref = p; 3471f7d1e77STheodore Ts'o unsigned int blk; 3481f7d1e77STheodore Ts'o 349170417c8STheodore Ts'o if (ext4_has_feature_journal(inode->i_sb) && 350170417c8STheodore Ts'o (inode->i_ino == 351170417c8STheodore Ts'o le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) 352170417c8STheodore Ts'o return 0; 353170417c8STheodore Ts'o 3541f7d1e77STheodore Ts'o while (bref < p+max) { 3551f7d1e77STheodore Ts'o blk = le32_to_cpu(*bref++); 3561f7d1e77STheodore Ts'o if (blk && 357ce9f24ccSJan Kara unlikely(!ext4_inode_block_valid(inode, blk, 1))) { 3581f7d1e77STheodore Ts'o ext4_error_inode(inode, function, line, blk, 3591f7d1e77STheodore Ts'o "invalid block"); 3606a797d27SDarrick J. Wong return -EFSCORRUPTED; 3611f7d1e77STheodore Ts'o } 3621f7d1e77STheodore Ts'o } 3631f7d1e77STheodore Ts'o return 0; 3641f7d1e77STheodore Ts'o } 365dae1e52cSAmir Goldstein 366