xref: /openbmc/linux/fs/ext4/orphan.c (revision 02f310fc)
125c6d98fSJan Kara /*
225c6d98fSJan Kara  * Ext4 orphan inode handling
325c6d98fSJan Kara  */
425c6d98fSJan Kara #include <linux/fs.h>
525c6d98fSJan Kara #include <linux/quotaops.h>
625c6d98fSJan Kara #include <linux/buffer_head.h>
725c6d98fSJan Kara 
825c6d98fSJan Kara #include "ext4.h"
925c6d98fSJan Kara #include "ext4_jbd2.h"
1025c6d98fSJan Kara 
11*02f310fcSJan Kara static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
12*02f310fcSJan Kara {
13*02f310fcSJan Kara 	int i, j;
14*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
15*02f310fcSJan Kara 	int ret = 0;
16*02f310fcSJan Kara 	__le32 *bdata;
17*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
18*02f310fcSJan Kara 
19*02f310fcSJan Kara 	spin_lock(&oi->of_lock);
20*02f310fcSJan Kara 	for (i = 0; i < oi->of_blocks && !oi->of_binfo[i].ob_free_entries; i++);
21*02f310fcSJan Kara 	if (i == oi->of_blocks) {
22*02f310fcSJan Kara 		spin_unlock(&oi->of_lock);
23*02f310fcSJan Kara 		/*
24*02f310fcSJan Kara 		 * For now we don't grow or shrink orphan file. We just use
25*02f310fcSJan Kara 		 * whatever was allocated at mke2fs time. The additional
26*02f310fcSJan Kara 		 * credits we would have to reserve for each orphan inode
27*02f310fcSJan Kara 		 * operation just don't seem worth it.
28*02f310fcSJan Kara 		 */
29*02f310fcSJan Kara 		return -ENOSPC;
30*02f310fcSJan Kara 	}
31*02f310fcSJan Kara 	oi->of_binfo[i].ob_free_entries--;
32*02f310fcSJan Kara 	spin_unlock(&oi->of_lock);
33*02f310fcSJan Kara 
34*02f310fcSJan Kara 	/*
35*02f310fcSJan Kara 	 * Get access to orphan block. We have dropped of_lock but since we
36*02f310fcSJan Kara 	 * have decremented number of free entries we are guaranteed free entry
37*02f310fcSJan Kara 	 * in our block.
38*02f310fcSJan Kara 	 */
39*02f310fcSJan Kara 	ret = ext4_journal_get_write_access(handle, inode->i_sb,
40*02f310fcSJan Kara 				oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE);
41*02f310fcSJan Kara 	if (ret)
42*02f310fcSJan Kara 		return ret;
43*02f310fcSJan Kara 
44*02f310fcSJan Kara 	bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
45*02f310fcSJan Kara 	spin_lock(&oi->of_lock);
46*02f310fcSJan Kara 	/* Find empty slot in a block */
47*02f310fcSJan Kara 	for (j = 0; j < inodes_per_ob && bdata[j]; j++);
48*02f310fcSJan Kara 	BUG_ON(j == inodes_per_ob);
49*02f310fcSJan Kara 	bdata[j] = cpu_to_le32(inode->i_ino);
50*02f310fcSJan Kara 	EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
51*02f310fcSJan Kara 	ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
52*02f310fcSJan Kara 	spin_unlock(&oi->of_lock);
53*02f310fcSJan Kara 
54*02f310fcSJan Kara 	return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
55*02f310fcSJan Kara }
56*02f310fcSJan Kara 
5725c6d98fSJan Kara /*
5825c6d98fSJan Kara  * ext4_orphan_add() links an unlinked or truncated inode into a list of
5925c6d98fSJan Kara  * such inodes, starting at the superblock, in case we crash before the
6025c6d98fSJan Kara  * file is closed/deleted, or in case the inode truncate spans multiple
6125c6d98fSJan Kara  * transactions and the last transaction is not recovered after a crash.
6225c6d98fSJan Kara  *
6325c6d98fSJan Kara  * At filesystem recovery time, we walk this list deleting unlinked
6425c6d98fSJan Kara  * inodes and truncating linked inodes in ext4_orphan_cleanup().
6525c6d98fSJan Kara  *
6625c6d98fSJan Kara  * Orphan list manipulation functions must be called under i_mutex unless
6725c6d98fSJan Kara  * we are just creating the inode or deleting it.
6825c6d98fSJan Kara  */
6925c6d98fSJan Kara int ext4_orphan_add(handle_t *handle, struct inode *inode)
7025c6d98fSJan Kara {
7125c6d98fSJan Kara 	struct super_block *sb = inode->i_sb;
7225c6d98fSJan Kara 	struct ext4_sb_info *sbi = EXT4_SB(sb);
7325c6d98fSJan Kara 	struct ext4_iloc iloc;
7425c6d98fSJan Kara 	int err = 0, rc;
7525c6d98fSJan Kara 	bool dirty = false;
7625c6d98fSJan Kara 
7725c6d98fSJan Kara 	if (!sbi->s_journal || is_bad_inode(inode))
7825c6d98fSJan Kara 		return 0;
7925c6d98fSJan Kara 
8025c6d98fSJan Kara 	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
8125c6d98fSJan Kara 		     !inode_is_locked(inode));
8225c6d98fSJan Kara 	/*
83*02f310fcSJan Kara 	 * Inode orphaned in orphan file or in orphan list?
8425c6d98fSJan Kara 	 */
85*02f310fcSJan Kara 	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
86*02f310fcSJan Kara 	    !list_empty(&EXT4_I(inode)->i_orphan))
8725c6d98fSJan Kara 		return 0;
8825c6d98fSJan Kara 
8925c6d98fSJan Kara 	/*
9025c6d98fSJan Kara 	 * Orphan handling is only valid for files with data blocks
9125c6d98fSJan Kara 	 * being truncated, or files being unlinked. Note that we either
9225c6d98fSJan Kara 	 * hold i_mutex, or the inode can not be referenced from outside,
9325c6d98fSJan Kara 	 * so i_nlink should not be bumped due to race
9425c6d98fSJan Kara 	 */
9525c6d98fSJan Kara 	ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
9625c6d98fSJan Kara 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
9725c6d98fSJan Kara 
98*02f310fcSJan Kara 	if (sbi->s_orphan_info.of_blocks) {
99*02f310fcSJan Kara 		err = ext4_orphan_file_add(handle, inode);
100*02f310fcSJan Kara 		/*
101*02f310fcSJan Kara 		 * Fallback to normal orphan list of orphan file is
102*02f310fcSJan Kara 		 * out of space
103*02f310fcSJan Kara 		 */
104*02f310fcSJan Kara 		if (err != -ENOSPC)
105*02f310fcSJan Kara 			return err;
106*02f310fcSJan Kara 	}
107*02f310fcSJan Kara 
10825c6d98fSJan Kara 	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
10925c6d98fSJan Kara 	err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
11025c6d98fSJan Kara 					    EXT4_JTR_NONE);
11125c6d98fSJan Kara 	if (err)
11225c6d98fSJan Kara 		goto out;
11325c6d98fSJan Kara 
11425c6d98fSJan Kara 	err = ext4_reserve_inode_write(handle, inode, &iloc);
11525c6d98fSJan Kara 	if (err)
11625c6d98fSJan Kara 		goto out;
11725c6d98fSJan Kara 
11825c6d98fSJan Kara 	mutex_lock(&sbi->s_orphan_lock);
11925c6d98fSJan Kara 	/*
12025c6d98fSJan Kara 	 * Due to previous errors inode may be already a part of on-disk
12125c6d98fSJan Kara 	 * orphan list. If so skip on-disk list modification.
12225c6d98fSJan Kara 	 */
12325c6d98fSJan Kara 	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
12425c6d98fSJan Kara 	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
12525c6d98fSJan Kara 		/* Insert this inode at the head of the on-disk orphan list */
12625c6d98fSJan Kara 		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
12725c6d98fSJan Kara 		lock_buffer(sbi->s_sbh);
12825c6d98fSJan Kara 		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
12925c6d98fSJan Kara 		ext4_superblock_csum_set(sb);
13025c6d98fSJan Kara 		unlock_buffer(sbi->s_sbh);
13125c6d98fSJan Kara 		dirty = true;
13225c6d98fSJan Kara 	}
13325c6d98fSJan Kara 	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
13425c6d98fSJan Kara 	mutex_unlock(&sbi->s_orphan_lock);
13525c6d98fSJan Kara 
13625c6d98fSJan Kara 	if (dirty) {
13725c6d98fSJan Kara 		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
13825c6d98fSJan Kara 		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
13925c6d98fSJan Kara 		if (!err)
14025c6d98fSJan Kara 			err = rc;
14125c6d98fSJan Kara 		if (err) {
14225c6d98fSJan Kara 			/*
14325c6d98fSJan Kara 			 * We have to remove inode from in-memory list if
14425c6d98fSJan Kara 			 * addition to on disk orphan list failed. Stray orphan
14525c6d98fSJan Kara 			 * list entries can cause panics at unmount time.
14625c6d98fSJan Kara 			 */
14725c6d98fSJan Kara 			mutex_lock(&sbi->s_orphan_lock);
14825c6d98fSJan Kara 			list_del_init(&EXT4_I(inode)->i_orphan);
14925c6d98fSJan Kara 			mutex_unlock(&sbi->s_orphan_lock);
15025c6d98fSJan Kara 		}
15125c6d98fSJan Kara 	} else
15225c6d98fSJan Kara 		brelse(iloc.bh);
15325c6d98fSJan Kara 
15425c6d98fSJan Kara 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
15525c6d98fSJan Kara 	jbd_debug(4, "orphan inode %lu will point to %d\n",
15625c6d98fSJan Kara 			inode->i_ino, NEXT_ORPHAN(inode));
15725c6d98fSJan Kara out:
15825c6d98fSJan Kara 	ext4_std_error(sb, err);
15925c6d98fSJan Kara 	return err;
16025c6d98fSJan Kara }
16125c6d98fSJan Kara 
162*02f310fcSJan Kara static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
163*02f310fcSJan Kara {
164*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
165*02f310fcSJan Kara 	__le32 *bdata;
166*02f310fcSJan Kara 	int blk, off;
167*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
168*02f310fcSJan Kara 	int ret = 0;
169*02f310fcSJan Kara 
170*02f310fcSJan Kara 	if (!handle)
171*02f310fcSJan Kara 		goto out;
172*02f310fcSJan Kara 	blk = EXT4_I(inode)->i_orphan_idx / inodes_per_ob;
173*02f310fcSJan Kara 	off = EXT4_I(inode)->i_orphan_idx % inodes_per_ob;
174*02f310fcSJan Kara 	if (WARN_ON_ONCE(blk >= oi->of_blocks))
175*02f310fcSJan Kara 		goto out;
176*02f310fcSJan Kara 
177*02f310fcSJan Kara 	ret = ext4_journal_get_write_access(handle, inode->i_sb,
178*02f310fcSJan Kara 				oi->of_binfo[blk].ob_bh, EXT4_JTR_ORPHAN_FILE);
179*02f310fcSJan Kara 	if (ret)
180*02f310fcSJan Kara 		goto out;
181*02f310fcSJan Kara 
182*02f310fcSJan Kara 	bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data);
183*02f310fcSJan Kara 	spin_lock(&oi->of_lock);
184*02f310fcSJan Kara 	bdata[off] = 0;
185*02f310fcSJan Kara 	oi->of_binfo[blk].ob_free_entries++;
186*02f310fcSJan Kara 	spin_unlock(&oi->of_lock);
187*02f310fcSJan Kara 	ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh);
188*02f310fcSJan Kara out:
189*02f310fcSJan Kara 	ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
190*02f310fcSJan Kara 	INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan);
191*02f310fcSJan Kara 
192*02f310fcSJan Kara 	return ret;
193*02f310fcSJan Kara }
194*02f310fcSJan Kara 
19525c6d98fSJan Kara /*
19625c6d98fSJan Kara  * ext4_orphan_del() removes an unlinked or truncated inode from the list
19725c6d98fSJan Kara  * of such inodes stored on disk, because it is finally being cleaned up.
19825c6d98fSJan Kara  */
19925c6d98fSJan Kara int ext4_orphan_del(handle_t *handle, struct inode *inode)
20025c6d98fSJan Kara {
20125c6d98fSJan Kara 	struct list_head *prev;
20225c6d98fSJan Kara 	struct ext4_inode_info *ei = EXT4_I(inode);
20325c6d98fSJan Kara 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
20425c6d98fSJan Kara 	__u32 ino_next;
20525c6d98fSJan Kara 	struct ext4_iloc iloc;
20625c6d98fSJan Kara 	int err = 0;
20725c6d98fSJan Kara 
20825c6d98fSJan Kara 	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
20925c6d98fSJan Kara 		return 0;
21025c6d98fSJan Kara 
21125c6d98fSJan Kara 	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
21225c6d98fSJan Kara 		     !inode_is_locked(inode));
213*02f310fcSJan Kara 	if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE))
214*02f310fcSJan Kara 		return ext4_orphan_file_del(handle, inode);
215*02f310fcSJan Kara 
21625c6d98fSJan Kara 	/* Do this quick check before taking global s_orphan_lock. */
21725c6d98fSJan Kara 	if (list_empty(&ei->i_orphan))
21825c6d98fSJan Kara 		return 0;
21925c6d98fSJan Kara 
22025c6d98fSJan Kara 	if (handle) {
22125c6d98fSJan Kara 		/* Grab inode buffer early before taking global s_orphan_lock */
22225c6d98fSJan Kara 		err = ext4_reserve_inode_write(handle, inode, &iloc);
22325c6d98fSJan Kara 	}
22425c6d98fSJan Kara 
22525c6d98fSJan Kara 	mutex_lock(&sbi->s_orphan_lock);
22625c6d98fSJan Kara 	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
22725c6d98fSJan Kara 
22825c6d98fSJan Kara 	prev = ei->i_orphan.prev;
22925c6d98fSJan Kara 	list_del_init(&ei->i_orphan);
23025c6d98fSJan Kara 
23125c6d98fSJan Kara 	/* If we're on an error path, we may not have a valid
23225c6d98fSJan Kara 	 * transaction handle with which to update the orphan list on
23325c6d98fSJan Kara 	 * disk, but we still need to remove the inode from the linked
23425c6d98fSJan Kara 	 * list in memory. */
23525c6d98fSJan Kara 	if (!handle || err) {
23625c6d98fSJan Kara 		mutex_unlock(&sbi->s_orphan_lock);
23725c6d98fSJan Kara 		goto out_err;
23825c6d98fSJan Kara 	}
23925c6d98fSJan Kara 
24025c6d98fSJan Kara 	ino_next = NEXT_ORPHAN(inode);
24125c6d98fSJan Kara 	if (prev == &sbi->s_orphan) {
24225c6d98fSJan Kara 		jbd_debug(4, "superblock will point to %u\n", ino_next);
24325c6d98fSJan Kara 		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
24425c6d98fSJan Kara 		err = ext4_journal_get_write_access(handle, inode->i_sb,
24525c6d98fSJan Kara 						    sbi->s_sbh, EXT4_JTR_NONE);
24625c6d98fSJan Kara 		if (err) {
24725c6d98fSJan Kara 			mutex_unlock(&sbi->s_orphan_lock);
24825c6d98fSJan Kara 			goto out_brelse;
24925c6d98fSJan Kara 		}
25025c6d98fSJan Kara 		lock_buffer(sbi->s_sbh);
25125c6d98fSJan Kara 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
25225c6d98fSJan Kara 		ext4_superblock_csum_set(inode->i_sb);
25325c6d98fSJan Kara 		unlock_buffer(sbi->s_sbh);
25425c6d98fSJan Kara 		mutex_unlock(&sbi->s_orphan_lock);
25525c6d98fSJan Kara 		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
25625c6d98fSJan Kara 	} else {
25725c6d98fSJan Kara 		struct ext4_iloc iloc2;
25825c6d98fSJan Kara 		struct inode *i_prev =
25925c6d98fSJan Kara 			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
26025c6d98fSJan Kara 
26125c6d98fSJan Kara 		jbd_debug(4, "orphan inode %lu will point to %u\n",
26225c6d98fSJan Kara 			  i_prev->i_ino, ino_next);
26325c6d98fSJan Kara 		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
26425c6d98fSJan Kara 		if (err) {
26525c6d98fSJan Kara 			mutex_unlock(&sbi->s_orphan_lock);
26625c6d98fSJan Kara 			goto out_brelse;
26725c6d98fSJan Kara 		}
26825c6d98fSJan Kara 		NEXT_ORPHAN(i_prev) = ino_next;
26925c6d98fSJan Kara 		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
27025c6d98fSJan Kara 		mutex_unlock(&sbi->s_orphan_lock);
27125c6d98fSJan Kara 	}
27225c6d98fSJan Kara 	if (err)
27325c6d98fSJan Kara 		goto out_brelse;
27425c6d98fSJan Kara 	NEXT_ORPHAN(inode) = 0;
27525c6d98fSJan Kara 	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
27625c6d98fSJan Kara out_err:
27725c6d98fSJan Kara 	ext4_std_error(inode->i_sb, err);
27825c6d98fSJan Kara 	return err;
27925c6d98fSJan Kara 
28025c6d98fSJan Kara out_brelse:
28125c6d98fSJan Kara 	brelse(iloc.bh);
28225c6d98fSJan Kara 	goto out_err;
28325c6d98fSJan Kara }
28425c6d98fSJan Kara 
#ifdef CONFIG_QUOTA
/*
 * Turn on quota of the given @type using the quota file name stored in
 * s_qf_names[] and the journaled quota format from s_jquota_fmt. The name
 * is read under the protection of sb->s_umount. Returns whatever
 * dquot_quota_on_mount() returns.
 */
static int ext4_quota_on_mount(struct super_block *sb, int type)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qf_name;

	qf_name = rcu_dereference_protected(sbi->s_qf_names[type],
					    lockdep_is_held(&sb->s_umount));

	return dquot_quota_on_mount(sb, qf_name, sbi->s_jquota_fmt, type);
}
#endif
29425c6d98fSJan Kara 
295*02f310fcSJan Kara static void ext4_process_orphan(struct inode *inode,
296*02f310fcSJan Kara 				int *nr_truncates, int *nr_orphans)
297*02f310fcSJan Kara {
298*02f310fcSJan Kara 	struct super_block *sb = inode->i_sb;
299*02f310fcSJan Kara 	int ret;
300*02f310fcSJan Kara 
301*02f310fcSJan Kara 	dquot_initialize(inode);
302*02f310fcSJan Kara 	if (inode->i_nlink) {
303*02f310fcSJan Kara 		if (test_opt(sb, DEBUG))
304*02f310fcSJan Kara 			ext4_msg(sb, KERN_DEBUG,
305*02f310fcSJan Kara 				"%s: truncating inode %lu to %lld bytes",
306*02f310fcSJan Kara 				__func__, inode->i_ino, inode->i_size);
307*02f310fcSJan Kara 		jbd_debug(2, "truncating inode %lu to %lld bytes\n",
308*02f310fcSJan Kara 			  inode->i_ino, inode->i_size);
309*02f310fcSJan Kara 		inode_lock(inode);
310*02f310fcSJan Kara 		truncate_inode_pages(inode->i_mapping, inode->i_size);
311*02f310fcSJan Kara 		ret = ext4_truncate(inode);
312*02f310fcSJan Kara 		if (ret) {
313*02f310fcSJan Kara 			/*
314*02f310fcSJan Kara 			 * We need to clean up the in-core orphan list
315*02f310fcSJan Kara 			 * manually if ext4_truncate() failed to get a
316*02f310fcSJan Kara 			 * transaction handle.
317*02f310fcSJan Kara 			 */
318*02f310fcSJan Kara 			ext4_orphan_del(NULL, inode);
319*02f310fcSJan Kara 			ext4_std_error(inode->i_sb, ret);
320*02f310fcSJan Kara 		}
321*02f310fcSJan Kara 		inode_unlock(inode);
322*02f310fcSJan Kara 		(*nr_truncates)++;
323*02f310fcSJan Kara 	} else {
324*02f310fcSJan Kara 		if (test_opt(sb, DEBUG))
325*02f310fcSJan Kara 			ext4_msg(sb, KERN_DEBUG,
326*02f310fcSJan Kara 				"%s: deleting unreferenced inode %lu",
327*02f310fcSJan Kara 				__func__, inode->i_ino);
328*02f310fcSJan Kara 		jbd_debug(2, "deleting unreferenced inode %lu\n",
329*02f310fcSJan Kara 			  inode->i_ino);
330*02f310fcSJan Kara 		(*nr_orphans)++;
331*02f310fcSJan Kara 	}
332*02f310fcSJan Kara 	iput(inode);  /* The delete magic happens here! */
333*02f310fcSJan Kara }
334*02f310fcSJan Kara 
33525c6d98fSJan Kara /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
33625c6d98fSJan Kara  * the superblock) which were deleted from all directories, but held open by
33725c6d98fSJan Kara  * a process at the time of a crash.  We walk the list and try to delete these
33825c6d98fSJan Kara  * inodes at recovery time (only with a read-write filesystem).
33925c6d98fSJan Kara  *
34025c6d98fSJan Kara  * In order to keep the orphan inode chain consistent during traversal (in
34125c6d98fSJan Kara  * case of crash during recovery), we link each inode into the superblock
34225c6d98fSJan Kara  * orphan list_head and handle it the same way as an inode deletion during
34325c6d98fSJan Kara  * normal operation (which journals the operations for us).
34425c6d98fSJan Kara  *
34525c6d98fSJan Kara  * We only do an iget() and an iput() on each inode, which is very safe if we
34625c6d98fSJan Kara  * accidentally point at an in-use or already deleted inode.  The worst that
34725c6d98fSJan Kara  * can happen in this case is that we get a "bit already cleared" message from
34825c6d98fSJan Kara  * ext4_free_inode().  The only reason we would point at a wrong inode is if
34925c6d98fSJan Kara  * e2fsck was run on this filesystem, and it must have already done the orphan
35025c6d98fSJan Kara  * inode cleanup for us, so we can safely abort without any further action.
35125c6d98fSJan Kara  */
35225c6d98fSJan Kara void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
35325c6d98fSJan Kara {
35425c6d98fSJan Kara 	unsigned int s_flags = sb->s_flags;
355*02f310fcSJan Kara 	int nr_orphans = 0, nr_truncates = 0;
356*02f310fcSJan Kara 	struct inode *inode;
357*02f310fcSJan Kara 	int i, j;
35825c6d98fSJan Kara #ifdef CONFIG_QUOTA
35925c6d98fSJan Kara 	int quota_update = 0;
36025c6d98fSJan Kara #endif
361*02f310fcSJan Kara 	__le32 *bdata;
362*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
363*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
364*02f310fcSJan Kara 
365*02f310fcSJan Kara 	if (!es->s_last_orphan && !oi->of_blocks) {
36625c6d98fSJan Kara 		jbd_debug(4, "no orphan inodes to clean up\n");
36725c6d98fSJan Kara 		return;
36825c6d98fSJan Kara 	}
36925c6d98fSJan Kara 
37025c6d98fSJan Kara 	if (bdev_read_only(sb->s_bdev)) {
37125c6d98fSJan Kara 		ext4_msg(sb, KERN_ERR, "write access "
37225c6d98fSJan Kara 			"unavailable, skipping orphan cleanup");
37325c6d98fSJan Kara 		return;
37425c6d98fSJan Kara 	}
37525c6d98fSJan Kara 
37625c6d98fSJan Kara 	/* Check if feature set would not allow a r/w mount */
37725c6d98fSJan Kara 	if (!ext4_feature_set_ok(sb, 0)) {
37825c6d98fSJan Kara 		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
37925c6d98fSJan Kara 			 "unknown ROCOMPAT features");
38025c6d98fSJan Kara 		return;
38125c6d98fSJan Kara 	}
38225c6d98fSJan Kara 
38325c6d98fSJan Kara 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
38425c6d98fSJan Kara 		/* don't clear list on RO mount w/ errors */
38525c6d98fSJan Kara 		if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
38625c6d98fSJan Kara 			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
38725c6d98fSJan Kara 				  "clearing orphan list.\n");
38825c6d98fSJan Kara 			es->s_last_orphan = 0;
38925c6d98fSJan Kara 		}
39025c6d98fSJan Kara 		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
39125c6d98fSJan Kara 		return;
39225c6d98fSJan Kara 	}
39325c6d98fSJan Kara 
39425c6d98fSJan Kara 	if (s_flags & SB_RDONLY) {
39525c6d98fSJan Kara 		ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
39625c6d98fSJan Kara 		sb->s_flags &= ~SB_RDONLY;
39725c6d98fSJan Kara 	}
39825c6d98fSJan Kara #ifdef CONFIG_QUOTA
39925c6d98fSJan Kara 	/*
40025c6d98fSJan Kara 	 * Turn on quotas which were not enabled for read-only mounts if
40125c6d98fSJan Kara 	 * filesystem has quota feature, so that they are updated correctly.
40225c6d98fSJan Kara 	 */
40325c6d98fSJan Kara 	if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
40425c6d98fSJan Kara 		int ret = ext4_enable_quotas(sb);
40525c6d98fSJan Kara 
40625c6d98fSJan Kara 		if (!ret)
40725c6d98fSJan Kara 			quota_update = 1;
40825c6d98fSJan Kara 		else
40925c6d98fSJan Kara 			ext4_msg(sb, KERN_ERR,
41025c6d98fSJan Kara 				"Cannot turn on quotas: error %d", ret);
41125c6d98fSJan Kara 	}
41225c6d98fSJan Kara 
41325c6d98fSJan Kara 	/* Turn on journaled quotas used for old sytle */
41425c6d98fSJan Kara 	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
41525c6d98fSJan Kara 		if (EXT4_SB(sb)->s_qf_names[i]) {
41625c6d98fSJan Kara 			int ret = ext4_quota_on_mount(sb, i);
41725c6d98fSJan Kara 
41825c6d98fSJan Kara 			if (!ret)
41925c6d98fSJan Kara 				quota_update = 1;
42025c6d98fSJan Kara 			else
42125c6d98fSJan Kara 				ext4_msg(sb, KERN_ERR,
42225c6d98fSJan Kara 					"Cannot turn on journaled "
42325c6d98fSJan Kara 					"quota: type %d: error %d", i, ret);
42425c6d98fSJan Kara 		}
42525c6d98fSJan Kara 	}
42625c6d98fSJan Kara #endif
42725c6d98fSJan Kara 
42825c6d98fSJan Kara 	while (es->s_last_orphan) {
42925c6d98fSJan Kara 		/*
43025c6d98fSJan Kara 		 * We may have encountered an error during cleanup; if
43125c6d98fSJan Kara 		 * so, skip the rest.
43225c6d98fSJan Kara 		 */
43325c6d98fSJan Kara 		if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
43425c6d98fSJan Kara 			jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
43525c6d98fSJan Kara 			es->s_last_orphan = 0;
43625c6d98fSJan Kara 			break;
43725c6d98fSJan Kara 		}
43825c6d98fSJan Kara 
43925c6d98fSJan Kara 		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
44025c6d98fSJan Kara 		if (IS_ERR(inode)) {
44125c6d98fSJan Kara 			es->s_last_orphan = 0;
44225c6d98fSJan Kara 			break;
44325c6d98fSJan Kara 		}
44425c6d98fSJan Kara 
44525c6d98fSJan Kara 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
446*02f310fcSJan Kara 		ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
44725c6d98fSJan Kara 	}
448*02f310fcSJan Kara 
449*02f310fcSJan Kara 	for (i = 0; i < oi->of_blocks; i++) {
450*02f310fcSJan Kara 		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
451*02f310fcSJan Kara 		for (j = 0; j < inodes_per_ob; j++) {
452*02f310fcSJan Kara 			if (!bdata[j])
453*02f310fcSJan Kara 				continue;
454*02f310fcSJan Kara 			inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j]));
455*02f310fcSJan Kara 			if (IS_ERR(inode))
456*02f310fcSJan Kara 				continue;
457*02f310fcSJan Kara 			ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
458*02f310fcSJan Kara 			EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
459*02f310fcSJan Kara 			ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
46025c6d98fSJan Kara 		}
46125c6d98fSJan Kara 	}
46225c6d98fSJan Kara 
46325c6d98fSJan Kara #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
46425c6d98fSJan Kara 
46525c6d98fSJan Kara 	if (nr_orphans)
46625c6d98fSJan Kara 		ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
46725c6d98fSJan Kara 		       PLURAL(nr_orphans));
46825c6d98fSJan Kara 	if (nr_truncates)
46925c6d98fSJan Kara 		ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
47025c6d98fSJan Kara 		       PLURAL(nr_truncates));
47125c6d98fSJan Kara #ifdef CONFIG_QUOTA
47225c6d98fSJan Kara 	/* Turn off quotas if they were enabled for orphan cleanup */
47325c6d98fSJan Kara 	if (quota_update) {
47425c6d98fSJan Kara 		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
47525c6d98fSJan Kara 			if (sb_dqopt(sb)->files[i])
47625c6d98fSJan Kara 				dquot_quota_off(sb, i);
47725c6d98fSJan Kara 		}
47825c6d98fSJan Kara 	}
47925c6d98fSJan Kara #endif
48025c6d98fSJan Kara 	sb->s_flags = s_flags; /* Restore SB_RDONLY status */
48125c6d98fSJan Kara }
482*02f310fcSJan Kara 
483*02f310fcSJan Kara void ext4_release_orphan_info(struct super_block *sb)
484*02f310fcSJan Kara {
485*02f310fcSJan Kara 	int i;
486*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
487*02f310fcSJan Kara 
488*02f310fcSJan Kara 	if (!oi->of_blocks)
489*02f310fcSJan Kara 		return;
490*02f310fcSJan Kara 	for (i = 0; i < oi->of_blocks; i++)
491*02f310fcSJan Kara 		brelse(oi->of_binfo[i].ob_bh);
492*02f310fcSJan Kara 	kfree(oi->of_binfo);
493*02f310fcSJan Kara }
494*02f310fcSJan Kara 
495*02f310fcSJan Kara static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
496*02f310fcSJan Kara 						struct super_block *sb,
497*02f310fcSJan Kara 						struct buffer_head *bh)
498*02f310fcSJan Kara {
499*02f310fcSJan Kara 	return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize -
500*02f310fcSJan Kara 				sizeof(struct ext4_orphan_block_tail));
501*02f310fcSJan Kara }
502*02f310fcSJan Kara 
503*02f310fcSJan Kara static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
504*02f310fcSJan Kara 					      struct buffer_head *bh)
505*02f310fcSJan Kara {
506*02f310fcSJan Kara 	__u32 calculated;
507*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
508*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
509*02f310fcSJan Kara 	struct ext4_orphan_block_tail *ot;
510*02f310fcSJan Kara 	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
511*02f310fcSJan Kara 
512*02f310fcSJan Kara 	if (!ext4_has_metadata_csum(sb))
513*02f310fcSJan Kara 		return 1;
514*02f310fcSJan Kara 
515*02f310fcSJan Kara 	ot = ext4_orphan_block_tail(sb, bh);
516*02f310fcSJan Kara 	calculated = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
517*02f310fcSJan Kara 				 (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
518*02f310fcSJan Kara 	calculated = ext4_chksum(EXT4_SB(sb), calculated, (__u8 *)bh->b_data,
519*02f310fcSJan Kara 				 inodes_per_ob * sizeof(__u32));
520*02f310fcSJan Kara 	return le32_to_cpu(ot->ob_checksum) == calculated;
521*02f310fcSJan Kara }
522*02f310fcSJan Kara 
523*02f310fcSJan Kara /* This gets called only when checksumming is enabled */
524*02f310fcSJan Kara void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers,
525*02f310fcSJan Kara 				    struct buffer_head *bh,
526*02f310fcSJan Kara 				    void *data, size_t size)
527*02f310fcSJan Kara {
528*02f310fcSJan Kara 	struct super_block *sb = EXT4_TRIGGER(triggers)->sb;
529*02f310fcSJan Kara 	__u32 csum;
530*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
531*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
532*02f310fcSJan Kara 	struct ext4_orphan_block_tail *ot;
533*02f310fcSJan Kara 	__le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
534*02f310fcSJan Kara 
535*02f310fcSJan Kara 	csum = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
536*02f310fcSJan Kara 			   (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
537*02f310fcSJan Kara 	csum = ext4_chksum(EXT4_SB(sb), csum, (__u8 *)data,
538*02f310fcSJan Kara 			   inodes_per_ob * sizeof(__u32));
539*02f310fcSJan Kara 	ot = ext4_orphan_block_tail(sb, bh);
540*02f310fcSJan Kara 	ot->ob_checksum = cpu_to_le32(csum);
541*02f310fcSJan Kara }
542*02f310fcSJan Kara 
543*02f310fcSJan Kara int ext4_init_orphan_info(struct super_block *sb)
544*02f310fcSJan Kara {
545*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
546*02f310fcSJan Kara 	struct inode *inode;
547*02f310fcSJan Kara 	int i, j;
548*02f310fcSJan Kara 	int ret;
549*02f310fcSJan Kara 	int free;
550*02f310fcSJan Kara 	__le32 *bdata;
551*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
552*02f310fcSJan Kara 	struct ext4_orphan_block_tail *ot;
553*02f310fcSJan Kara 	ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum);
554*02f310fcSJan Kara 
555*02f310fcSJan Kara 	spin_lock_init(&oi->of_lock);
556*02f310fcSJan Kara 
557*02f310fcSJan Kara 	if (!ext4_has_feature_orphan_file(sb))
558*02f310fcSJan Kara 		return 0;
559*02f310fcSJan Kara 
560*02f310fcSJan Kara 	inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL);
561*02f310fcSJan Kara 	if (IS_ERR(inode)) {
562*02f310fcSJan Kara 		ext4_msg(sb, KERN_ERR, "get orphan inode failed");
563*02f310fcSJan Kara 		return PTR_ERR(inode);
564*02f310fcSJan Kara 	}
565*02f310fcSJan Kara 	oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
566*02f310fcSJan Kara 	oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
567*02f310fcSJan Kara 	oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block),
568*02f310fcSJan Kara 			       GFP_KERNEL);
569*02f310fcSJan Kara 	if (!oi->of_binfo) {
570*02f310fcSJan Kara 		ret = -ENOMEM;
571*02f310fcSJan Kara 		goto out_put;
572*02f310fcSJan Kara 	}
573*02f310fcSJan Kara 	for (i = 0; i < oi->of_blocks; i++) {
574*02f310fcSJan Kara 		oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0);
575*02f310fcSJan Kara 		if (IS_ERR(oi->of_binfo[i].ob_bh)) {
576*02f310fcSJan Kara 			ret = PTR_ERR(oi->of_binfo[i].ob_bh);
577*02f310fcSJan Kara 			goto out_free;
578*02f310fcSJan Kara 		}
579*02f310fcSJan Kara 		if (!oi->of_binfo[i].ob_bh) {
580*02f310fcSJan Kara 			ret = -EIO;
581*02f310fcSJan Kara 			goto out_free;
582*02f310fcSJan Kara 		}
583*02f310fcSJan Kara 		ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh);
584*02f310fcSJan Kara 		if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) {
585*02f310fcSJan Kara 			ext4_error(sb, "orphan file block %d: bad magic", i);
586*02f310fcSJan Kara 			ret = -EIO;
587*02f310fcSJan Kara 			goto out_free;
588*02f310fcSJan Kara 		}
589*02f310fcSJan Kara 		if (!ext4_orphan_file_block_csum_verify(sb,
590*02f310fcSJan Kara 						oi->of_binfo[i].ob_bh)) {
591*02f310fcSJan Kara 			ext4_error(sb, "orphan file block %d: bad checksum", i);
592*02f310fcSJan Kara 			ret = -EIO;
593*02f310fcSJan Kara 			goto out_free;
594*02f310fcSJan Kara 		}
595*02f310fcSJan Kara 		bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
596*02f310fcSJan Kara 		free = 0;
597*02f310fcSJan Kara 		for (j = 0; j < inodes_per_ob; j++)
598*02f310fcSJan Kara 			if (bdata[j] == 0)
599*02f310fcSJan Kara 				free++;
600*02f310fcSJan Kara 		oi->of_binfo[i].ob_free_entries = free;
601*02f310fcSJan Kara 	}
602*02f310fcSJan Kara 	iput(inode);
603*02f310fcSJan Kara 	return 0;
604*02f310fcSJan Kara out_free:
605*02f310fcSJan Kara 	for (i--; i >= 0; i--)
606*02f310fcSJan Kara 		brelse(oi->of_binfo[i].ob_bh);
607*02f310fcSJan Kara 	kfree(oi->of_binfo);
608*02f310fcSJan Kara out_put:
609*02f310fcSJan Kara 	iput(inode);
610*02f310fcSJan Kara 	return ret;
611*02f310fcSJan Kara }
612*02f310fcSJan Kara 
613*02f310fcSJan Kara int ext4_orphan_file_empty(struct super_block *sb)
614*02f310fcSJan Kara {
615*02f310fcSJan Kara 	struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
616*02f310fcSJan Kara 	int i;
617*02f310fcSJan Kara 	int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
618*02f310fcSJan Kara 
619*02f310fcSJan Kara 	if (!ext4_has_feature_orphan_file(sb))
620*02f310fcSJan Kara 		return 1;
621*02f310fcSJan Kara 	for (i = 0; i < oi->of_blocks; i++)
622*02f310fcSJan Kara 		if (oi->of_binfo[i].ob_free_entries != inodes_per_ob)
623*02f310fcSJan Kara 			return 0;
624*02f310fcSJan Kara 	return 1;
625*02f310fcSJan Kara }
626