xref: /openbmc/linux/fs/btrfs/btrfs_inode.h (revision 3cd24c698004d2f7668e0eb9fc1f096f533c791b)
19888c340SDavid Sterba /* SPDX-License-Identifier: GPL-2.0 */
26cbd5570SChris Mason /*
36cbd5570SChris Mason  * Copyright (C) 2007 Oracle.  All rights reserved.
46cbd5570SChris Mason  */
56cbd5570SChris Mason 
69888c340SDavid Sterba #ifndef BTRFS_INODE_H
79888c340SDavid Sterba #define BTRFS_INODE_H
82c90e5d6SChris Mason 
9778ba82bSFilipe David Borba Manana #include <linux/hash.h>
10a52d9a80SChris Mason #include "extent_map.h"
11d1310b2eSChris Mason #include "extent_io.h"
12e6dcd2dcSChris Mason #include "ordered-data.h"
1316cdcec7SMiao Xie #include "delayed-inode.h"
14a52d9a80SChris Mason 
/*
 * Bit numbers for btrfs_inode::runtime_flags, manipulated with the atomic
 * bitops (set_bit(), clear_bit(), ...).  The values are spelled out because
 * they are bit positions rather than a plain enumeration.
 *
 * BTRFS_INODE_ORDERED_DATA_CLOSE is set by truncate when a file that used
 * to have good data has been truncated to zero.  When it is set the btrfs
 * file release call will add this inode to the ordered operations list so
 * that we make sure to flush out any new data the application may have
 * written before commit.
 *
 * BTRFS_INODE_READDIO_NEED_LOCK forces new direct IO readers to take
 * i_mutex; see btrfs_inode_block_unlocked_dio().
 */
enum {
	BTRFS_INODE_ORDERED_DATA_CLOSE	= 0,
	BTRFS_INODE_DUMMY		= 1,
	BTRFS_INODE_IN_DEFRAG		= 2,
	BTRFS_INODE_HAS_ASYNC_EXTENT	= 3,
	BTRFS_INODE_NEEDS_FULL_SYNC	= 4,
	BTRFS_INODE_COPY_EVERYTHING	= 5,
	BTRFS_INODE_IN_DELALLOC_LIST	= 6,
	BTRFS_INODE_READDIO_NEED_LOCK	= 7,
	BTRFS_INODE_HAS_PROPS		= 8,
	BTRFS_INODE_SNAPSHOT_FLUSH	= 9,
};
3472ac3c0dSJosef Bacik 
35f1ace244SAneesh /* in memory btrfs inode */
362c90e5d6SChris Mason struct btrfs_inode {
37d352ac68SChris Mason 	/* which subvolume this inode belongs to */
38d6e4a428SChris Mason 	struct btrfs_root *root;
39d352ac68SChris Mason 
40d352ac68SChris Mason 	/* key used to find this inode on disk.  This is used by the code
41d352ac68SChris Mason 	 * to read in roots of subvolumes
42d352ac68SChris Mason 	 */
43d6e4a428SChris Mason 	struct btrfs_key location;
44d352ac68SChris Mason 
452f2ff0eeSFilipe Manana 	/*
462f2ff0eeSFilipe Manana 	 * Lock for counters and all fields used to determine if the inode is in
472f2ff0eeSFilipe Manana 	 * the log or not (last_trans, last_sub_trans, last_log_commit,
482f2ff0eeSFilipe Manana 	 * logged_trans).
492f2ff0eeSFilipe Manana 	 */
509e0baf60SJosef Bacik 	spinlock_t lock;
519e0baf60SJosef Bacik 
52d352ac68SChris Mason 	/* the extent_tree has caches of all the extent mappings to disk */
53a52d9a80SChris Mason 	struct extent_map_tree extent_tree;
54d352ac68SChris Mason 
55d352ac68SChris Mason 	/* the io_tree does range state (DIRTY, LOCKED etc) */
56d1310b2eSChris Mason 	struct extent_io_tree io_tree;
57d352ac68SChris Mason 
58d352ac68SChris Mason 	/* special utility tree used to record which mirrors have already been
59d352ac68SChris Mason 	 * tried when checksums fail for a given block
60d352ac68SChris Mason 	 */
617e38326fSChris Mason 	struct extent_io_tree io_failure_tree;
62d352ac68SChris Mason 
63d352ac68SChris Mason 	/* held while logging the inode in tree-log.c */
64e02119d5SChris Mason 	struct mutex log_mutex;
65d352ac68SChris Mason 
66f248679eSJosef Bacik 	/* held while doing delalloc reservations */
67f248679eSJosef Bacik 	struct mutex delalloc_mutex;
68f248679eSJosef Bacik 
69d352ac68SChris Mason 	/* used to order data wrt metadata */
70e6dcd2dcSChris Mason 	struct btrfs_ordered_inode_tree ordered_tree;
7115ee9bc7SJosef Bacik 
72d352ac68SChris Mason 	/* list of all the delalloc inodes in the FS.  There are times we need
73d352ac68SChris Mason 	 * to write all the delalloc pages to disk, and this list is used
74d352ac68SChris Mason 	 * to walk them all.
75d352ac68SChris Mason 	 */
76ea8c2819SChris Mason 	struct list_head delalloc_inodes;
77ea8c2819SChris Mason 
785d4f98a2SYan Zheng 	/* node for the red-black tree that links inodes in subvolume root */
795d4f98a2SYan Zheng 	struct rb_node rb_node;
805d4f98a2SYan Zheng 
8172ac3c0dSJosef Bacik 	unsigned long runtime_flags;
8272ac3c0dSJosef Bacik 
839c931c5aSNathaniel Yazdani 	/* Keep track of who's O_SYNC/fsyncing currently */
84b812ce28SJosef Bacik 	atomic_t sync_writers;
85b812ce28SJosef Bacik 
86d352ac68SChris Mason 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
87d352ac68SChris Mason 	 * enough field for this.
88d352ac68SChris Mason 	 */
89e02119d5SChris Mason 	u64 generation;
90e02119d5SChris Mason 
9115ee9bc7SJosef Bacik 	/*
9215ee9bc7SJosef Bacik 	 * transid of the trans_handle that last modified this inode
9315ee9bc7SJosef Bacik 	 */
9415ee9bc7SJosef Bacik 	u64 last_trans;
95257c62e1SChris Mason 
96257c62e1SChris Mason 	/*
97e02119d5SChris Mason 	 * transid that last logged this inode
98e02119d5SChris Mason 	 */
99e02119d5SChris Mason 	u64 logged_trans;
10049eb7e46SChris Mason 
101bb14a59bSMiao Xie 	/*
102bb14a59bSMiao Xie 	 * log transid when this inode was last modified
103bb14a59bSMiao Xie 	 */
104bb14a59bSMiao Xie 	int last_sub_trans;
105bb14a59bSMiao Xie 
106bb14a59bSMiao Xie 	/* a local copy of root's last_log_commit */
107bb14a59bSMiao Xie 	int last_log_commit;
108bb14a59bSMiao Xie 
109d352ac68SChris Mason 	/* total number of bytes pending delalloc, used by stat to calc the
110d352ac68SChris Mason 	 * real block usage of the file
111d352ac68SChris Mason 	 */
1129069218dSChris Mason 	u64 delalloc_bytes;
113d352ac68SChris Mason 
114d352ac68SChris Mason 	/*
115a7e3b975SFilipe Manana 	 * Total number of bytes pending delalloc that fall within a file
116a7e3b975SFilipe Manana 	 * range that is either a hole or beyond EOF (and no prealloc extent
117a7e3b975SFilipe Manana 	 * exists in the range). This is always <= delalloc_bytes.
118a7e3b975SFilipe Manana 	 */
119a7e3b975SFilipe Manana 	u64 new_delalloc_bytes;
120a7e3b975SFilipe Manana 
121a7e3b975SFilipe Manana 	/*
12247059d93SWang Shilong 	 * total number of bytes pending defrag, used by stat to check whether
12347059d93SWang Shilong 	 * it needs COW.
12447059d93SWang Shilong 	 */
12547059d93SWang Shilong 	u64 defrag_bytes;
12647059d93SWang Shilong 
12747059d93SWang Shilong 	/*
128d352ac68SChris Mason 	 * the size of the file stored in the metadata on disk.  data=ordered
129d352ac68SChris Mason 	 * means the in-memory i_size might be larger than the size on disk
130d352ac68SChris Mason 	 * because not all the blocks are written yet.
131d352ac68SChris Mason 	 */
132dbe674a9SChris Mason 	u64 disk_i_size;
133d352ac68SChris Mason 
134aec7477bSJosef Bacik 	/*
135aec7477bSJosef Bacik 	 * if this is a directory then index_cnt is the counter for the index
136aec7477bSJosef Bacik 	 * number for new files that are created
137aec7477bSJosef Bacik 	 */
138aec7477bSJosef Bacik 	u64 index_cnt;
139d352ac68SChris Mason 
14067de1176SMiao Xie 	/* Cache the directory index number to speed the dir/file remove */
14167de1176SMiao Xie 	u64 dir_index;
14267de1176SMiao Xie 
14312fcfd22SChris Mason 	/* the fsync log has some corner cases that mean we have to check
14412fcfd22SChris Mason 	 * directories to see if any unlinks have been done before
14512fcfd22SChris Mason 	 * the directory was logged.  See tree-log.c for all the
14612fcfd22SChris Mason 	 * details
14712fcfd22SChris Mason 	 */
14812fcfd22SChris Mason 	u64 last_unlink_trans;
14912fcfd22SChris Mason 
1507709cde3SJosef Bacik 	/*
1517709cde3SJosef Bacik 	 * Number of bytes outstanding that are going to need csums.  This is
1527709cde3SJosef Bacik 	 * used in ENOSPC accounting.
1537709cde3SJosef Bacik 	 */
1547709cde3SJosef Bacik 	u64 csum_bytes;
1557709cde3SJosef Bacik 
156f1bdcc0aSJosef Bacik 	/* flags field from the on disk inode */
157f1bdcc0aSJosef Bacik 	u32 flags;
158f1bdcc0aSJosef Bacik 
1595a3f23d5SChris Mason 	/*
16032c00affSJosef Bacik 	 * Counters to keep track of the number of extent item's we may use due
16132c00affSJosef Bacik 	 * to delalloc and such.  outstanding_extents is the number of extent
16232c00affSJosef Bacik 	 * items we think we'll end up using, and reserved_extents is the number
16332c00affSJosef Bacik 	 * of extent items we've reserved metadata for.
1649ed74f2dSJosef Bacik 	 */
1659e0baf60SJosef Bacik 	unsigned outstanding_extents;
16669fe2d75SJosef Bacik 
16769fe2d75SJosef Bacik 	struct btrfs_block_rsv block_rsv;
1689ed74f2dSJosef Bacik 
1699ed74f2dSJosef Bacik 	/*
170b52aa8c9SDavid Sterba 	 * Cached values of inode properties
1711e701a32SChris Mason 	 */
172b52aa8c9SDavid Sterba 	unsigned prop_compress;		/* per-file compression algorithm */
173eec63c65SDavid Sterba 	/*
174eec63c65SDavid Sterba 	 * Force compression on the file using the defrag ioctl, could be
175eec63c65SDavid Sterba 	 * different from prop_compress and takes precedence if set
176eec63c65SDavid Sterba 	 */
177eec63c65SDavid Sterba 	unsigned defrag_compress;
1781e701a32SChris Mason 
17916cdcec7SMiao Xie 	struct btrfs_delayed_node *delayed_node;
18016cdcec7SMiao Xie 
1819cc97d64Schandan r 	/* File creation time. */
182d3c6be6fSArnd Bergmann 	struct timespec64 i_otime;
1839cc97d64Schandan r 
1848089fe62SDavid Sterba 	/* Hook into fs_info->delayed_iputs */
1858089fe62SDavid Sterba 	struct list_head delayed_iput;
1868089fe62SDavid Sterba 
1875f9a8a51SFilipe Manana 	/*
1885f9a8a51SFilipe Manana 	 * To avoid races between lockless (i_mutex not held) direct IO writes
1895f9a8a51SFilipe Manana 	 * and concurrent fsync requests. Direct IO writes must acquire read
1905f9a8a51SFilipe Manana 	 * access on this semaphore for creating an extent map and its
1915f9a8a51SFilipe Manana 	 * corresponding ordered extent. The fast fsync path must acquire write
1925f9a8a51SFilipe Manana 	 * access on this semaphore before it collects ordered extents and
1935f9a8a51SFilipe Manana 	 * extent maps.
1945f9a8a51SFilipe Manana 	 */
1955f9a8a51SFilipe Manana 	struct rw_semaphore dio_sem;
1965f9a8a51SFilipe Manana 
197d352ac68SChris Mason 	struct inode vfs_inode;
1982c90e5d6SChris Mason };
199dbe674a9SChris Mason 
20016cdcec7SMiao Xie extern unsigned char btrfs_filetype_table[];
20116cdcec7SMiao Xie 
2029a35b637SJeff Mahoney static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
2032c90e5d6SChris Mason {
2042c90e5d6SChris Mason 	return container_of(inode, struct btrfs_inode, vfs_inode);
2052c90e5d6SChris Mason }
2062c90e5d6SChris Mason 
207778ba82bSFilipe David Borba Manana static inline unsigned long btrfs_inode_hash(u64 objectid,
208778ba82bSFilipe David Borba Manana 					     const struct btrfs_root *root)
209778ba82bSFilipe David Borba Manana {
2104fd786e6SMisono Tomohiro 	u64 h = objectid ^ (root->root_key.objectid * GOLDEN_RATIO_PRIME);
211778ba82bSFilipe David Borba Manana 
212778ba82bSFilipe David Borba Manana #if BITS_PER_LONG == 32
213778ba82bSFilipe David Borba Manana 	h = (h >> 32) ^ (h & 0xffffffff);
214778ba82bSFilipe David Borba Manana #endif
215778ba82bSFilipe David Borba Manana 
216778ba82bSFilipe David Borba Manana 	return (unsigned long)h;
217778ba82bSFilipe David Borba Manana }
218778ba82bSFilipe David Borba Manana 
219778ba82bSFilipe David Borba Manana static inline void btrfs_insert_inode_hash(struct inode *inode)
220778ba82bSFilipe David Borba Manana {
221778ba82bSFilipe David Borba Manana 	unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
222778ba82bSFilipe David Borba Manana 
223778ba82bSFilipe David Borba Manana 	__insert_inode_hash(inode, h);
224778ba82bSFilipe David Borba Manana }
225778ba82bSFilipe David Borba Manana 
2269a35b637SJeff Mahoney static inline u64 btrfs_ino(const struct btrfs_inode *inode)
22733345d01SLi Zefan {
2284a0cc7caSNikolay Borisov 	u64 ino = inode->location.objectid;
22933345d01SLi Zefan 
23014c7cca7SLiu Bo 	/*
23114c7cca7SLiu Bo 	 * !ino: btree_inode
23214c7cca7SLiu Bo 	 * type == BTRFS_ROOT_ITEM_KEY: subvol dir
23314c7cca7SLiu Bo 	 */
2344a0cc7caSNikolay Borisov 	if (!ino || inode->location.type == BTRFS_ROOT_ITEM_KEY)
2354a0cc7caSNikolay Borisov 		ino = inode->vfs_inode.i_ino;
23633345d01SLi Zefan 	return ino;
23733345d01SLi Zefan }
23833345d01SLi Zefan 
2396ef06d27SNikolay Borisov static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
240dbe674a9SChris Mason {
2416ef06d27SNikolay Borisov 	i_size_write(&inode->vfs_inode, size);
2426ef06d27SNikolay Borisov 	inode->disk_i_size = size;
243dbe674a9SChris Mason }
244dbe674a9SChris Mason 
24570ddc553SNikolay Borisov static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
2462cf8572dSChris Mason {
24770ddc553SNikolay Borisov 	struct btrfs_root *root = inode->root;
24883eea1f1SLiu Bo 
24951a8cf9dSLiu Bo 	if (root == root->fs_info->tree_root &&
25070ddc553SNikolay Borisov 	    btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
25151a8cf9dSLiu Bo 		return true;
25270ddc553SNikolay Borisov 	if (inode->location.objectid == BTRFS_FREE_INO_OBJECTID)
2532cf8572dSChris Mason 		return true;
2542cf8572dSChris Mason 	return false;
2552cf8572dSChris Mason }
2562cf8572dSChris Mason 
25706f2548fSNikolay Borisov static inline bool is_data_inode(struct inode *inode)
25806f2548fSNikolay Borisov {
25906f2548fSNikolay Borisov 	return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID;
26006f2548fSNikolay Borisov }
26106f2548fSNikolay Borisov 
2628b62f87bSJosef Bacik static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
2638b62f87bSJosef Bacik 						 int mod)
2648b62f87bSJosef Bacik {
2658b62f87bSJosef Bacik 	lockdep_assert_held(&inode->lock);
2668b62f87bSJosef Bacik 	inode->outstanding_extents += mod;
2678b62f87bSJosef Bacik 	if (btrfs_is_free_space_inode(inode))
2688b62f87bSJosef Bacik 		return;
269dd48d407SJosef Bacik 	trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
270dd48d407SJosef Bacik 						  mod);
2718b62f87bSJosef Bacik }
2728b62f87bSJosef Bacik 
2730f8939b8SNikolay Borisov static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
27422ee6985SJosef Bacik {
2752f2ff0eeSFilipe Manana 	int ret = 0;
2762f2ff0eeSFilipe Manana 
2770f8939b8SNikolay Borisov 	spin_lock(&inode->lock);
2780f8939b8SNikolay Borisov 	if (inode->logged_trans == generation &&
2790f8939b8SNikolay Borisov 	    inode->last_sub_trans <= inode->last_log_commit &&
2800f8939b8SNikolay Borisov 	    inode->last_sub_trans <= inode->root->last_log_commit) {
281125c4cf9SFilipe Manana 		/*
282125c4cf9SFilipe Manana 		 * After a ranged fsync we might have left some extent maps
283125c4cf9SFilipe Manana 		 * (that fall outside the fsync's range). So return false
284125c4cf9SFilipe Manana 		 * here if the list isn't empty, to make sure btrfs_log_inode()
285125c4cf9SFilipe Manana 		 * will be called and process those extent maps.
286125c4cf9SFilipe Manana 		 */
287125c4cf9SFilipe Manana 		smp_mb();
2880f8939b8SNikolay Borisov 		if (list_empty(&inode->extent_tree.modified_extents))
2892f2ff0eeSFilipe Manana 			ret = 1;
290125c4cf9SFilipe Manana 	}
2910f8939b8SNikolay Borisov 	spin_unlock(&inode->lock);
2922f2ff0eeSFilipe Manana 	return ret;
29322ee6985SJosef Bacik }
29422ee6985SJosef Bacik 
295c1dc0896SMiao Xie #define BTRFS_DIO_ORIG_BIO_SUBMITTED	0x1
296c1dc0896SMiao Xie 
297facc8a22SMiao Xie struct btrfs_dio_private {
298facc8a22SMiao Xie 	struct inode *inode;
299c1dc0896SMiao Xie 	unsigned long flags;
300facc8a22SMiao Xie 	u64 logical_offset;
301facc8a22SMiao Xie 	u64 disk_bytenr;
302facc8a22SMiao Xie 	u64 bytes;
303facc8a22SMiao Xie 	void *private;
304facc8a22SMiao Xie 
305facc8a22SMiao Xie 	/* number of bios pending for this dio */
306facc8a22SMiao Xie 	atomic_t pending_bios;
307facc8a22SMiao Xie 
308facc8a22SMiao Xie 	/* IO errors */
309facc8a22SMiao Xie 	int errors;
310facc8a22SMiao Xie 
311facc8a22SMiao Xie 	/* orig_bio is our btrfs_io_bio */
312facc8a22SMiao Xie 	struct bio *orig_bio;
313facc8a22SMiao Xie 
314facc8a22SMiao Xie 	/* dio_bio came from fs/direct-io.c */
315facc8a22SMiao Xie 	struct bio *dio_bio;
316c1dc0896SMiao Xie 
317c1dc0896SMiao Xie 	/*
31801327610SNicholas D Steeves 	 * The original bio may be split to several sub-bios, this is
319c1dc0896SMiao Xie 	 * done during endio of sub-bios
320c1dc0896SMiao Xie 	 */
3214e4cbee9SChristoph Hellwig 	blk_status_t (*subio_endio)(struct inode *, struct btrfs_io_bio *,
3224e4cbee9SChristoph Hellwig 			blk_status_t);
323facc8a22SMiao Xie };
324facc8a22SMiao Xie 
3252e60a51eSMiao Xie /*
3262e60a51eSMiao Xie  * Disable DIO read nolock optimization, so new dio readers will be forced
3272e60a51eSMiao Xie  * to grab i_mutex. It is used to avoid the endless truncate due to
3282e60a51eSMiao Xie  * nonlocked dio read.
3292e60a51eSMiao Xie  */
330abcefb1eSNikolay Borisov static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
3312e60a51eSMiao Xie {
332abcefb1eSNikolay Borisov 	set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
3332e60a51eSMiao Xie 	smp_mb();
3342e60a51eSMiao Xie }
3352e60a51eSMiao Xie 
3360b581701SNikolay Borisov static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
3372e60a51eSMiao Xie {
3384e857c58SPeter Zijlstra 	smp_mb__before_atomic();
3390b581701SNikolay Borisov 	clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
3402e60a51eSMiao Xie }
3412e60a51eSMiao Xie 
3420970a22eSNikolay Borisov static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
3436f6b643eSQu Wenruo 		u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
3446f6b643eSQu Wenruo {
3450970a22eSNikolay Borisov 	struct btrfs_root *root = inode->root;
3466f6b643eSQu Wenruo 
3476f6b643eSQu Wenruo 	/* Output minus objectid, which is more meaningful */
3484fd786e6SMisono Tomohiro 	if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
3496f6b643eSQu Wenruo 		btrfs_warn_rl(root->fs_info,
3506f6b643eSQu Wenruo 	"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
3514fd786e6SMisono Tomohiro 			root->root_key.objectid, btrfs_ino(inode),
3526f6b643eSQu Wenruo 			logical_start, csum, csum_expected, mirror_num);
3536f6b643eSQu Wenruo 	else
3546f6b643eSQu Wenruo 		btrfs_warn_rl(root->fs_info,
3556f6b643eSQu Wenruo 	"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
3564fd786e6SMisono Tomohiro 			root->root_key.objectid, btrfs_ino(inode),
3576f6b643eSQu Wenruo 			logical_start, csum, csum_expected, mirror_num);
3586f6b643eSQu Wenruo }
3596f6b643eSQu Wenruo 
3602c90e5d6SChris Mason #endif
361